diff --git a/assets/shaders/inc/meshlet_primitive_cull.h b/assets/shaders/inc/meshlet_primitive_cull.h index f2af9cb7..5a50d9bd 100644 --- a/assets/shaders/inc/meshlet_primitive_cull.h +++ b/assets/shaders/inc/meshlet_primitive_cull.h @@ -288,7 +288,7 @@ void meshlet_emit_primitive(uvec3 prim, vec4 clip_pos, vec4 viewport) if (is_active_prim) { #ifdef MESHLET_PRIMITIVE_CULL_SHARED_INDEX - MESHLET_PRIMITIVE_CULL_SHARED_INDEX[compacted_index_output()] = u8vec3(remap_index_buffer(prim)); + MESHLET_PRIMITIVE_CULL_SHARED_INDEX[compacted_index_output()] = pack32(u8vec4(remap_index_buffer(prim), 0)); #else gl_PrimitiveTriangleIndicesEXT[compacted_index_output()] = remap_index_buffer(prim); #endif diff --git a/tests/assets/shaders/meshlet_debug.mesh b/tests/assets/shaders/meshlet_debug.mesh index b966d5f8..3bbebab4 100644 --- a/tests/assets/shaders/meshlet_debug.mesh +++ b/tests/assets/shaders/meshlet_debug.mesh @@ -31,14 +31,12 @@ layout(local_size_x = 32, local_size_y_id = 0) in; #include "meshlet_attribute_decode.h" #include "meshlet_render_types.h" -#define LOCAL_INVOCATION_INDEXED 0 - -#if MESHLET_SIZE > 32 && LOCAL_INVOCATION_INDEXED +#if MESHLET_LOCAL_INVOCATION_INDEXED #define MESHLET_PRIMITIVE_CULL_SHARED_INDEX shared_indices -shared u8vec3 shared_indices[MESHLET_SIZE]; +shared uint shared_indices[MESHLET_SIZE]; shared u8vec4 shared_nt[MESHLET_SIZE]; shared bool shared_t_sign[MESHLET_SIZE]; -shared int shared_exponent; +shared int shared_exponent[MESHLET_SIZE]; shared i16vec2 shared_uv[MESHLET_SIZE]; shared vec4 shared_clip_pos[MESHLET_SIZE]; #endif @@ -146,8 +144,7 @@ void main() shared_clip_pos[out_vert_index] = clip_pos; shared_uv[out_vert_index] = uv; shared_nt[out_vert_index] = nt; - if (out_vert_index == 0) - shared_exponent = exponent; + shared_exponent[out_vert_index] = exponent; #else gl_MeshVerticesEXT[out_vert_index].gl_Position = clip_pos; vUV[out_vert_index] = attribute_decode_snorm_exp_uv(uv, exponent); @@ -162,7 +159,7 @@ void main() if (gl_LocalInvocationIndex < shared_active_vert_count_total) { i16vec2 uv = shared_uv[gl_LocalInvocationIndex]; - int exponent = shared_exponent; + int exponent = shared_exponent[gl_LocalInvocationIndex]; u8vec4 nt = shared_nt[gl_LocalInvocationIndex]; bool t_sign = shared_t_sign[gl_LocalInvocationIndex]; @@ -177,7 +174,7 @@ void main() if (gl_LocalInvocationIndex < shared_active_prim_count_total) { #ifdef MESHLET_PRIMITIVE_CULL_SHARED_INDEX - gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(shared_indices[gl_LocalInvocationIndex]); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(unpack8(shared_indices[gl_LocalInvocationIndex]).xyz); #endif vDrawID[gl_LocalInvocationIndex] = task.meshlet_index; } diff --git a/tests/assets/shaders/meshlet_debug_plain.mesh b/tests/assets/shaders/meshlet_debug_plain.mesh index b2c65def..ad561b5a 100644 --- a/tests/assets/shaders/meshlet_debug_plain.mesh +++ b/tests/assets/shaders/meshlet_debug_plain.mesh @@ -16,8 +16,11 @@ #error "Must define MESHLET_SIZE" #endif -#if MESHLET_SIZE > 32 && !MESHLET_VERTEX_ID +#if MESHLET_LOCAL_INVOCATION_INDEXED && !MESHLET_VERTEX_ID shared uint shared_attr_index[MESHLET_SIZE]; +shared vec4 shared_clip_pos[MESHLET_SIZE]; +#define MESHLET_PRIMITIVE_CULL_SHARED_INDEX shared_indices +shared uint shared_indices[MESHLET_SIZE]; #endif layout(max_primitives = MESHLET_SIZE, max_vertices = MESHLET_SIZE, triangles) out; @@ -141,13 +144,15 @@ void main() { uint out_vert_index = meshlet_compacted_vertex_output(); uint vert_id = meshlet.vertex_offset + linear_index; - gl_MeshVerticesEXT[out_vert_index].gl_Position = clip_pos; #if MESHLET_VERTEX_ID + gl_MeshVerticesEXT[out_vert_index].gl_Position = clip_pos; vVertexID[out_vert_index] = vert_id; -#elif MESHLET_SIZE > 32 +#elif defined(MESHLET_PRIMITIVE_CULL_SHARED_INDEX) + shared_clip_pos[out_vert_index] = clip_pos; shared_attr_index[out_vert_index] = vert_id; #else + gl_MeshVerticesEXT[out_vert_index].gl_Position = clip_pos; TexturedAttr a = attr.data[vert_id]; mediump vec3 n = unpack_bgr10a2(a.n).xyz; mediump vec4 t = unpack_bgr10a2(a.t); @@ -157,7 +162,7 @@ void main() #endif } -#if MESHLET_SIZE > 32 && !MESHLET_VERTEX_ID +#ifdef MESHLET_PRIMITIVE_CULL_SHARED_INDEX barrier(); if (gl_LocalInvocationIndex < shared_active_vert_count_total) @@ -165,6 +170,7 @@ void main() TexturedAttr a = attr.data[shared_attr_index[gl_LocalInvocationIndex]]; mediump vec3 n = unpack_bgr10a2(a.n).xyz; mediump vec4 t = unpack_bgr10a2(a.t); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = shared_clip_pos[gl_LocalInvocationIndex]; vUV[gl_LocalInvocationIndex] = a.uv; vNormal[gl_LocalInvocationIndex] = mat3(M) * n; vTangent[gl_LocalInvocationIndex] = vec4(mat3(M) * t.xyz, t.w); @@ -174,7 +180,7 @@ void main() if (gl_LocalInvocationIndex < shared_active_prim_count_total) { #ifdef MESHLET_PRIMITIVE_CULL_SHARED_INDEX - gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(shared_primitive[gl_LocalInvocationIndex]); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(unpack8(shared_indices[gl_LocalInvocationIndex]).xyz); #endif #if MESHLET_VERTEX_ID vTransformIndex[gl_LocalInvocationIndex] = task.node_offset; diff --git a/tests/meshlet_viewer.cpp b/tests/meshlet_viewer.cpp index e8e43dcf..b18a8ee3 100644 --- a/tests/meshlet_viewer.cpp +++ b/tests/meshlet_viewer.cpp @@ -366,7 +366,7 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler // if (ui.indirect_rendering) ui.use_preculling = Util::get_environment_bool("PRECULL", ui.use_preculling); - if (!device.get_device_features().mesh_shader_features.taskShader) + if (!device.get_device_features().mesh_shader_features.taskShader && ui.indirect_rendering) ui.use_preculling = true; struct @@ -597,10 +597,15 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler // bool supports_wg32 = ui.supports_wave32 && ui.target_meshlet_workgroup_size == 32; + bool local_invocation_indexed = + device.get_device_features().mesh_shader_properties.prefersLocalInvocationPrimitiveOutput || + device.get_device_features().mesh_shader_properties.prefersLocalInvocationVertexOutput; + if (ui.use_preculling) { cmd->set_program("", mesh_path, "assets://shaders/meshlet_debug.mesh.frag", { { "MESHLET_SIZE", int(ui.target_meshlet_workgroup_size) }, + { "MESHLET_LOCAL_INVOCATION_INDEXED", int(local_invocation_indexed) }, { "MESHLET_VERTEX_ID", int(ui.use_vertex_id) } }); } else @@ -612,6 +617,7 @@ struct MeshletViewerApplication : Granite::Application, Granite::EventHandler // { "MESHLET_RENDER_PHASE", render_phase }, { "MESHLET_PRIMITIVE_CULL_WG32", int(supports_wg32) }, { "MESHLET_VERTEX_ID", int(ui.use_vertex_id) }, + { "MESHLET_LOCAL_INVOCATION_INDEXED", int(local_invocation_indexed) }, { "MESHLET_PRIMITIVE_CULL_WAVE32", int(ui.supports_wave32) } }); cmd->set_storage_buffer(0, 6, *aabb_visibility_buffer);