From 808b51c1cf719d10e797ab70a769eae19460c2f5 Mon Sep 17 00:00:00 2001 From: Benjamin Doherty Date: Mon, 22 Apr 2024 16:06:39 -0600 Subject: [PATCH 01/31] matedit: fix use-after-free --- tools/matedit/src/ExternalCompile.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/matedit/src/ExternalCompile.cpp b/tools/matedit/src/ExternalCompile.cpp index ee0374bc2d8..11d1583d1b1 100644 --- a/tools/matedit/src/ExternalCompile.cpp +++ b/tools/matedit/src/ExternalCompile.cpp @@ -160,8 +160,10 @@ static bool invokeScript(const std::vector& userArgs, backend::Shad // Temporary input and output files argv.push_back(const_cast(inputPath.c_str())); argv.push_back(const_cast(outputPath.c_str())); - argv.push_back(const_cast(toString(stage).c_str())); - argv.push_back(const_cast(toString(model).c_str())); + auto stageString = toString(stage); + argv.push_back(const_cast(stageString.c_str())); + auto modelString = toString(model); + argv.push_back(const_cast(modelString.c_str())); // Optional user-supplied arguments for (int i = 1; i < userArgs.size(); i++) { From 14a99cbc63e2dacd1d1abb6163827697f53ad48a Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Wed, 17 Apr 2024 16:31:27 -0700 Subject: [PATCH 02/31] add the target refresh interval to the backend beginFrame --- .../include/backend/platforms/OpenGLPlatform.h | 17 +++++++++++++++++ .../include/private/backend/DriverAPI.inc | 1 + filament/backend/src/metal/MetalDriver.mm | 3 ++- filament/backend/src/noop/NoopDriver.cpp | 3 ++- filament/backend/src/opengl/OpenGLDriver.cpp | 3 +++ filament/backend/src/opengl/OpenGLPlatform.cpp | 7 +++++++ filament/backend/src/vulkan/VulkanDriver.cpp | 3 ++- filament/backend/test/test_Blit.cpp | 16 ++++++++-------- filament/backend/test/test_BufferUpdates.cpp | 2 +- filament/backend/test/test_FeedbackLoops.cpp | 2 +- filament/backend/test/test_LoadImage.cpp | 6 +++--- filament/backend/test/test_MRT.cpp | 2 +- filament/backend/test/test_MipLevels.cpp | 2 +- 
.../test/test_MissingRequiredAttributes.cpp | 2 +- filament/backend/test/test_ReadPixels.cpp | 4 ++-- .../backend/test/test_RenderExternalImage.cpp | 4 ++-- filament/backend/test/test_Scissor.cpp | 4 ++-- filament/backend/test/test_StencilBuffer.cpp | 4 ++-- filament/src/details/Renderer.cpp | 10 ++++++++-- 19 files changed, 66 insertions(+), 29 deletions(-) diff --git a/filament/backend/include/backend/platforms/OpenGLPlatform.h b/filament/backend/include/backend/platforms/OpenGLPlatform.h index dec6f47ba74..0645759e8d7 100644 --- a/filament/backend/include/backend/platforms/OpenGLPlatform.h +++ b/filament/backend/include/backend/platforms/OpenGLPlatform.h @@ -140,6 +140,23 @@ class OpenGLPlatform : public Platform { */ virtual uint32_t getDefaultFramebufferObject() noexcept; + /** + * Called by the backend when a frame starts. + * @param monotonic_clock_ns vsync time point on the monotonic clock + * @param refreshIntervalNs refresh interval in nanoseconds + * @param frameId a frame id + */ + virtual void beginFrame( + int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, + uint32_t frameId) noexcept; + + /** + * Called by the backend when a frame ends.
+ * @param frameId the frame id used in beginFrame + */ + virtual void endFrame( + uint32_t frameId) noexcept; /** * Type of contexts available diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc index 680a6bb0136..844841862b3 100644 --- a/filament/backend/include/private/backend/DriverAPI.inc +++ b/filament/backend/include/private/backend/DriverAPI.inc @@ -133,6 +133,7 @@ DECL_DRIVER_API_0(tick) DECL_DRIVER_API_N(beginFrame, int64_t, monotonic_clock_ns, + int64_t, refreshIntervalNs, uint32_t, frameId) DECL_DRIVER_API_N(setFrameScheduledCallback, diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index f99a6b63a66..d5e8dbf981e 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -212,7 +212,8 @@ executeTickOps(); } -void MetalDriver::beginFrame(int64_t monotonic_clock_ns, uint32_t frameId) { +void MetalDriver::beginFrame(int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, uint32_t frameId) { #if defined(FILAMENT_METAL_PROFILING) os_signpost_interval_begin(mContext->log, mContext->signpostId, "Frame encoding", "%{public}d", frameId); #endif diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp index 7a150a3e74a..3eb88afe577 100644 --- a/filament/backend/src/noop/NoopDriver.cpp +++ b/filament/backend/src/noop/NoopDriver.cpp @@ -49,7 +49,8 @@ void NoopDriver::terminate() { void NoopDriver::tick(int) { } -void NoopDriver::beginFrame(int64_t monotonic_clock_ns, uint32_t frameId) { +void NoopDriver::beginFrame(int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, uint32_t frameId) { } void NoopDriver::setFrameScheduledCallback(Handle sch, diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index 82723bcb3e0..cd0c24c123a 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ 
b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -3406,10 +3406,12 @@ void OpenGLDriver::tick(int) { void OpenGLDriver::beginFrame( UTILS_UNUSED int64_t monotonic_clock_ns, + UTILS_UNUSED int64_t refreshIntervalNs, UTILS_UNUSED uint32_t frameId) { DEBUG_MARKER() auto& gl = mContext; insertEventMarker("beginFrame"); + mPlatform.beginFrame(monotonic_clock_ns, refreshIntervalNs, frameId); if (UTILS_UNLIKELY(!mTexturesWithStreamsAttached.empty())) { OpenGLPlatform& platform = mPlatform; for (GLTexture const* t : mTexturesWithStreamsAttached) { @@ -3457,6 +3459,7 @@ void OpenGLDriver::endFrame(UTILS_UNUSED uint32_t frameId) { #endif //SYSTRACE_NAME("glFinish"); //glFinish(); + mPlatform.endFrame(frameId); insertEventMarker("endFrame"); } diff --git a/filament/backend/src/opengl/OpenGLPlatform.cpp b/filament/backend/src/opengl/OpenGLPlatform.cpp index 17359e85f7b..33bbb9ad0a6 100644 --- a/filament/backend/src/opengl/OpenGLPlatform.cpp +++ b/filament/backend/src/opengl/OpenGLPlatform.cpp @@ -55,6 +55,13 @@ uint32_t OpenGLPlatform::getDefaultFramebufferObject() noexcept { return 0; } +void OpenGLPlatform::beginFrame(int64_t monotonic_clock_ns, int64_t refreshIntervalNs, + uint32_t frameId) noexcept { +} + +void OpenGLPlatform::endFrame(uint32_t frameId) noexcept { +} + OpenGLPlatform::ContextType OpenGLPlatform::getCurrentContextType() const noexcept { return ContextType::UNPROTECTED; } diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index e1cd3d7cf74..2b2284e092e 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -391,7 +391,8 @@ void VulkanDriver::collectGarbage() { FVK_SYSTRACE_END(); } -void VulkanDriver::beginFrame(int64_t monotonic_clock_ns, uint32_t frameId) { +void VulkanDriver::beginFrame(int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, uint32_t frameId) { // Do nothing. 
} diff --git a/filament/backend/test/test_Blit.cpp b/filament/backend/test/test_Blit.cpp index 04809884453..61c8b89e5e3 100644 --- a/filament/backend/test/test_Blit.cpp +++ b/filament/backend/test/test_Blit.cpp @@ -238,13 +238,13 @@ TEST_F(BackendTest, ColorMagnify) { {0, 0, kSrcTexWidth >> srcLevel, kSrcTexHeight >> srcLevel}, SamplerMagFilter::LINEAR); // Push through an empty frame to allow the texture to upload and the blit to execute. - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.commit(swapChain); api.endFrame(0); // Grab a screenshot. ScreenshotParams params { kDstTexWidth, kDstTexHeight, "ColorMagnify.png" }; - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); dumpScreenshot(api, dstRenderTargets[0], ¶ms); api.commit(swapChain); api.endFrame(0); @@ -402,7 +402,7 @@ TEST_F(BackendTest, ColorResolve) { }); // FIXME: on Metal this triangle is not drawn. Can't understand why. - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(srcRenderTarget, params); api.bindUniformBuffer(0, ubuffer); api.draw(state, triangle.getRenderPrimitive(), 0, 3, 1); @@ -484,13 +484,13 @@ TEST_F(BackendTest, Blit2DTextureArray) { {0, 0, kSrcTexWidth >> srcLevel, kSrcTexHeight >> srcLevel}, SamplerMagFilter::LINEAR); // Push through an empty frame to allow the texture to upload and the blit to execute. - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.commit(swapChain); api.endFrame(0); // Grab a screenshot. ScreenshotParams params { kDstTexWidth, kDstTexHeight, "Blit2DTextureArray.png" }; - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); dumpScreenshot(api, dstRenderTarget, ¶ms); api.commit(swapChain); api.endFrame(0); @@ -574,13 +574,13 @@ TEST_F(BackendTest, BlitRegion) { SamplerMagFilter::LINEAR); // Push through an empty frame to allow the texture to upload and the blit to execute. - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.commit(swapChain); api.endFrame(0); // Grab a screenshot. 
ScreenshotParams params { kDstTexWidth, kDstTexHeight, "BlitRegion.png" }; - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); dumpScreenshot(api, dstRenderTarget, ¶ms); api.commit(swapChain); api.endFrame(0); @@ -655,7 +655,7 @@ TEST_F(BackendTest, BlitRegionToSwapChain) { .height = kDstTexHeight - 10, }; - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.blitDEPRECATED(TargetBufferFlags::COLOR0, dstRenderTarget, dstRect, srcRenderTargets[srcLevel], diff --git a/filament/backend/test/test_BufferUpdates.cpp b/filament/backend/test/test_BufferUpdates.cpp index 3207fdae9d9..bde771448a1 100644 --- a/filament/backend/test/test_BufferUpdates.cpp +++ b/filament/backend/test/test_BufferUpdates.cpp @@ -143,7 +143,7 @@ TEST_F(BackendTest, VertexBufferUpdate) { } getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Draw 10 triangles, updating the vertex buffer / index buffer each time. size_t triangleIndex = 0; diff --git a/filament/backend/test/test_FeedbackLoops.cpp b/filament/backend/test/test_FeedbackLoops.cpp index 04f72578013..232127ede57 100644 --- a/filament/backend/test/test_FeedbackLoops.cpp +++ b/filament/backend/test/test_FeedbackLoops.cpp @@ -200,7 +200,7 @@ TEST_F(BackendTest, FeedbackLoops) { auto ubuffer = api.createBufferObject(sizeof(MaterialParams), BufferObjectBinding::UNIFORM, BufferUsage::STATIC); api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.bindSamplers(0, sgroup); api.bindUniformBuffer(0, ubuffer); diff --git a/filament/backend/test/test_LoadImage.cpp b/filament/backend/test/test_LoadImage.cpp index 8e1d8f3b562..2af57eb7196 100644 --- a/filament/backend/test/test_LoadImage.cpp +++ b/filament/backend/test/test_LoadImage.cpp @@ -414,7 +414,7 @@ TEST_F(BackendTest, UpdateImageSRGB) { api.update3DImage(texture, 0, 0, 0, 0, 512, 512, 1, std::move(descriptor)); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); // Update samplers. 
SamplerGroup samplers(1); @@ -486,7 +486,7 @@ TEST_F(BackendTest, UpdateImageMipLevel) { PixelBufferDescriptor descriptor = checkerboardPixelBuffer(pixelFormat, pixelType, 512); api.update3DImage(texture, /* level*/ 1, 0, 0, 0, 512, 512, 1, std::move(descriptor)); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); // Update samplers. SamplerGroup samplers(1); @@ -570,7 +570,7 @@ TEST_F(BackendTest, UpdateImage3D) { api.update3DImage(texture, 0, 0, 0, 0, 512, 512, 4, std::move(descriptor)); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); // Update samplers. SamplerGroup samplers(1); diff --git a/filament/backend/test/test_MRT.cpp b/filament/backend/test/test_MRT.cpp index bd29d27e733..21a30f7104a 100644 --- a/filament/backend/test/test_MRT.cpp +++ b/filament/backend/test/test_MRT.cpp @@ -127,7 +127,7 @@ TEST_F(BackendTest, MRT) { getDriverApi().startCapture(0); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Draw a triangle. 
getDriverApi().beginRenderPass(renderTarget, params); diff --git a/filament/backend/test/test_MipLevels.cpp b/filament/backend/test/test_MipLevels.cpp index 76497e1b2a4..3c54839dad1 100644 --- a/filament/backend/test/test_MipLevels.cpp +++ b/filament/backend/test/test_MipLevels.cpp @@ -143,7 +143,7 @@ TEST_F(BackendTest, SetMinMaxLevel) { TrianglePrimitive triangle(api); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); // We set the base mip to 1, and the max mip to 3 // Level 0: 128x128 (red) diff --git a/filament/backend/test/test_MissingRequiredAttributes.cpp b/filament/backend/test/test_MissingRequiredAttributes.cpp index c56cece5993..453b1bbcb4b 100644 --- a/filament/backend/test/test_MissingRequiredAttributes.cpp +++ b/filament/backend/test/test_MissingRequiredAttributes.cpp @@ -100,7 +100,7 @@ TEST_F(BackendTest, MissingRequiredAttributes) { getDriverApi().startCapture(0); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Render a triangle. getDriverApi().beginRenderPass(defaultRenderTarget, params); diff --git a/filament/backend/test/test_ReadPixels.cpp b/filament/backend/test/test_ReadPixels.cpp index 98d8c41705d..ee1a37cd9c2 100644 --- a/filament/backend/test/test_ReadPixels.cpp +++ b/filament/backend/test/test_ReadPixels.cpp @@ -295,7 +295,7 @@ TEST_F(ReadPixelsTest, ReadPixels) { params.viewport.width = t.getRenderTargetSize(); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Render a white triangle over blue. getDriverApi().beginRenderPass(renderTarget, params); @@ -434,7 +434,7 @@ TEST_F(ReadPixelsTest, ReadPixelsPerformance) { } getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Render some content, just so we don't read back uninitialized data. 
getDriverApi().beginRenderPass(renderTarget, params); diff --git a/filament/backend/test/test_RenderExternalImage.cpp b/filament/backend/test/test_RenderExternalImage.cpp index 9e8d1d5fa70..b8261434b23 100644 --- a/filament/backend/test/test_RenderExternalImage.cpp +++ b/filament/backend/test/test_RenderExternalImage.cpp @@ -109,7 +109,7 @@ TEST_F(BackendTest, RenderExternalImageWithoutSet) { getDriverApi().startCapture(0); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); SamplerGroup samplers(1); samplers.setSampler(0, { texture, {} }); @@ -231,7 +231,7 @@ TEST_F(BackendTest, RenderExternalImage) { getDriverApi().startCapture(0); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); SamplerGroup samplers(1); samplers.setSampler(0, { texture, {} }); diff --git a/filament/backend/test/test_Scissor.cpp b/filament/backend/test/test_Scissor.cpp index 985ef9414b8..42f2fe750b0 100644 --- a/filament/backend/test/test_Scissor.cpp +++ b/filament/backend/test/test_Scissor.cpp @@ -136,7 +136,7 @@ TEST_F(BackendTest, ScissorViewportRegion) { ps.rasterState.depthWrite = false; api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(srcRenderTarget, params); api.scissor(scissor); @@ -227,7 +227,7 @@ TEST_F(BackendTest, ScissorViewportEdgeCases) { ps.rasterState.depthWrite = false; api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(renderTarget, params); api.scissor(scissor); diff --git a/filament/backend/test/test_StencilBuffer.cpp b/filament/backend/test/test_StencilBuffer.cpp index aa02830b547..4e9d2183294 100644 --- a/filament/backend/test/test_StencilBuffer.cpp +++ b/filament/backend/test/test_StencilBuffer.cpp @@ -110,7 +110,7 @@ class BasicStencilBufferTest : public BackendTest { 
ps.stencilState.front.stencilOpDepthStencilPass = StencilOperation::INCR; api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(renderTarget, params); api.draw(ps, smallTriangle.getRenderPrimitive(), 0, 3, 1); @@ -237,7 +237,7 @@ TEST_F(BasicStencilBufferTest, StencilBufferMSAA) { ps.stencilState.front.stencilOpDepthStencilPass = StencilOperation::INCR; api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(renderTarget0, params); api.draw(ps, smallTriangle.getRenderPrimitive(), 0, 3, 1); diff --git a/filament/src/details/Renderer.cpp b/filament/src/details/Renderer.cpp index d9a20d79379..04af08799f5 100644 --- a/filament/src/details/Renderer.cpp +++ b/filament/src/details/Renderer.cpp @@ -298,7 +298,10 @@ bool FRenderer::beginFrame(FSwapChain* swapChain, uint64_t vsyncSteadyClockTimeN FEngine& engine = mEngine; FEngine::DriverApi& driver = engine.getDriverApi(); - driver.beginFrame(appVsync.time_since_epoch().count(), mFrameId); + driver.beginFrame( + appVsync.time_since_epoch().count(), + int64_t(1'000'000'000.0 / mDisplayInfo.refreshRate), + mFrameId); // This need to occur after the backend beginFrame() because some backends need to start // a command buffer before creating a fence. 
@@ -462,7 +465,10 @@ void FRenderer::renderStandaloneView(FView const* view) { engine.prepare(); FEngine::DriverApi& driver = engine.getDriverApi(); - driver.beginFrame(steady_clock::now().time_since_epoch().count(), mFrameId); + driver.beginFrame( + steady_clock::now().time_since_epoch().count(), + int64_t(1'000'000'000.0 / mDisplayInfo.refreshRate), + mFrameId); renderInternal(view); From d0586743f69142fcfe01a1688c262c9b01172161 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Mon, 15 Apr 2024 22:53:58 -0700 Subject: [PATCH 03/31] add ADPF support for backend thread --- .../backend/platforms/OpenGLPlatform.h | 6 + .../backend/platforms/PlatformEGLAndroid.h | 16 +++ .../backend/src/opengl/OpenGLPlatform.cpp | 3 + .../opengl/platforms/PlatformEGLAndroid.cpp | 35 ++++++ filament/src/details/Renderer.cpp | 6 +- libs/utils/CMakeLists.txt | 1 + .../utils/android/PerformanceHintManager.h | 68 +++++++++++ .../src/android/PerformanceHintManager.cpp | 114 ++++++++++++++++++ 8 files changed, 247 insertions(+), 2 deletions(-) create mode 100644 libs/utils/include/utils/android/PerformanceHintManager.h create mode 100644 libs/utils/src/android/PerformanceHintManager.cpp diff --git a/filament/backend/include/backend/platforms/OpenGLPlatform.h b/filament/backend/include/backend/platforms/OpenGLPlatform.h index 0645759e8d7..e00930c98ca 100644 --- a/filament/backend/include/backend/platforms/OpenGLPlatform.h +++ b/filament/backend/include/backend/platforms/OpenGLPlatform.h @@ -208,6 +208,12 @@ class OpenGLPlatform : public Platform { utils::Invocable preContextChange, utils::Invocable postContextChange) noexcept; + /** + * Called by the backend just before calling commit() + * @see commit() + */ + virtual void preCommit() noexcept; + /** * Called by the driver once the current frame finishes drawing. Typically, this should present * the drawSwapChain. This is for example where `eglMakeCurrent()` would be called. 
diff --git a/filament/backend/include/backend/platforms/PlatformEGLAndroid.h b/filament/backend/include/backend/platforms/PlatformEGLAndroid.h index 32f830384d3..d0caeb8c89b 100644 --- a/filament/backend/include/backend/platforms/PlatformEGLAndroid.h +++ b/filament/backend/include/backend/platforms/PlatformEGLAndroid.h @@ -22,6 +22,10 @@ #include #include +#include + +#include + #include #include @@ -58,6 +62,13 @@ class PlatformEGLAndroid : public PlatformEGL { void terminate() noexcept override; + void beginFrame( + int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, + uint32_t frameId) noexcept override; + + void preCommit() noexcept override; + /** * Set the presentation time using `eglPresentationTimeANDROID` * @param presentationTimeInNanosecond @@ -81,6 +92,11 @@ class PlatformEGLAndroid : public PlatformEGL { private: int mOSVersion; ExternalStreamManagerAndroid& mExternalStreamManager; + utils::PerformanceHintManager mPerformanceHintManager; + utils::PerformanceHintManager::Session mPerformanceHintSession; + + using clock = std::chrono::high_resolution_clock; + clock::time_point mStartTimeOfActualWork; }; } // namespace filament::backend diff --git a/filament/backend/src/opengl/OpenGLPlatform.cpp b/filament/backend/src/opengl/OpenGLPlatform.cpp index 33bbb9ad0a6..94c3b991126 100644 --- a/filament/backend/src/opengl/OpenGLPlatform.cpp +++ b/filament/backend/src/opengl/OpenGLPlatform.cpp @@ -62,6 +62,9 @@ void OpenGLPlatform::beginFrame(int64_t monotonic_clock_ns, int64_t refreshInter void OpenGLPlatform::endFrame(uint32_t frameId) noexcept { } +void OpenGLPlatform::preCommit() noexcept { +} + OpenGLPlatform::ContextType OpenGLPlatform::getCurrentContextType() const noexcept { return ContextType::UNPROTECTED; } diff --git a/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp b/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp index addc5e02719..94d602a0ba8 100644 --- a/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp +++ 
b/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp @@ -25,6 +25,8 @@ #include #include +#include + #include #include #include @@ -36,8 +38,11 @@ #include +#include #include +#include + #include #include #include @@ -112,8 +117,38 @@ void PlatformEGLAndroid::terminate() noexcept { PlatformEGL::terminate(); } +void PlatformEGLAndroid::beginFrame( + int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, + uint32_t frameId) noexcept { + if (mPerformanceHintSession.isValid()) { + if (refreshIntervalNs <= 0) { + // we're not provided with a target time, assume 16.67ms + refreshIntervalNs = 16'666'667; + } + mStartTimeOfActualWork = clock::time_point(std::chrono::nanoseconds(monotonic_clock_ns)); + mPerformanceHintSession.updateTargetWorkDuration(refreshIntervalNs); + } + PlatformEGL::beginFrame(monotonic_clock_ns, refreshIntervalNs, frameId); +} + +void backend::PlatformEGLAndroid::preCommit() noexcept { + if (mPerformanceHintSession.isValid()) { + auto const actualWorkDuration = std::chrono::duration_cast( + clock::now() - mStartTimeOfActualWork); + mPerformanceHintSession.reportActualWorkDuration(actualWorkDuration.count()); + } + PlatformEGL::preCommit(); +} + Driver* PlatformEGLAndroid::createDriver(void* sharedContext, const Platform::DriverConfig& driverConfig) noexcept { + + // the refresh rate default value doesn't matter, we change it later + int32_t const tid = gettid(); + mPerformanceHintSession = PerformanceHintManager::Session{ + mPerformanceHintManager, &tid, 1, 16'666'667 }; + Driver* driver = PlatformEGL::createDriver(sharedContext, driverConfig); auto extensions = GLUtils::split(eglQueryString(mEGLDisplay, EGL_EXTENSIONS)); diff --git a/filament/src/details/Renderer.cpp b/filament/src/details/Renderer.cpp index 04af08799f5..cc4994e6190 100644 --- a/filament/src/details/Renderer.cpp +++ b/filament/src/details/Renderer.cpp @@ -300,7 +300,8 @@ bool FRenderer::beginFrame(FSwapChain* swapChain, uint64_t vsyncSteadyClockTimeN driver.beginFrame( 
appVsync.time_since_epoch().count(), - int64_t(1'000'000'000.0 / mDisplayInfo.refreshRate), + mDisplayInfo.refreshRate == 0.0 ? 0 : int64_t( + 1'000'000'000.0 / mDisplayInfo.refreshRate), mFrameId); // This need to occur after the backend beginFrame() because some backends need to start @@ -467,7 +468,8 @@ void FRenderer::renderStandaloneView(FView const* view) { FEngine::DriverApi& driver = engine.getDriverApi(); driver.beginFrame( steady_clock::now().time_since_epoch().count(), - int64_t(1'000'000'000.0 / mDisplayInfo.refreshRate), + mDisplayInfo.refreshRate == 0.0 ? 0 : int64_t( + 1'000'000'000.0 / mDisplayInfo.refreshRate), mFrameId); renderInternal(view); diff --git a/libs/utils/CMakeLists.txt b/libs/utils/CMakeLists.txt index e19a943c185..928e2f37755 100644 --- a/libs/utils/CMakeLists.txt +++ b/libs/utils/CMakeLists.txt @@ -80,6 +80,7 @@ if (WIN32) endif() if (ANDROID) list(APPEND SRCS src/android/ThermalManager.cpp) + list(APPEND SRCS src/android/PerformanceHintManager.cpp) list(APPEND SRCS src/android/Systrace.cpp) endif() if (LINUX OR ANDROID) diff --git a/libs/utils/include/utils/android/PerformanceHintManager.h b/libs/utils/include/utils/android/PerformanceHintManager.h new file mode 100644 index 00000000000..9808186475c --- /dev/null +++ b/libs/utils/include/utils/android/PerformanceHintManager.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TNT_UTILS_ANDROID_PERFORMANCEHINTMANAGER_H +#define TNT_UTILS_ANDROID_PERFORMANCEHINTMANAGER_H + +#include +#include + +#include +#include + +namespace utils { + +namespace details { +struct PerformanceHintManager; +} // namespace details + +class UTILS_PUBLIC PerformanceHintManager : + private PrivateImplementation { + friend struct details::PerformanceHintManager; + struct SessionDetails; + +public: + class UTILS_PUBLIC Session : PrivateImplementation { + friend class PerformanceHintManager; + friend struct PerformanceHintManager::SessionDetails; + public: + Session() noexcept; + Session(PerformanceHintManager& manager, + int32_t const* threadIds, size_t size, + int64_t initialTargetWorkDurationNanos) noexcept; + ~Session() noexcept; + + Session(Session&& rhs) noexcept; + Session& operator=(Session&& rhs) noexcept; + Session(Session const& rhs) = delete; + Session& operator=(Session const& rhs) = delete; + + bool isValid() const; + int updateTargetWorkDuration(int64_t targetDurationNanos) noexcept; + int reportActualWorkDuration(int64_t actualDurationNanos) noexcept; + }; + + PerformanceHintManager() noexcept; + ~PerformanceHintManager() noexcept; + + bool isValid() const; + + int64_t getPreferredUpdateRateNanos() const noexcept; +}; + +} // namespace utils + +#endif //TNT_UTILS_ANDROID_PERFORMANCEHINTMANAGER_H diff --git a/libs/utils/src/android/PerformanceHintManager.cpp b/libs/utils/src/android/PerformanceHintManager.cpp new file mode 100644 index 00000000000..c2c8f8b8f15 --- /dev/null +++ b/libs/utils/src/android/PerformanceHintManager.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include +#include + +#define UTILS_PRIVATE_IMPLEMENTATION_NON_COPYABLE +#include + +namespace utils { + +namespace details { +struct PerformanceHintManager { + APerformanceHintManager* mManager = nullptr; +}; +} // namespace details + + +struct PerformanceHintManager::SessionDetails { + APerformanceHintSession* mSession = nullptr; +}; + +PerformanceHintManager::PerformanceHintManager() noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + mImpl->mManager = APerformanceHint_getManager(); + } +} + +PerformanceHintManager::~PerformanceHintManager() noexcept = default; + +bool PerformanceHintManager::isValid() const { + return mImpl->mManager != nullptr; +} + +int64_t PerformanceHintManager::getPreferredUpdateRateNanos() const noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(mImpl->mManager)) { + return APerformanceHint_getPreferredUpdateRateNanos(mImpl->mManager); + } + } + return -1; +} + +// ------------------------------------------------------------------------------------------------ + +PerformanceHintManager::Session::Session() noexcept = default; + +PerformanceHintManager::Session::Session(PerformanceHintManager& manager, int32_t const* threadIds, + size_t size, int64_t initialTargetWorkDurationNanos) noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(manager.isValid())) { + mImpl->mSession = APerformanceHint_createSession( + manager->mManager, threadIds, size, initialTargetWorkDurationNanos); + } 
+ } +} + +PerformanceHintManager::Session::Session(Session&& rhs) noexcept = default; + +PerformanceHintManager::Session& PerformanceHintManager::Session::operator=(Session&& rhs) noexcept = default; + +PerformanceHintManager::Session::~Session() noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(mImpl->mSession)) { + APerformanceHint_closeSession(mImpl->mSession); + } + } +} + +bool PerformanceHintManager::Session::isValid() const { + return mImpl->mSession != nullptr; +} + +int PerformanceHintManager::Session::updateTargetWorkDuration( + int64_t targetDurationNanos) noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(mImpl->mSession)) { + return APerformanceHint_updateTargetWorkDuration(mImpl->mSession, targetDurationNanos); + } + } + return -1; +} + +int PerformanceHintManager::Session::reportActualWorkDuration( + int64_t actualDurationNanos) noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(mImpl->mSession)) { + return APerformanceHint_reportActualWorkDuration(mImpl->mSession, actualDurationNanos); + } + } + return -1; +} + +} // namespace utils + From 0d25f7942109a28f8065acfed5c63ecdd2cbe107 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Mon, 22 Apr 2024 10:03:13 -0700 Subject: [PATCH 04/31] gl: simplify how we track bound textures In opengl it's possible to bind several textures to the same texture unit as long as they're a different target. Until now we were tracking that state. In practice it's not very useful to bind several textures to the same unit (it is a little bit when updating texture data, but not when rendering). With the coming change to a descriptor set API, it is better to have a 1-to-1 mapping between bound textures and texture units. So with this change, only a single texture can be bound to a texture unit.
If another texture is bound to the same unit with a different target, we first unbind the texture from the current target. There is less state to track, and it allows us to "unbind a texture unit" (whereas before we'd have to iterate through all the possible targets for that unit and unbind all of them). --- filament/backend/src/opengl/GLTexture.h | 6 +-- filament/backend/src/opengl/OpenGLContext.cpp | 17 ++++-- filament/backend/src/opengl/OpenGLContext.h | 53 ++++++------------- filament/backend/src/opengl/OpenGLDriver.cpp | 25 +-------- 4 files changed, 35 insertions(+), 66 deletions(-) diff --git a/filament/backend/src/opengl/GLTexture.h b/filament/backend/src/opengl/GLTexture.h index 91aadfc36af..5e9460e17a5 100644 --- a/filament/backend/src/opengl/GLTexture.h +++ b/filament/backend/src/opengl/GLTexture.h @@ -30,7 +30,7 @@ namespace filament::backend { struct GLTexture : public HwTexture { using HwTexture::HwTexture; struct GL { - GL() noexcept : imported(false), sidecarSamples(1), reserved(0) {} + GL() noexcept : imported(false), sidecarSamples(1), reserved1(0) {} GLuint id = 0; // texture or renderbuffer id GLenum target = 0; GLenum internalFormat = 0; @@ -40,10 +40,10 @@ struct GLTexture : public HwTexture { GLfloat anisotropy = 1.0; int8_t baseLevel = 127; int8_t maxLevel = -1; - uint8_t targetIndex = 0; // optimization: index corresponding to target + uint8_t reserved0 = 0; bool imported : 1; uint8_t sidecarSamples : 4; - uint8_t reserved : 3; + uint8_t reserved1 : 3; } gl; OpenGLPlatform::ExternalTexture* externalTexture = nullptr; diff --git a/filament/backend/src/opengl/OpenGLContext.cpp b/filament/backend/src/opengl/OpenGLContext.cpp index 5355e6b1b95..f87c5004061 100644 --- a/filament/backend/src/opengl/OpenGLContext.cpp +++ b/filament/backend/src/opengl/OpenGLContext.cpp @@ -881,19 +881,28 @@ void OpenGLContext::pixelStore(GLenum pname, GLint param) noexcept { } } -void OpenGLContext::unbindTexture(GLenum target, GLuint texture_id) noexcept { +void
OpenGLContext::unbindTexture( + UTILS_UNUSED_IN_RELEASE GLenum target, GLuint texture_id) noexcept { // unbind this texture from all the units it might be bound to // no need unbind the texture from FBOs because we're not tracking that state (and there is // no need to). - const size_t index = getIndexForTextureTarget(target); UTILS_NOUNROLL for (GLuint unit = 0; unit < MAX_TEXTURE_UNIT_COUNT; unit++) { - if (state.textures.units[unit].targets[index].texture_id == texture_id) { - bindTexture(unit, target, (GLuint)0, index); + if (state.textures.units[unit].id == texture_id) { + // if this texture is bound, it should be at the same target + assert_invariant(state.textures.units[unit].target == target); + unbindTextureUnit(unit); } } } +void OpenGLContext::unbindTextureUnit(GLuint unit) noexcept { + update_state(state.textures.units[unit].id, 0u, [&]() { + activeTexture(unit); + glBindTexture(state.textures.units[unit].target, 0u); + }); +} + void OpenGLContext::unbindSampler(GLuint sampler) noexcept { // unbind this sampler from all the units it might be bound to UTILS_NOUNROLL // clang generates >800B of code!!! 
diff --git a/filament/backend/src/opengl/OpenGLContext.h b/filament/backend/src/opengl/OpenGLContext.h index e6edc0c2ae9..ff01b76d648 100644 --- a/filament/backend/src/opengl/OpenGLContext.h +++ b/filament/backend/src/opengl/OpenGLContext.h @@ -137,7 +137,6 @@ class OpenGLContext final : public TimerQueryFactoryInterface { #endif } - constexpr static inline size_t getIndexForTextureTarget(GLuint target) noexcept; constexpr inline size_t getIndexForCap(GLenum cap) noexcept; constexpr static inline size_t getIndexForBufferTarget(GLenum target) noexcept; @@ -149,10 +148,10 @@ class OpenGLContext final : public TimerQueryFactoryInterface { void pixelStore(GLenum, GLint) noexcept; inline void activeTexture(GLuint unit) noexcept; - inline void bindTexture(GLuint unit, GLuint target, GLuint texId, size_t targetIndex) noexcept; inline void bindTexture(GLuint unit, GLuint target, GLuint texId) noexcept; void unbindTexture(GLenum target, GLuint id) noexcept; + void unbindTextureUnit(GLuint unit) noexcept; inline void bindVertexArray(RenderPrimitive const* p) noexcept; inline void bindSampler(GLuint unit, GLuint sampler) noexcept; void unbindSampler(GLuint sampler) noexcept; @@ -322,8 +321,14 @@ class OpenGLContext final : public TimerQueryFactoryInterface { // function to handle state changes we don't control void updateTexImage(GLenum target, GLuint id) noexcept { - const size_t index = getIndexForTextureTarget(target); - state.textures.units[state.textures.active].targets[index].texture_id = id; + assert_invariant(target == GL_TEXTURE_EXTERNAL_OES); + // if another target is bound to this texture unit, unbind that texture + if (UTILS_UNLIKELY(state.textures.units[state.textures.active].target != target)) { + glBindTexture(state.textures.units[state.textures.active].target, 0); + state.textures.units[state.textures.active].target = GL_TEXTURE_EXTERNAL_OES; + } + // the texture is already bound to `target`, we just update our internal state + 
state.textures.units[state.textures.active].id = id; } void resetProgram() noexcept { state.program.use = 0; } @@ -426,9 +431,8 @@ class OpenGLContext final : public TimerQueryFactoryInterface { GLuint active = 0; // zero-based struct { GLuint sampler = 0; - struct { - GLuint texture_id = 0; - } targets[7]; // this must match getIndexForTextureTarget() + GLuint target = 0; + GLuint id = 0; } units[MAX_TEXTURE_UNIT_COUNT]; } textures; @@ -598,31 +602,10 @@ class OpenGLContext final : public TimerQueryFactoryInterface { } void setDefaultState() noexcept; - - static constexpr const size_t TEXTURE_TARGET_COUNT = - sizeof(state.textures.units[0].targets) / sizeof(state.textures.units[0].targets[0]); - }; // ------------------------------------------------------------------------------------------------ -constexpr size_t OpenGLContext::getIndexForTextureTarget(GLuint target) noexcept { - // this must match state.textures[].targets[] - switch (target) { - case GL_TEXTURE_2D: return 0; - case GL_TEXTURE_2D_ARRAY: return 1; - case GL_TEXTURE_CUBE_MAP: return 2; -#if defined(BACKEND_OPENGL_LEVEL_GLES31) - case GL_TEXTURE_2D_MULTISAMPLE: return 3; -#endif - case GL_TEXTURE_EXTERNAL_OES: return 4; - case GL_TEXTURE_3D: return 5; - case GL_TEXTURE_CUBE_MAP_ARRAY: return 6; - default: - return 0; - } -} - constexpr size_t OpenGLContext::getIndexForCap(GLenum cap) noexcept { //NOLINT size_t index = 0; switch (cap) { @@ -770,19 +753,17 @@ void OpenGLContext::bindBufferRange(GLenum target, GLuint index, GLuint buffer, #endif } -void OpenGLContext::bindTexture(GLuint unit, GLuint target, GLuint texId, size_t targetIndex) noexcept { - assert_invariant(targetIndex == getIndexForTextureTarget(target)); - assert_invariant(targetIndex < TEXTURE_TARGET_COUNT); - update_state(state.textures.units[unit].targets[targetIndex].texture_id, texId, [&]() { +void OpenGLContext::bindTexture(GLuint unit, GLuint target, GLuint texId) noexcept { + update_state(state.textures.units[unit].target, 
target, [&]() { + activeTexture(unit); + glBindTexture(state.textures.units[unit].target, 0); + }); + update_state(state.textures.units[unit].id, texId, [&]() { activeTexture(unit); glBindTexture(target, texId); }, target == GL_TEXTURE_EXTERNAL_OES); } -void OpenGLContext::bindTexture(GLuint unit, GLuint target, GLuint texId) noexcept { - bindTexture(unit, target, texId, getIndexForTextureTarget(target)); -} - void OpenGLContext::useProgram(GLuint program) noexcept { update_state(state.program.use, program, [&]() { glUseProgram(program); diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index cd0c24c123a..15f7df08951 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -287,7 +287,7 @@ void OpenGLDriver::bindSampler(GLuint unit, GLuint sampler) noexcept { void OpenGLDriver::bindTexture(GLuint unit, GLTexture const* t) noexcept { assert_invariant(t != nullptr); - mContext.bindTexture(unit, t->gl.target, t->gl.id, t->gl.targetIndex); + mContext.bindTexture(unit, t->gl.target, t->gl.id); } bool OpenGLDriver::useProgram(OpenGLProgram* p) noexcept { @@ -749,7 +749,6 @@ void OpenGLDriver::createTextureR(Handle th, SamplerType target, uint if (t->externalTexture) { t->gl.target = t->externalTexture->target; t->gl.id = t->externalTexture->id; - t->gl.targetIndex = (uint8_t)OpenGLContext::getIndexForTextureTarget(t->gl.target); // internalFormat actually depends on the external image, but it doesn't matter // because it's not used anywhere for anything important. t->gl.internalFormat = internalFormat; @@ -761,30 +760,23 @@ void OpenGLDriver::createTextureR(Handle th, SamplerType target, uint t->gl.internalFormat = internalFormat; - // We DO NOT update targetIndex at function exit to take advantage of the fact that - // getIndexForTextureTarget() is constexpr -- so all of this disappears at compile time. 
switch (target) { case SamplerType::SAMPLER_EXTERNAL: // we can't be here -- doesn't matter what we do case SamplerType::SAMPLER_2D: t->gl.target = GL_TEXTURE_2D; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D); break; case SamplerType::SAMPLER_3D: t->gl.target = GL_TEXTURE_3D; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_3D); break; case SamplerType::SAMPLER_2D_ARRAY: t->gl.target = GL_TEXTURE_2D_ARRAY; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D_ARRAY); break; case SamplerType::SAMPLER_CUBEMAP: t->gl.target = GL_TEXTURE_CUBE_MAP; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_CUBE_MAP); break; case SamplerType::SAMPLER_CUBEMAP_ARRAY: t->gl.target = GL_TEXTURE_CUBE_MAP_ARRAY; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_CUBE_MAP_ARRAY); break; } @@ -795,8 +787,6 @@ void OpenGLDriver::createTextureR(Handle th, SamplerType target, uint if (gl.features.multisample_texture) { // multi-sample texture on GL 3.2 / GLES 3.1 and above t->gl.target = GL_TEXTURE_2D_MULTISAMPLE; - t->gl.targetIndex = (uint8_t) - OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D_MULTISAMPLE); } else { // Turn off multi-sampling for that texture. It's just not supported. } @@ -855,32 +845,24 @@ void OpenGLDriver::importTextureR(Handle th, intptr_t id, t->gl.internalFormat = getInternalFormat(format); assert_invariant(t->gl.internalFormat); - // We DO NOT update targetIndex at function exit to take advantage of the fact that - // getIndexForTextureTarget() is constexpr -- so all of this disappears at compile time. 
switch (target) { case SamplerType::SAMPLER_EXTERNAL: t->gl.target = GL_TEXTURE_EXTERNAL_OES; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_EXTERNAL_OES); break; case SamplerType::SAMPLER_2D: t->gl.target = GL_TEXTURE_2D; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D); break; case SamplerType::SAMPLER_3D: t->gl.target = GL_TEXTURE_3D; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_3D); break; case SamplerType::SAMPLER_2D_ARRAY: t->gl.target = GL_TEXTURE_2D_ARRAY; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D_ARRAY); break; case SamplerType::SAMPLER_CUBEMAP: t->gl.target = GL_TEXTURE_CUBE_MAP; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_CUBE_MAP); break; case SamplerType::SAMPLER_CUBEMAP_ARRAY: t->gl.target = GL_TEXTURE_CUBE_MAP_ARRAY; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_CUBE_MAP_ARRAY); break; } @@ -891,7 +873,6 @@ void OpenGLDriver::importTextureR(Handle th, intptr_t id, if (gl.features.multisample_texture) { // multi-sample texture on GL 3.2 / GLES 3.1 and above t->gl.target = GL_TEXTURE_2D_MULTISAMPLE; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D_MULTISAMPLE); } else { // Turn off multi-sampling for that texture. It's just not supported. 
} @@ -1781,7 +1762,6 @@ void OpenGLDriver::updateStreams(DriverApi* driver) { // the target and id can be reset each time t->gl.target = t->externalTexture->target; t->gl.id = t->externalTexture->id; - t->gl.targetIndex = (uint8_t)OpenGLContext::getIndexForTextureTarget(t->gl.target); bindTexture(OpenGLContext::DUMMY_TEXTURE_BINDING, t); } } @@ -2667,7 +2647,6 @@ void OpenGLDriver::setExternalImage(Handle th, void* image) { // the target and id can be reset each time t->gl.target = t->externalTexture->target; t->gl.id = t->externalTexture->id; - t->gl.targetIndex = (uint8_t)OpenGLContext::getIndexForTextureTarget(t->gl.target); bindTexture(OpenGLContext::DUMMY_TEXTURE_BINDING, t); } } @@ -3420,7 +3399,7 @@ void OpenGLDriver::beginFrame( assert_invariant(t->hwStream->stream); platform.updateTexImage(t->hwStream->stream, &static_cast(t->hwStream)->user_thread.timestamp); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast) - // NOTE: We assume that updateTexImage() binds the texture on our behalf + // NOTE: We assume that OpenGLPlatform::updateTexImage() binds the texture on our behalf gl.updateTexImage(GL_TEXTURE_EXTERNAL_OES, t->gl.id); } } From 490e8cf0d0678419e2a0b21067d1c9f2baed78ac Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Tue, 23 Apr 2024 12:38:23 -0400 Subject: [PATCH 05/31] Add option to preserve text shaders (#7786) --- tools/matedit/src/ExternalCompile.cpp | 80 +++++++++++++++------------ tools/matedit/src/ExternalCompile.h | 3 +- tools/matedit/src/main.cpp | 25 ++++++--- 3 files changed, 66 insertions(+), 42 deletions(-) diff --git a/tools/matedit/src/ExternalCompile.cpp b/tools/matedit/src/ExternalCompile.cpp index 11d1583d1b1..f8f13dbbf8a 100644 --- a/tools/matedit/src/ExternalCompile.cpp +++ b/tools/matedit/src/ExternalCompile.cpp @@ -287,7 +287,8 @@ bool compileMetalShaders(const std::vector& mslEntries, return true; } -int externalCompile(utils::Path input, utils::Path output, std::vector args) { +int externalCompile(utils::Path input, 
utils::Path output, bool preserveTextShaders, + std::vector args) { std::ifstream in(input.c_str(), std::ifstream::in | std::ios::binary); if (!in.is_open()) { std::cerr << "Could not open the source material " << input << std::endl; @@ -333,18 +334,7 @@ int externalCompile(utils::Path input, utils::Path output, std::vector(c.desc.start), c.desc.size, c.type); } - // Add the re-generated text dictionary chunk and text-based shaders. - if (!textDictionary.isEmpty()) { - const auto& dictionaryChunk = outputChunks.push( - std::move(textDictionary), filamat::ChunkType::DictionaryText); - - // Re-emit GLSL chunk (MaterialTextChunk). - if (!glslEntries.empty()) { - outputChunks.push(std::move(glslEntries), - dictionaryChunk.getDictionary(), filamat::ChunkType::MaterialGlsl); + if (removingMslShaders) { + // Since we're modifying text shaders, we'll need to regenerate the text dictionary. + // We'll also need to re-emit text based shaders that rely on the dictionary. + // Here we ONLY add GLSL and ESSL 1 types, as we're removing MSL completely. + filamat::LineDictionary textDictionary; + for (const auto& s : glslEntries) { + textDictionary.addText(s.shader); } + for (const auto& s : essl1Entries) { + textDictionary.addText(s.shader); + } + + // Add the re-generated text dictionary chunk and text-based shaders. + if (!textDictionary.isEmpty()) { + const auto& dictionaryChunk = outputChunks.push( + std::move(textDictionary), filamat::ChunkType::DictionaryText); - // Re-emit ESSL1 chunk (MaterialTextChunk). - if (!essl1Entries.empty()) { - outputChunks.push(std::move(essl1Entries), - dictionaryChunk.getDictionary(), filamat::ChunkType::MaterialEssl1); + // Re-emit GLSL chunk (MaterialTextChunk). + if (!glslEntries.empty()) { + outputChunks.push(std::move(glslEntries), + dictionaryChunk.getDictionary(), filamat::ChunkType::MaterialGlsl); + } + + // Re-emit ESSL1 chunk (MaterialTextChunk). 
+ if (!essl1Entries.empty()) { + outputChunks.push(std::move(essl1Entries), + dictionaryChunk.getDictionary(), filamat::ChunkType::MaterialEssl1); + } } } diff --git a/tools/matedit/src/ExternalCompile.h b/tools/matedit/src/ExternalCompile.h index d24907c6904..0fc13dd17f1 100644 --- a/tools/matedit/src/ExternalCompile.h +++ b/tools/matedit/src/ExternalCompile.h @@ -24,7 +24,8 @@ namespace matedit { -int externalCompile(utils::Path input, utils::Path output, std::vector args); +int externalCompile(utils::Path input, utils::Path output, bool preserveTextShaders, + std::vector args); } // namespace matedit diff --git a/tools/matedit/src/main.cpp b/tools/matedit/src/main.cpp index 0adc9808773..05c2a2bd908 100644 --- a/tools/matedit/src/main.cpp +++ b/tools/matedit/src/main.cpp @@ -28,6 +28,7 @@ struct Config { utils::Path inputFile; utils::Path outputFile; std::vector commandArgs; + bool preserveTextShaders = false; }; static void printUsage(const char* name) { @@ -55,6 +56,9 @@ static void printUsage(const char* name) { " --type=[shader type], -t\n" " Specify the shader type, currently only metal is supported\n" "\n" + " --preserve-text-shaders, -p\n" + " Keep the text-based shaders when writing the output file\n" + "\n" "Commands:\n" " external-compile\n" " Transforms all the text-based shaders of the specified type in the input file into binaries\n" @@ -76,7 +80,8 @@ static void printUsage(const char* name) { " is guaranteed to have a .metal extension.\n" " is guaranteed to have a .metallib extension.\n" "\n" - " This command will remove the text-based shaders when writing the output material file.\n" + " This command will remove the text-based shaders when writing the output material file, unless\n" + " the --preserve-text-shaders option is specified.\n" "\n" " If script exits with a non-zero exit code, MATEDIT will terminate with error. 
Multiple\n" " invocations of script may be launched in parallel.\n" @@ -93,12 +98,13 @@ static void printUsage(const char* name) { } static int handleArguments(int argc, char* argv[], Config* config) { - static constexpr const char* OPTSTR = "hi:o:t:"; + static constexpr const char* OPTSTR = "hi:o:t:p"; static const struct option OPTIONS[] = { - { "help", no_argument, nullptr, 'h' }, - { "input", required_argument, nullptr, 'i' }, - { "output", required_argument, nullptr, 'o' }, - { "type", required_argument, nullptr, 't' }, + { "help", no_argument, nullptr, 'h' }, + { "input", required_argument, nullptr, 'i' }, + { "output", required_argument, nullptr, 'o' }, + { "type", required_argument, nullptr, 't' }, + { "preserve-text-shaders", no_argument, nullptr, 'p' }, { nullptr, 0, nullptr, 0 } // termination of the option list }; @@ -126,6 +132,10 @@ static int handleArguments(int argc, char* argv[], Config* config) { << std::endl; exit(1); } + break; + case 'p': + config->preserveTextShaders = true; + break; } } @@ -176,5 +186,6 @@ int main(int argc, char* argv[]) { return 1; } - return matedit::externalCompile(config.inputFile, config.outputFile, config.commandArgs); + return matedit::externalCompile( + config.inputFile, config.outputFile, config.preserveTextShaders, config.commandArgs); } From e4442a5eb286e2083787f1bbe11a90a41a11df1b Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Mon, 22 Apr 2024 14:58:28 -0700 Subject: [PATCH 06/31] optimize the inner rendering loop This largely undoes a change I did recently where PrimitiveInfo has a FRenderPrimitive* to save some space and keep a command at 64 bytes. This wasn't a good idea because the inner rendering loop shouldn't do any dereference in the common case. This change reorganises PrimitiveInfo such that it stores all the data necessary to render a primitive in the common case.
The less common cases are when hybrid instancing, morphing or skinning are used; in those cases, a dereference into the renderable SOA is needed. PrimitiveInfo currently has 16 bytes free, which we keep for future use. --- filament/src/RenderPass.cpp | 125 +++++++++++++++++++++--------------- filament/src/RenderPass.h | 31 +++++---- 2 files changed, 93 insertions(+), 63 deletions(-) diff --git a/filament/src/RenderPass.cpp b/filament/src/RenderPass.cpp index 16489cb098c..2b49fd15956 100644 --- a/filament/src/RenderPass.cpp +++ b/filament/src/RenderPass.cpp @@ -213,7 +213,7 @@ void RenderPass::appendCommands(FEngine& engine, // This must be done from the main thread. for (Command const* first = curr, *last = curr + commandCount ; first != last ; ++first) { if (UTILS_LIKELY((first->key & CUSTOM_MASK) == uint64_t(CustomCommand::PASS))) { - auto ma = first->primitive.primitive->getMaterialInstance()->getMaterial(); + auto ma = first->primitive.mi->getMaterial(); ma->prepareProgram(first->primitive.materialVariant); } } @@ -290,19 +290,24 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { while (curr != last) { - // we can't have nice things! No more than maxInstanceCount due to UBO size limits - Command const* const e = std::find_if_not(curr, std::min(last, curr + maxInstanceCount), - [lhs = *curr](Command const& rhs) { - // primitives must be identical to be instanced. Currently, instancing doesn't support - // skinning/morphing.
- return lhs.primitive.primitive == rhs.primitive.primitive && - lhs.primitive.rasterState == rhs.primitive.rasterState && - lhs.primitive.skinningHandle == rhs.primitive.skinningHandle && - lhs.primitive.skinningOffset == rhs.primitive.skinningOffset && - lhs.primitive.morphWeightBuffer == rhs.primitive.morphWeightBuffer && - lhs.primitive.morphTargetBuffer == rhs.primitive.morphTargetBuffer && - lhs.primitive.skinningTexture == rhs.primitive.skinningTexture ; - }); + // Currently, if we have skinnning or morphing, we can't use auto instancing. This is + // because the morphing/skinning data for comparison is not easily accessible. + // Additionally, we can't have a different skinning/morphing per instance anyway. + Command const* e = curr + 1; + if (UTILS_LIKELY(!curr->primitive.hasSkinning && !curr->primitive.hasMorphing)) { + // we can't have nice things! No more than maxInstanceCount due to UBO size limits + e = std::find_if_not(curr, std::min(last, curr + maxInstanceCount), + [lhs = *curr](Command const& rhs) { + // primitives must be identical to be instanced. + // Currently, instancing doesn't support skinning/morphing. 
+ return lhs.primitive.mi == rhs.primitive.mi && + lhs.primitive.rph == rhs.primitive.rph && + lhs.primitive.vbih == rhs.primitive.vbih && + lhs.primitive.indexOffset == rhs.primitive.indexOffset && + lhs.primitive.indexCount == rhs.primitive.indexCount && + lhs.primitive.rasterState == rhs.primitive.rasterState; + }); + } uint32_t const instanceCount = e - curr; assert_invariant(instanceCount > 0); @@ -584,7 +589,7 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla cmdColor.primitive.index = i; cmdColor.primitive.instanceCount = soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK; - cmdColor.primitive.instanceBufferHandle = soaInstanceInfo[i].handle; + cmdColor.primitive.hasHybridInstancing = (bool)soaInstanceInfo[i].handle; // soaInstanceInfo[i].count is the number of instances the user has requested, either for // manual or hybrid instancing. Instanced stereo multiplies the number of instances by the @@ -614,14 +619,11 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla cmdDepth.primitive.index = i; cmdDepth.primitive.instanceCount = soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK; - cmdDepth.primitive.instanceBufferHandle = soaInstanceInfo[i].handle; + cmdDepth.primitive.hasHybridInstancing = (bool)soaInstanceInfo[i].handle; cmdDepth.primitive.materialVariant.setSkinning(hasSkinningOrMorphing); cmdDepth.primitive.rasterState.inverseFrontFaces = inverseFrontFaces; - - cmdDepth.primitive.skinningHandle = skinning.handle; - cmdDepth.primitive.skinningOffset = skinning.offset; - cmdDepth.primitive.skinningTexture = skinning.handleSampler; - cmdDepth.primitive.morphWeightBuffer = morphing.handle; + cmdDepth.primitive.hasMorphing = (bool)morphing.handle; + cmdDepth.primitive.hasSkinning = (bool)skinning.handle; if (UTILS_UNLIKELY(hasInstancedStereo)) { cmdColor.primitive.instanceCount = @@ -631,11 +633,8 @@ RenderPass::Command* 
RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla } if constexpr (isColorPass) { renderableVariant.setFog(soaVisibility[i].fog && Variant::isFogVariant(variant)); - - cmdColor.primitive.skinningHandle = skinning.handle; - cmdColor.primitive.skinningOffset = skinning.offset; - cmdColor.primitive.skinningTexture = skinning.handleSampler; - cmdColor.primitive.morphWeightBuffer = morphing.handle; + cmdColor.primitive.hasMorphing = (bool)morphing.handle; + cmdColor.primitive.hasSkinning = (bool)skinning.handle; } const bool shadowCaster = soaVisibility[i].castShadows & hasShadowing; @@ -656,7 +655,13 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // skinning or morphing. if constexpr (isColorPass) { - cmdColor.primitive.primitive = &primitive; + cmdColor.primitive.mi = mi; + cmdColor.primitive.rph = primitive.getHwHandle(); + cmdColor.primitive.vbih = primitive.getVertexBufferInfoHandle(); + cmdColor.primitive.indexOffset = primitive.getIndexOffset(); + cmdColor.primitive.indexCount = primitive.getIndexCount(); + cmdColor.primitive.type = primitive.getPrimitiveType(); + RenderPass::setupColorCommand(cmdColor, renderableVariant, mi, inverseFrontFaces); cmdColor.primitive.morphTargetBuffer = morphTargets.buffer->getHwHandle(); @@ -759,7 +764,13 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla cmdDepth.key |= mi->getSortingKey(); // already all set-up for direct or'ing // unconditionally write the command - cmdDepth.primitive.primitive = &primitive; + cmdDepth.primitive.mi = mi; + cmdDepth.primitive.rph = primitive.getHwHandle(); + cmdDepth.primitive.vbih = primitive.getVertexBufferInfoHandle(); + cmdDepth.primitive.indexOffset = primitive.getIndexOffset(); + cmdDepth.primitive.indexCount = primitive.getIndexCount(); + cmdDepth.primitive.type = primitive.getPrimitiveType(); + cmdDepth.primitive.rasterState.culling = mi->getCullingMode(); cmdDepth.primitive.morphTargetBuffer = 
morphTargets.buffer->getHwHandle(); @@ -915,20 +926,20 @@ void RenderPass::Executor::execute(FEngine& engine, } // primitiveHandle may be invalid if no geometry was set on the renderable. - if (UTILS_UNLIKELY(!first->primitive.primitive->getHwHandle())) { + if (UTILS_UNLIKELY(!first->primitive.rph)) { continue; } // per-renderable uniform PrimitiveInfo const info = first->primitive; pipeline.rasterState = info.rasterState; - pipeline.vertexBufferInfo = info.primitive->getVertexBufferInfoHandle(); - pipeline.primitiveType = info.primitive->getPrimitiveType(); + pipeline.vertexBufferInfo = info.vbih; + pipeline.primitiveType = info.type; assert_invariant(pipeline.vertexBufferInfo); - if (UTILS_UNLIKELY(mi != info.primitive->getMaterialInstance())) { + if (UTILS_UNLIKELY(mi != info.mi)) { // this is always taken the first time - mi = info.primitive->getMaterialInstance(); + mi = info.mi; assert_invariant(mi); ma = mi->getMaterial(); @@ -960,9 +971,10 @@ void RenderPass::Executor::execute(FEngine& engine, info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK; auto getPerObjectUboHandle = [this, &info, &instanceCount]() -> std::pair, uint32_t> { - if (info.instanceBufferHandle) { + if (info.hasHybridInstancing) { + FScene::RenderableSoa const& soa = *mRenderableSoa; // "hybrid" instancing -- instanceBufferHandle takes the place of the UBO - return { info.instanceBufferHandle, 0 }; + return { soa.elementAt(info.index).handle, 0 }; } bool const userInstancing = (info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u; @@ -987,16 +999,22 @@ void RenderPass::Executor::execute(FEngine& engine, offset, sizeof(PerRenderableUib)); - if (UTILS_UNLIKELY(info.skinningHandle)) { + if (UTILS_UNLIKELY(info.hasSkinning)) { + + FScene::RenderableSoa const& soa = *mRenderableSoa; + + const FRenderableManager::SkinningBindingInfo& skinning = + soa.elementAt(info.index); + // note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations 
driver.bindBufferRange(BufferObjectBinding::UNIFORM, +UniformBindingPoints::PER_RENDERABLE_BONES, - info.skinningHandle, - info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData), + skinning.handle, + skinning.offset * sizeof(PerRenderableBoneUib::BoneData), sizeof(PerRenderableBoneUib)); // note: always bind the skinningTexture because the shader needs it. driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING, - info.skinningTexture); + skinning.handleSampler); // note: even if only skinning is enabled, binding morphTargetBuffer is needed. driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING, info.morphTargetBuffer); @@ -1006,16 +1024,25 @@ void RenderPass::Executor::execute(FEngine& engine, rebindPipeline = true; } - if (UTILS_UNLIKELY(info.morphWeightBuffer)) { + if (UTILS_UNLIKELY(info.hasMorphing)) { + + FScene::RenderableSoa const& soa = *mRenderableSoa; + + const FRenderableManager::SkinningBindingInfo& skinning = + soa.elementAt(info.index); + + const FRenderableManager::MorphingBindingInfo& morphing = + soa.elementAt(info.index); + // Instead of using a UBO per primitive, we could also have a single UBO for all // primitives and use bindUniformBufferRange which might be more efficient. driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING, - info.morphWeightBuffer); + morphing.handle); driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING, info.morphTargetBuffer); // note: even if only morphing is enabled, binding skinningTexture is needed. driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING, - info.skinningTexture); + skinning.handleSampler); // FIXME: Currently we need to rebind the PipelineState when texture or // UBO binding change. 
@@ -1029,15 +1056,12 @@ void RenderPass::Executor::execute(FEngine& engine, driver.bindPipeline(pipeline); } - if (info.primitive->getHwHandle() != currentPrimitiveHandle) { - currentPrimitiveHandle = info.primitive->getHwHandle(); - driver.bindRenderPrimitive(info.primitive->getHwHandle()); + if (info.rph != currentPrimitiveHandle) { + currentPrimitiveHandle = info.rph; + driver.bindRenderPrimitive(info.rph); } - driver.draw2( - info.primitive->getIndexOffset(), - info.primitive->getIndexCount(), - instanceCount); + driver.draw2(info.indexOffset, info.indexCount, instanceCount); } } @@ -1057,7 +1081,8 @@ void RenderPass::Executor::execute(FEngine& engine, // ------------------------------------------------------------------------------------------------ RenderPass::Executor::Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept - : mCommands(b, e), + : mRenderableSoa(&pass->mRenderableSoa), + mCommands(b, e), mCustomCommands(pass->mCustomCommands.data(), pass->mCustomCommands.size()), mUboHandle(pass->mUboHandle), mInstancedUboHandle(pass->mInstancedUboHandle), diff --git a/filament/src/RenderPass.h b/filament/src/RenderPass.h index fe6e04cafc9..4a45d057da0 100644 --- a/filament/src/RenderPass.h +++ b/filament/src/RenderPass.h @@ -238,22 +238,26 @@ class RenderPass { struct PrimitiveInfo { // 56 bytes union { - FRenderPrimitive const* primitive; // 8 bytes; - uint64_t padding = {}; // ensures primitive is 8 bytes on all archs - }; // 8 bytes - uint64_t rfu0; // 8 bytes - backend::RasterState rasterState; // 4 bytes - backend::Handle skinningHandle; // 4 bytes - backend::Handle skinningTexture; // 4 bytes - backend::Handle morphWeightBuffer; // 4 bytes - backend::Handle morphTargetBuffer; // 4 bytes - backend::Handle instanceBufferHandle; // 4 bytes + FMaterialInstance const* mi; + uint64_t padding; // make this field 64 bits on all platforms + }; + backend::RenderPrimitiveHandle rph; // 4 bytes + backend::VertexBufferInfoHandle vbih; // 4 
bytes + uint32_t indexOffset; // 4 bytes + uint32_t indexCount; // 4 bytes uint32_t index = 0; // 4 bytes - uint32_t skinningOffset = 0; // 4 bytes + backend::SamplerGroupHandle morphTargetBuffer; // 4 bytes + + backend::RasterState rasterState; // 4 bytes + uint16_t instanceCount; // 2 bytes [MSb: user] Variant materialVariant; // 1 byte - uint8_t rfu1; // 1 byte - uint32_t rfu2; // 4 byte + backend::PrimitiveType type : 3; // 1 byte 3 bits + bool hasSkinning : 1; // 1 bit + bool hasMorphing : 1; // 1 bit + bool hasHybridInstancing : 1; // 1 bit + + uint64_t rfu[2]; // 16 bytes static const uint16_t USER_INSTANCE_MASK = 0x8000u; static const uint16_t INSTANCE_COUNT_MASK = 0x7fffu; @@ -316,6 +320,7 @@ class RenderPass { friend class RenderPassBuilder; // these fields are constant after creation + FScene::RenderableSoa const* mRenderableSoa = nullptr; utils::Slice mCommands; utils::Slice mCustomCommands; backend::Handle mUboHandle; From b56379dc0f6d6ca4b3b01f1cf0009d7dc22e404f Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Tue, 23 Apr 2024 14:05:53 -0600 Subject: [PATCH 07/31] Add license information to matedit (#7790) --- tools/matedit/CMakeLists.txt | 8 ++++++++ tools/matedit/src/main.cpp | 20 +++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/matedit/CMakeLists.txt b/tools/matedit/CMakeLists.txt index e88aad24b01..2c1b24711e3 100644 --- a/tools/matedit/CMakeLists.txt +++ b/tools/matedit/CMakeLists.txt @@ -22,6 +22,14 @@ target_link_libraries(${TARGET} matdbg getopt) set_target_properties(${TARGET} PROPERTIES FOLDER Tools) +# ================================================================================================= +# Licenses +# ================================================================================================== +set(MODULE_LICENSES getopt) +set(GENERATION_ROOT ${CMAKE_CURRENT_BINARY_DIR}/generated) +list_licenses(${GENERATION_ROOT}/licenses/licenses.inc ${MODULE_LICENSES}) 
+target_include_directories(${TARGET} PRIVATE ${GENERATION_ROOT}) + # ================================================================================================== # Installation # ================================================================================================== diff --git a/tools/matedit/src/main.cpp b/tools/matedit/src/main.cpp index 05c2a2bd908..1063a13dce6 100644 --- a/tools/matedit/src/main.cpp +++ b/tools/matedit/src/main.cpp @@ -47,6 +47,9 @@ static void printUsage(const char* name) { " --help, -h\n" " Print this message\n" "\n" + " --license, -l\n" + " Print copyright and license information\n" + "\n" " --input=[input file], -i\n" " Specify path to input compiled material file\n" "\n" @@ -97,10 +100,22 @@ static void printUsage(const char* name) { printf("%s", usage.c_str()); } +static void license() { + static const char *license[] = { + #include "licenses/licenses.inc" + nullptr + }; + + const char **p = &license[0]; + while (*p) + std::cout << *p++ << std::endl; +} + static int handleArguments(int argc, char* argv[], Config* config) { - static constexpr const char* OPTSTR = "hi:o:t:p"; + static constexpr const char* OPTSTR = "hli:o:t:p"; static const struct option OPTIONS[] = { { "help", no_argument, nullptr, 'h' }, + { "license", no_argument, nullptr, 'l' }, { "input", required_argument, nullptr, 'i' }, { "output", required_argument, nullptr, 'o' }, { "type", required_argument, nullptr, 't' }, @@ -118,6 +133,9 @@ static int handleArguments(int argc, char* argv[], Config* config) { case 'h': printUsage(argv[0]); exit(0); + case 'l': + license(); + exit(0); case 'i': config->inputFile = arg; break; From 93dc218b83d0a84543e823a92254012bac841c70 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Mon, 22 Apr 2024 11:24:17 -0700 Subject: [PATCH 08/31] fix PerShadowMapUniform includes --- filament/src/PerShadowMapUniforms.cpp | 18 ++++++++++++------ filament/src/PerShadowMapUniforms.h | 15 ++++----------- 2 files changed, 16 
insertions(+), 17 deletions(-) diff --git a/filament/src/PerShadowMapUniforms.cpp b/filament/src/PerShadowMapUniforms.cpp index 3515ef2b436..458b15db836 100644 --- a/filament/src/PerShadowMapUniforms.cpp +++ b/filament/src/PerShadowMapUniforms.cpp @@ -16,12 +16,19 @@ #include "PerShadowMapUniforms.h" -#include "ShadowMapManager.h" - #include "details/Camera.h" #include "details/Engine.h" #include +#include + +#include + +#include + +#include + +#include namespace filament { @@ -88,8 +95,8 @@ void PerShadowMapUniforms::prepareViewport(Transaction const& transaction, void PerShadowMapUniforms::prepareTime(Transaction const& transaction, FEngine& engine, math::float4 const& userTime) noexcept { auto& s = edit(transaction); - const uint64_t oneSecondRemainder = engine.getEngineTime().count() % 1000000000; - const float fraction = float(double(oneSecondRemainder) / 1000000000.0); + const uint64_t oneSecondRemainder = engine.getEngineTime().count() % 1'000'000'000; + const float fraction = float(double(oneSecondRemainder) / 1'000'000'000.0); s.time = fraction; s.userTime = userTime; } @@ -102,7 +109,6 @@ void PerShadowMapUniforms::prepareShadowMapping(Transaction const& transaction, s.vsmExponent = highPrecision ? 
high : low; } - PerShadowMapUniforms::Transaction PerShadowMapUniforms::open(backend::DriverApi& driver) noexcept { Transaction transaction; // TODO: use out-of-line buffer if too large @@ -114,7 +120,7 @@ PerShadowMapUniforms::Transaction PerShadowMapUniforms::open(backend::DriverApi& void PerShadowMapUniforms::commit(Transaction& transaction, backend::DriverApi& driver) noexcept { driver.updateBufferObject(mUniformBufferHandle, { - transaction.uniforms, sizeof(PerViewUib) }, 0); + transaction.uniforms, sizeof(PerViewUib) }, 0); transaction.uniforms = nullptr; } diff --git a/filament/src/PerShadowMapUniforms.h b/filament/src/PerShadowMapUniforms.h index 42ca2eb1593..05fcdd82e5f 100644 --- a/filament/src/PerShadowMapUniforms.h +++ b/filament/src/PerShadowMapUniforms.h @@ -17,18 +17,13 @@ #ifndef TNT_FILAMENT_PERSHADOWMAPUNIFORMS_H #define TNT_FILAMENT_PERSHADOWMAPUNIFORMS_H -#include - #include -#include - -#include "TypedUniformBuffer.h" +#include +#include #include -#include - -#include +#include namespace filament { @@ -40,13 +35,11 @@ class LightManager; /* * PerShadowMapUniforms manages the UBO needed to generate our shadow maps. Internally it just * holds onto a `PerViewUniform` UBO handle, but doesn't keep any shadow copy of it, instead it - * writes the data directly into the commandstream, for this reason partial update of the data + * writes the data directly into the CommandStream, for this reason partial update of the data * is not possible. 
*/ class PerShadowMapUniforms { - using LightManagerInstance = utils::EntityInstance; - public: class Transaction { friend PerShadowMapUniforms; From d26c972f5bc229854f2650452b82cba5c318eb57 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 23 Apr 2024 11:33:34 -0700 Subject: [PATCH 09/31] add missing includes in RenderableManager --- filament/src/components/RenderableManager.cpp | 29 +++++++++++++++++-- filament/src/components/RenderableManager.h | 6 ++-- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/filament/src/components/RenderableManager.cpp b/filament/src/components/RenderableManager.cpp index 3bbd85597ce..2cd031059b3 100644 --- a/filament/src/components/RenderableManager.cpp +++ b/filament/src/components/RenderableManager.cpp @@ -26,15 +26,40 @@ #include "details/InstanceBuffer.h" #include "details/Material.h" -#include "filament/RenderableManager.h" +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include #include +#include #include -#include +#include + +#include +#include +#include +#include + +#include +#include #include +#include +#include + +#include +#include +#include using namespace filament::math; using namespace utils; diff --git a/filament/src/components/RenderableManager.h b/filament/src/components/RenderableManager.h index 003026baa0d..70332c63bbf 100644 --- a/filament/src/components/RenderableManager.h +++ b/filament/src/components/RenderableManager.h @@ -20,24 +20,22 @@ #include "downcast.h" #include "HwRenderPrimitiveFactory.h" -#include "UniformBuffer.h" #include
#include +#include #include -#include - #include #include #include #include +#include #include #include #include -#include #include From 6146d071ba4369b353df0ad5a71cb9e5f809d0f7 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 23 Apr 2024 11:31:44 -0700 Subject: [PATCH 10/31] StructureOfArray::push_back(&&) didn't do a move --- libs/utils/include/utils/StructureOfArrays.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/utils/include/utils/StructureOfArrays.h b/libs/utils/include/utils/StructureOfArrays.h index a430958470c..65e4b3305cc 100644 --- a/libs/utils/include/utils/StructureOfArrays.h +++ b/libs/utils/include/utils/StructureOfArrays.h @@ -368,7 +368,7 @@ class StructureOfArraysBase { size_t last = mSize++; // Fold expression on the comma operator ([&]{ - new(std::get(mArrays) + last) Elements{std::get(args)}; + new(std::get(mArrays) + last) Elements{std::get(std::forward(args))}; }() , ...); } From 99eac62b4e0f58b3beaf1d49effe76b6dfdecaae Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 23 Apr 2024 11:17:37 -0700 Subject: [PATCH 11/31] give proper move semantics to Handle<> --- filament/backend/include/backend/Handle.h | 15 +++++++++++++++ filament/src/details/VertexBuffer.cpp | 2 +- libs/utils/include/utils/StructureOfArrays.h | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/filament/backend/include/backend/Handle.h b/filament/backend/include/backend/Handle.h index 4b63607a1cf..c54e9609cef 100644 --- a/filament/backend/include/backend/Handle.h +++ b/filament/backend/include/backend/Handle.h @@ -75,6 +75,19 @@ class HandleBase { HandleBase(HandleBase const& rhs) noexcept = default; HandleBase& operator=(HandleBase const& rhs) noexcept = default; + HandleBase(HandleBase&& rhs) noexcept + : object(rhs.object) { + rhs.object = nullid; + } + + HandleBase& operator=(HandleBase&& rhs) noexcept { + if (this != &rhs) { + object = rhs.object; + rhs.object = nullid; + } + return *this; + } + private: 
HandleId object; }; @@ -89,8 +102,10 @@ struct Handle : public HandleBase { Handle() noexcept = default; Handle(Handle const& rhs) noexcept = default; + Handle(Handle&& rhs) noexcept = default; Handle& operator=(Handle const& rhs) noexcept = default; + Handle& operator=(Handle&& rhs) noexcept = default; explicit Handle(HandleId id) noexcept : HandleBase(id) { } diff --git a/filament/src/details/VertexBuffer.cpp b/filament/src/details/VertexBuffer.cpp index d718bbe0a85..2edf62d8555 100644 --- a/filament/src/details/VertexBuffer.cpp +++ b/filament/src/details/VertexBuffer.cpp @@ -314,7 +314,7 @@ FVertexBuffer::FVertexBuffer(FEngine& engine, const VertexBuffer::Builder& build void FVertexBuffer::terminate(FEngine& engine) { FEngine::DriverApi& driver = engine.getDriverApi(); if (!mBufferObjectsEnabled) { - for (BufferObjectHandle const bo : mBufferObjects) { + for (BufferObjectHandle bo : mBufferObjects) { driver.destroyBufferObject(bo); } } diff --git a/libs/utils/include/utils/StructureOfArrays.h b/libs/utils/include/utils/StructureOfArrays.h index 65e4b3305cc..c0b2315eccf 100644 --- a/libs/utils/include/utils/StructureOfArrays.h +++ b/libs/utils/include/utils/StructureOfArrays.h @@ -513,7 +513,7 @@ class StructureOfArraysBase { return (soa.elementAt(i) = other); } UTILS_ALWAYS_INLINE Type const& operator = (Type&& other) noexcept { - return (soa.elementAt(i) = other); + return (soa.elementAt(i) = std::forward(other)); } // comparisons UTILS_ALWAYS_INLINE bool operator==(Type const& other) const { From c8335fade732f1f42ca877743384c5cf6139dbbf Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Wed, 24 Apr 2024 11:27:57 -0700 Subject: [PATCH 12/31] vk: Fix unsupported depth blitting (#7789) * vk: Fix unsupported depth blitting On certain hardware (pixel 4 for example), blitting of depth texture is not supported as an "optimalTilingFeature". In these cases, we would need to do a shader-based blit.
We - Add the shader blit in PostProcessingManager - Add a driver API to check for support for blitting depthStencil attachments. - Fix some debugging ifdefs in vk backend. The validation fixed is: `[ VUID-vkCmdBlitImage-dstImage-02000 ] Object 0: handle = 0xb400007c300701d0, type = VK_OBJECT_TYPE_COMMAND_BUFFER; Object 1: handle = 0xf2039b0000000771, type = VK_OBJECT_TYPE_IMAGE; | MessageID = 0x86bc2a78 | In vkCmdBlitImage, VkFormatFeatureFlags (0x1c601) does not support required feature VK_FORMAT_FEATURE_2_BLIT_DST_BIT for format 126 used by VkImage 0xf2039b0000000771[] with tiling VK_IMAGE_TILING_OPTIMAL. The Vulkan spec states: The format features of dstImage must contain VK_FORMAT_FEATURE_BLIT_DST_BIT` --- filament/CMakeLists.txt | 1 + .../include/private/backend/DriverAPI.inc | 1 + filament/backend/src/metal/MetalDriver.mm | 4 + filament/backend/src/noop/NoopDriver.cpp | 4 + filament/backend/src/opengl/OpenGLDriver.cpp | 4 + filament/backend/src/vulkan/VulkanBlitter.cpp | 4 - filament/backend/src/vulkan/VulkanConstants.h | 4 +- filament/backend/src/vulkan/VulkanContext.h | 11 +- filament/backend/src/vulkan/VulkanDriver.cpp | 7 +- .../backend/src/vulkan/VulkanImageUtility.cpp | 2 +- .../src/vulkan/platform/VulkanPlatform.cpp | 34 ++++- .../platform/VulkanPlatformSwapChainImpl.cpp | 4 +- filament/src/PostProcessManager.cpp | 140 ++++++++++++------ filament/src/PostProcessManager.h | 6 +- filament/src/materials/blitDepth.mat | 42 ++++++ 15 files changed, 203 insertions(+), 65 deletions(-) create mode 100644 filament/src/materials/blitDepth.mat diff --git a/filament/CMakeLists.txt b/filament/CMakeLists.txt index b978d2da1bd..12e5ca76691 100644 --- a/filament/CMakeLists.txt +++ b/filament/CMakeLists.txt @@ -214,6 +214,7 @@ set(PRIVATE_HDRS set(MATERIAL_SRCS src/materials/antiAliasing/fxaa.mat src/materials/antiAliasing/taa.mat + src/materials/blitDepth.mat src/materials/blitLow.mat src/materials/blitArray.mat src/materials/bloom/bloomDownsample.mat diff --git 
a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc index 844841862b3..2e2f81d9d37 100644 --- a/filament/backend/include/private/backend/DriverAPI.inc +++ b/filament/backend/include/private/backend/DriverAPI.inc @@ -304,6 +304,7 @@ DECL_DRIVER_API_SYNCHRONOUS_0(bool, isProtectedContentSupported) DECL_DRIVER_API_SYNCHRONOUS_N(bool, isStereoSupported, backend::StereoscopicType, stereoscopicType) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isParallelShaderCompileSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isDepthStencilResolveSupported) +DECL_DRIVER_API_SYNCHRONOUS_N(bool, isDepthStencilBlitSupported, backend::TextureFormat, format) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isProtectedTexturesSupported) DECL_DRIVER_API_SYNCHRONOUS_0(uint8_t, getMaxDrawBuffers) DECL_DRIVER_API_SYNCHRONOUS_0(size_t, getMaxUniformBufferSize) diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index d5e8dbf981e..65d1c918033 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -806,6 +806,10 @@ return false; } +bool MetalDriver::isDepthStencilBlitSupported(TextureFormat format) { + return true; +} + bool MetalDriver::isProtectedTexturesSupported() { return false; } diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp index 3eb88afe577..911f967413d 100644 --- a/filament/backend/src/noop/NoopDriver.cpp +++ b/filament/backend/src/noop/NoopDriver.cpp @@ -194,6 +194,10 @@ bool NoopDriver::isDepthStencilResolveSupported() { return true; } +bool NoopDriver::isDepthStencilBlitSupported(TextureFormat format) { + return true; +} + bool NoopDriver::isProtectedTexturesSupported() { return true; } diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index 15f7df08951..bad0bba635a 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ 
b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -2023,6 +2023,10 @@ bool OpenGLDriver::isDepthStencilResolveSupported() { return true; } +bool OpenGLDriver::isDepthStencilBlitSupported(TextureFormat format) { + return true; +} + bool OpenGLDriver::isProtectedTexturesSupported() { return getContext().ext.EXT_protected_textures; } diff --git a/filament/backend/src/vulkan/VulkanBlitter.cpp b/filament/backend/src/vulkan/VulkanBlitter.cpp index c4316cf1ec8..cde45ad0d87 100644 --- a/filament/backend/src/vulkan/VulkanBlitter.cpp +++ b/filament/backend/src/vulkan/VulkanBlitter.cpp @@ -26,8 +26,6 @@ #include -#include "generated/vkshaders/vkshaders.h" - using namespace bluevk; using namespace utils; @@ -40,7 +38,6 @@ namespace { inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, VkFilter filter, VulkanAttachment src, VulkanAttachment dst, const VkOffset3D srcRect[2], const VkOffset3D dstRect[2]) { - if constexpr (FVK_ENABLED(FVK_DEBUG_BLITTER)) { utils::slog.d << "Fast blit from=" << src.texture->getVkImage() << ",level=" << (int) src.level << " layout=" << src.getLayout() @@ -93,7 +90,6 @@ inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, inline void resolveFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, VulkanAttachment src, VulkanAttachment dst) { - if constexpr (FVK_ENABLED(FVK_DEBUG_BLITTER)) { utils::slog.d << "Fast blit from=" << src.texture->getVkImage() << ",level=" << (int) src.level << " layout=" << src.getLayout() diff --git a/filament/backend/src/vulkan/VulkanConstants.h b/filament/backend/src/vulkan/VulkanConstants.h index b4974950ef5..03736d558d5 100644 --- a/filament/backend/src/vulkan/VulkanConstants.h +++ b/filament/backend/src/vulkan/VulkanConstants.h @@ -47,7 +47,7 @@ // granualarity of a renderpass. You can enable this along with FVK_DEBUG_DEBUG_UTILS to take // advantage of vkCmdBegin/EndDebugUtilsLabelEXT. 
You can also just enable this with // FVK_DEBUG_PRINT_GROUP_MARKERS to print the current marker to stdout. -#define FVK_DEBUG_GROUP_MARKERS 0x00000002 +#define FVK_DEBUG_GROUP_MARKERS 0x00000002 #define FVK_DEBUG_TEXTURE 0x00000004 #define FVK_DEBUG_LAYOUT_TRANSITION 0x00000008 @@ -112,7 +112,7 @@ static_assert(FVK_ENABLED(FVK_DEBUG_VALIDATION)); // end dependcy checks // Shorthand for combination of enabled debug flags -#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) || FVK_ENABLED(FVK_DEBUG_TEXTURE) +#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) && FVK_ENABLED(FVK_DEBUG_TEXTURE) #define FVK_ENABLED_DEBUG_SAMPLER_NAME 1 #else #define FVK_ENABLED_DEBUG_SAMPLER_NAME 0 diff --git a/filament/backend/src/vulkan/VulkanContext.h b/filament/backend/src/vulkan/VulkanContext.h index 7c60f576b35..bdc3af691a9 100644 --- a/filament/backend/src/vulkan/VulkanContext.h +++ b/filament/backend/src/vulkan/VulkanContext.h @@ -101,8 +101,12 @@ struct VulkanContext { return (uint32_t) VK_MAX_MEMORY_TYPES; } - inline VkFormatList const& getAttachmentDepthFormats() const { - return mDepthFormats; + inline VkFormatList const& getAttachmentDepthStencilFormats() const { + return mDepthStencilFormats; + } + + inline VkFormatList const& getBlittableDepthStencilFormats() const { + return mBlittableDepthStencilFormats; } inline VkPhysicalDeviceLimits const& getPhysicalDeviceLimits() const noexcept { @@ -131,7 +135,8 @@ struct VulkanContext { bool mDebugMarkersSupported = false; bool mDebugUtilsSupported = false; - VkFormatList mDepthFormats; + VkFormatList mDepthStencilFormats; + VkFormatList mBlittableDepthStencilFormats; // For convenience so that VulkanPlatform can initialize the private fields. 
friend class VulkanPlatform; diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index 2b2284e092e..dada1d695cd 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -921,6 +921,11 @@ bool VulkanDriver::isDepthStencilResolveSupported() { return false; } +bool VulkanDriver::isDepthStencilBlitSupported(TextureFormat format) { + auto const& formats = mContext.getBlittableDepthStencilFormats(); + return std::find(formats.begin(), formats.end(), getVkFormat(format)) != formats.end(); +} + bool VulkanDriver::isProtectedTexturesSupported() { return false; } @@ -1807,7 +1812,7 @@ void VulkanDriver::bindPipeline(PipelineState pipelineState) { // This fallback path is very flaky because the dummy texture might not have // matching characteristics. (e.g. if the missing texture is a 3D texture) if (UTILS_UNLIKELY(texture->getPrimaryImageLayout() == VulkanLayout::UNDEFINED)) { -#if FVK_ENABLED(FVK_DEBUG_TEXTURE) +#if FVK_ENABLED(FVK_DEBUG_TEXTURE) && FVK_ENABLED_DEBUG_SAMPLER_NAME utils::slog.w << "Uninitialized texture bound to '" << bindingToName[binding] << "'"; utils::slog.w << " in material '" << program->name.c_str() << "'"; utils::slog.w << " at binding point " << +binding << utils::io::endl; diff --git a/filament/backend/src/vulkan/VulkanImageUtility.cpp b/filament/backend/src/vulkan/VulkanImageUtility.cpp index 108a3f2b96a..ada9de0ae26 100644 --- a/filament/backend/src/vulkan/VulkanImageUtility.cpp +++ b/filament/backend/src/vulkan/VulkanImageUtility.cpp @@ -217,7 +217,7 @@ bool operator<(const VkImageSubresourceRange& a, const VkImageSubresourceRange& return false; } -#if FVK_ENABLED(FVK_DEBUG_LAYOUT_TRANSITION | FVK_DEBUG_TEXTURE) +#if FVK_ENABLED(FVK_DEBUG_LAYOUT_TRANSITION) || FVK_ENABLED(FVK_DEBUG_TEXTURE) #define CASE(VALUE) \ case filament::backend::VulkanLayout::VALUE: { \ out << #VALUE; \ diff --git 
a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp index 1510669b285..c58e5d19d0a 100644 --- a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp +++ b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp @@ -344,13 +344,13 @@ std::tuple pruneExtensions(VkPhysicalDevice device, ExtensionSet newInstExts = instExts; ExtensionSet newDeviceExts = deviceExts; -#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) +#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) // debugUtils and debugMarkers extensions are used mutually exclusively. if (newInstExts.find(VK_EXT_DEBUG_UTILS_EXTENSION_NAME) != newInstExts.end() && newDeviceExts.find(VK_EXT_DEBUG_MARKER_EXTENSION_NAME) != newDeviceExts.end()) { newDeviceExts.erase(VK_EXT_DEBUG_MARKER_EXTENSION_NAME); } -#endif +#endif #if FVK_ENABLED(FVK_DEBUG_VALIDATION) // debugMarker must also request debugReport the instance extension. So check if that's present. @@ -500,7 +500,7 @@ VkPhysicalDevice selectPhysicalDevice(VkInstance instance, return device; } -VkFormatList findAttachmentDepthFormats(VkPhysicalDevice device) { +VkFormatList findAttachmentDepthStencilFormats(VkPhysicalDevice device) { VkFormatFeatureFlags const features = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; // The ordering here indicates the preference of choosing depth+stencil format. 
@@ -524,6 +524,28 @@ VkFormatList findAttachmentDepthFormats(VkPhysicalDevice device) { return ret; } +VkFormatList findBlittableDepthStencilFormats(VkPhysicalDevice device) { + std::vector selectedFormats; + VkFormatFeatureFlags const required = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + for (VkFormat format = (VkFormat) 1;;) { + if (isVkDepthFormat(format)) { + VkFormatProperties props; + vkGetPhysicalDeviceFormatProperties(device, format, &props); + if ((props.optimalTilingFeatures & required) == required) { + selectedFormats.push_back(format); + } + } + format = (VkFormat) (1 + (int) format); + if (format == VK_FORMAT_ASTC_12x12_SRGB_BLOCK) { + break; + } + } + VkFormatList ret(selectedFormats.size()); + std::copy(selectedFormats.begin(), selectedFormats.end(), ret.begin()); + return ret; +} + }// anonymous namespace using SwapChainPtr = VulkanPlatform::SwapChainPtr; @@ -669,9 +691,11 @@ Driver* VulkanPlatform::createDriver(void* sharedContext, "Debug utils should not be enabled in release build."); #endif - context.mDepthFormats = findAttachmentDepthFormats(mImpl->mPhysicalDevice); + context.mDepthStencilFormats = findAttachmentDepthStencilFormats(mImpl->mPhysicalDevice); + context.mBlittableDepthStencilFormats = + findBlittableDepthStencilFormats(mImpl->mPhysicalDevice); - assert_invariant(context.mDepthFormats.size() > 0); + assert_invariant(context.mDepthStencilFormats.size() > 0); #if FVK_ENABLED(FVK_DEBUG_VALIDATION) printDepthFormats(mImpl->mPhysicalDevice); diff --git a/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp b/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp index a5a31a8d95f..5627e2610c2 100644 --- a/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp +++ b/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp @@ -243,7 +243,7 @@ VkResult VulkanPlatformSurfaceSwapChain::create() { 
mSwapChainBundle.colors = enumerate(vkGetSwapchainImagesKHR, mDevice, mSwapchain); mSwapChainBundle.colorFormat = surfaceFormat.format; mSwapChainBundle.depthFormat = - selectDepthFormat(mContext.getAttachmentDepthFormats(), mHasStencil); + selectDepthFormat(mContext.getAttachmentDepthStencilFormats(), mHasStencil); mSwapChainBundle.depth = createImage(mSwapChainBundle.extent, mSwapChainBundle.depthFormat); slog.i << "vkCreateSwapchain" @@ -330,7 +330,7 @@ VulkanPlatformHeadlessSwapChain::VulkanPlatformHeadlessSwapChain(VulkanContext c bool const hasStencil = (flags & backend::SWAP_CHAIN_HAS_STENCIL_BUFFER) != 0; mSwapChainBundle.depthFormat = - selectDepthFormat(mContext.getAttachmentDepthFormats(), hasStencil); + selectDepthFormat(mContext.getAttachmentDepthStencilFormats(), hasStencil); mSwapChainBundle.depth = createImage(extent, mSwapChainBundle.depthFormat); } diff --git a/filament/src/PostProcessManager.cpp b/filament/src/PostProcessManager.cpp index e570a9f632f..9dd8b9d5ff9 100644 --- a/filament/src/PostProcessManager.cpp +++ b/filament/src/PostProcessManager.cpp @@ -265,6 +265,7 @@ static const PostProcessManager::MaterialInfo sMaterialList[] = { { "bilateralBlur", MATERIAL(BILATERALBLUR) }, { "bilateralBlurBentNormals", MATERIAL(BILATERALBLURBENTNORMALS) }, { "blitArray", MATERIAL(BLITARRAY) }, + { "blitDepth", MATERIAL(BLITDEPTH) }, { "bloomDownsample", MATERIAL(BLOOMDOWNSAMPLE) }, { "bloomDownsample2x", MATERIAL(BLOOMDOWNSAMPLE2X) }, { "bloomDownsample9", MATERIAL(BLOOMDOWNSAMPLE9) }, @@ -720,46 +721,16 @@ FrameGraphId PostProcessManager::screenSpaceAmbientOcclusion( * attachment, even if writes are not enabled. This restriction is lifted on desktop GL and * Vulkan. The Metal situation is unclear. * In this case, we need to duplicate the depth texture to use it as an attachment. - * The pass below that does this is automatically culled if not needed, which is decided by - * each backend. + * + * This is also needed in Vulkan for a similar reason. 
*/ + FrameGraphId duplicateDepthOutput = {}; + if (!mWorkaroundAllowReadOnlyAncillaryFeedbackLoop) { + duplicateDepthOutput = blitDepth(fg, depth); + } - struct DuplicateDepthPassData { - FrameGraphId input; - FrameGraphId output; - }; - - // Needed for Vulkan and GLES. Some GLES implementations don't need it. Never needed for Metal. - auto& duplicateDepthPass = fg.addPass("Duplicate Depth Pass", - [&](FrameGraph::Builder& builder, auto& data) { - data.input = builder.read(depth, - FrameGraphTexture::Usage::BLIT_SRC); - - auto desc = builder.getDescriptor(data.input); - desc.levels = 1; // only copy the base level - - // create a new buffer for the copy - data.output = builder.createTexture("Depth Texture Copy", desc); - - // output is an attachment - data.output = builder.write(data.output, - FrameGraphTexture::Usage::BLIT_DST); - }, - [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) { - auto const& src = resources.getTexture(data.input); - auto const& dst = resources.getTexture(data.output); - auto const& srcSubDesc = resources.getSubResourceDescriptor(data.input); - auto const& dstSubDesc = resources.getSubResourceDescriptor(data.output); - auto const& desc = resources.getDescriptor(data.output); - assert_invariant(desc.samples == resources.getDescriptor(data.input).samples); - // here we can guarantee that src and dst format and size match, by construction. - driver.blit( - dst, dstSubDesc.level, dstSubDesc.layer, { 0, 0 }, - src, srcSubDesc.level, srcSubDesc.layer, { 0, 0 }, - { desc.width, desc.height }); - }); - - auto& SSAOPass = fg.addPass("SSAO Pass", + auto& SSAOPass = fg.addPass( + "SSAO Pass", [&](FrameGraph::Builder& builder, auto& data) { auto const& desc = builder.getDescriptor(depth); @@ -788,10 +759,7 @@ FrameGraphId PostProcessManager::screenSpaceAmbientOcclusion( // reading into it even though they were not written in the depth buffer. // The bilateral filter in the blur pass will ignore pixels at infinity. 
- auto depthAttachment = data.depth; - if (!mWorkaroundAllowReadOnlyAncillaryFeedbackLoop) { - depthAttachment = duplicateDepthPass->output; - } + auto depthAttachment = duplicateDepthOutput ? duplicateDepthOutput : data.depth; depthAttachment = builder.read(depthAttachment, FrameGraphTexture::Usage::DEPTH_ATTACHMENT); @@ -801,8 +769,7 @@ FrameGraphId PostProcessManager::screenSpaceAmbientOcclusion( .clearFlags = TargetBufferFlags::COLOR0 | TargetBufferFlags::COLOR1 }); }, - [=](FrameGraphResources const& resources, - auto const& data, DriverApi& driver) { + [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) { auto depth = resources.getTexture(data.depth); auto ssao = resources.getRenderPassInfo(); auto const& desc = resources.getDescriptor(data.depth); @@ -3043,7 +3010,7 @@ FrameGraphId PostProcessManager::blit(FrameGraph& fg, bool tr auto& ppQuadBlit = fg.addPass("blitting", [&](FrameGraph::Builder& builder, auto& data) { data.input = builder.sample(input); - data.output = builder.createTexture("upscaled output", outDesc); + data.output = builder.createTexture("blit output", outDesc); data.output = builder.write(data.output, FrameGraphTexture::Usage::COLOR_ATTACHMENT); builder.declareRenderPass(builder.getName(data.output), { @@ -3087,6 +3054,87 @@ FrameGraphId PostProcessManager::blit(FrameGraph& fg, bool tr return ppQuadBlit->output; } +FrameGraphId PostProcessManager::blitDepth(FrameGraph& fg, + FrameGraphId input) noexcept { + auto const& inputDesc = fg.getDescriptor(input); + filament::Viewport const vp = {0, 0, inputDesc.width, inputDesc.height}; + bool const hardwareBlitSupported = + mEngine.getDriverApi().isDepthStencilBlitSupported(inputDesc.format); + + struct BlitData { + FrameGraphId input; + FrameGraphId output; + }; + + if (hardwareBlitSupported) { + auto& depthPass = fg.addPass( + "Depth Blit", + [&](FrameGraph::Builder& builder, auto& data) { + data.input = builder.read(input, FrameGraphTexture::Usage::BLIT_SRC); + + 
auto desc = builder.getDescriptor(data.input); + desc.levels = 1;// only copy the base level + + // create a new buffer for the copy + data.output = builder.createTexture("depth blit output", desc); + + // output is an attachment + data.output = builder.write(data.output, FrameGraphTexture::Usage::BLIT_DST); + }, + [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) { + auto const& src = resources.getTexture(data.input); + auto const& dst = resources.getTexture(data.output); + auto const& srcSubDesc = resources.getSubResourceDescriptor(data.input); + auto const& dstSubDesc = resources.getSubResourceDescriptor(data.output); + auto const& desc = resources.getDescriptor(data.output); + assert_invariant(desc.samples == resources.getDescriptor(data.input).samples); + // here we can guarantee that src and dst format and size match, by + // construction. + driver.blit( + dst, dstSubDesc.level, dstSubDesc.layer, { 0, 0 }, + src, srcSubDesc.level, srcSubDesc.layer, { 0, 0 }, + { desc.width, desc.height }); + }); + return depthPass->output; + } + // Otherwise, we would do a shader-based blit. + + auto& ppQuadBlit = fg.addPass( + "Depth Blit (Shader)", + [&](FrameGraph::Builder& builder, auto& data) { + data.input = builder.sample(input); + // Note that this is a same size/format blit. 
+ auto const& outputDesc = inputDesc; + data.output = builder.createTexture("depth blit output", outputDesc); + data.output = + builder.write(data.output, FrameGraphTexture::Usage::DEPTH_ATTACHMENT); + builder.declareRenderPass(builder.getName(data.output), + {.attachments = {.depth = {data.output}}}); + }, + [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) { + auto depth = resources.getTexture(data.input); + auto const& inputDesc = resources.getDescriptor(data.input); + auto out = resources.getRenderPassInfo(); + + // -------------------------------------------------------------------------------- + // set uniforms + PostProcessMaterial const& material = getPostProcessMaterial("blitDepth"); + auto* mi = material.getMaterialInstance(mEngine); + mi->setParameter("depth", depth, + { + .filterMag = SamplerMagFilter::NEAREST, + .filterMin = SamplerMinFilter::NEAREST, + }); + mi->setParameter("viewport", + float4{float(vp.left) / inputDesc.width, + float(vp.bottom) / inputDesc.height, float(vp.width) / inputDesc.width, + float(vp.height) / inputDesc.height}); + commitAndRender(out, material, driver); + }); + + return ppQuadBlit->output; +} + FrameGraphId PostProcessManager::resolve(FrameGraph& fg, const char* outputBufferName, FrameGraphId input, FrameGraphTexture::Descriptor outDesc) noexcept { @@ -3283,7 +3331,7 @@ FrameGraphId PostProcessManager::debugShadowCascades(FrameGra return debugShadowCascadePass->output; } -FrameGraphId PostProcessManager::debugCombineArrayTexture(FrameGraph& fg, +FrameGraphId PostProcessManager::debugCombineArrayTexture(FrameGraph& fg, bool translucent, FrameGraphId input, filament::Viewport const& vp, FrameGraphTexture::Descriptor const& outDesc, SamplerMagFilter filterMag, diff --git a/filament/src/PostProcessManager.h b/filament/src/PostProcessManager.h index 432a9e3fff3..76f5bcd325f 100644 --- a/filament/src/PostProcessManager.h +++ b/filament/src/PostProcessManager.h @@ -250,13 +250,17 @@ class 
PostProcessManager { FrameGraphTexture::Descriptor const& outDesc, bool translucent); - // upscale/downscale blitter using shaders + // color blitter using shaders FrameGraphId blit(FrameGraph& fg, bool translucent, FrameGraphId input, filament::Viewport const& vp, FrameGraphTexture::Descriptor const& outDesc, backend::SamplerMagFilter filterMag, backend::SamplerMinFilter filterMin) noexcept; + // depth blitter using shaders + FrameGraphId blitDepth(FrameGraph& fg, + FrameGraphId input) noexcept; + // Resolves base level of input and outputs a texture from outDesc. // outDesc with, height, format and samples will be overridden. FrameGraphId resolve(FrameGraph& fg, diff --git a/filament/src/materials/blitDepth.mat b/filament/src/materials/blitDepth.mat new file mode 100644 index 00000000000..c8536ad545c --- /dev/null +++ b/filament/src/materials/blitDepth.mat @@ -0,0 +1,42 @@ +material { + name : blitDepth, + parameters : [ + { + type : sampler2d, + name : depth, + precision: medium + }, + { + type : float4, + name : viewport, + precision: high + } + ], + outputs : [ + { + name : depth, + target : depth, + type : float + } + ], + variables : [ + vertex + ], + culling: none, + depthWrite : true, + depthCulling : false, + domain: postprocess, +} + +vertex { + void postProcessVertex(inout PostProcessVertexInputs postProcess) { + postProcess.vertex.xy = materialParams.viewport.xy + postProcess.normalizedUV * materialParams.viewport.zw; + postProcess.vertex.xy = uvToRenderTargetUV(postProcess.vertex.xy); + } +} + +fragment { + void postProcess(inout PostProcessInputs postProcess) { + postProcess.depth = textureLod(materialParams_depth, variable_vertex.xy, 0.0).r; + } +} From bfab9f9c320803e098d4fff41322f5562551bea8 Mon Sep 17 00:00:00 2001 From: Eliza Velasquez Date: Thu, 25 Apr 2024 14:45:14 -0700 Subject: [PATCH 13/31] engine: Add isPaused() --- NEW_RELEASE_NOTES.md | 2 ++ android/filament-android/src/main/cpp/Engine.cpp | 7 +++++++ 
.../java/com/google/android/filament/Engine.java | 12 ++++++++++++ .../include/private/backend/CommandBufferQueue.h | 1 + filament/backend/src/CommandBufferQueue.cpp | 5 +++++ filament/include/filament/Engine.h | 9 +++++++++ filament/src/Engine.cpp | 5 +++++ filament/src/details/Engine.cpp | 6 ++++++ filament/src/details/Engine.h | 1 + 9 files changed, 48 insertions(+) diff --git a/NEW_RELEASE_NOTES.md b/NEW_RELEASE_NOTES.md index 4a1a9c7fa7e..494e8046aa6 100644 --- a/NEW_RELEASE_NOTES.md +++ b/NEW_RELEASE_NOTES.md @@ -7,3 +7,5 @@ for next branch cut* header. appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md). ## Release notes for next branch cut + +- engine: Add `isPaused()` diff --git a/android/filament-android/src/main/cpp/Engine.cpp b/android/filament-android/src/main/cpp/Engine.cpp index 713baa53edb..ef67358079d 100644 --- a/android/filament-android/src/main/cpp/Engine.cpp +++ b/android/filament-android/src/main/cpp/Engine.cpp @@ -406,6 +406,13 @@ Java_com_google_android_filament_Engine_nFlush(JNIEnv*, jclass, engine->flush(); } +extern "C" JNIEXPORT jboolean JNICALL +Java_com_google_android_filament_Engine_nIsPaused(JNIEnv*, jclass, + jlong nativeEngine) { + Engine* engine = (Engine*) nativeEngine; + return (jboolean)engine->isPaused(); +} + extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetPaused(JNIEnv*, jclass, jlong nativeEngine, jboolean paused) { diff --git a/android/filament-android/src/main/java/com/google/android/filament/Engine.java b/android/filament-android/src/main/java/com/google/android/filament/Engine.java index a7ec77a71cd..9f8f478009e 100644 --- a/android/filament-android/src/main/java/com/google/android/filament/Engine.java +++ b/android/filament-android/src/main/java/com/google/android/filament/Engine.java @@ -1227,6 +1227,17 @@ public void flush() { nFlush(getNativeObject()); } + /** + * Get paused state of rendering thread. + * + *

Warning: This is an experimental API. + * + * @see #setPaused + */ + public boolean isPaused() { + return nIsPaused(getNativeObject()); + } + /** * Pause or resume the rendering thread. * @@ -1319,6 +1330,7 @@ private static void assertDestroy(boolean success) { private static native void nDestroyEntity(long nativeEngine, int entity); private static native void nFlushAndWait(long nativeEngine); private static native void nFlush(long nativeEngine); + private static native boolean nIsPaused(long nativeEngine); private static native void nSetPaused(long nativeEngine, boolean paused); private static native long nGetTransformManager(long nativeEngine); private static native long nGetLightManager(long nativeEngine); diff --git a/filament/backend/include/private/backend/CommandBufferQueue.h b/filament/backend/include/private/backend/CommandBufferQueue.h index 92bf7e1488c..e8ff9aa896c 100644 --- a/filament/backend/include/private/backend/CommandBufferQueue.h +++ b/filament/backend/include/private/backend/CommandBufferQueue.h @@ -82,6 +82,7 @@ class CommandBufferQueue { void requestExit(); // suspend or unsuspend the queue. 
+ bool isPaused() const noexcept; void setPaused(bool paused); bool isExitRequested() const; diff --git a/filament/backend/src/CommandBufferQueue.cpp b/filament/backend/src/CommandBufferQueue.cpp index b721ce0c50f..62af6499808 100644 --- a/filament/backend/src/CommandBufferQueue.cpp +++ b/filament/backend/src/CommandBufferQueue.cpp @@ -57,6 +57,11 @@ void CommandBufferQueue::requestExit() { mCondition.notify_one(); } +bool CommandBufferQueue::isPaused() const noexcept { + std::lock_guard const lock(mLock); + return mPaused; +} + void CommandBufferQueue::setPaused(bool paused) { std::lock_guard const lock(mLock); if (paused) { diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h index 033cb8066fd..e5b142c3147 100644 --- a/filament/include/filament/Engine.h +++ b/filament/include/filament/Engine.h @@ -867,6 +867,15 @@ class UTILS_PUBLIC Engine { */ void flush(); + /** + * Get paused state of rendering thread. + * + *

Warning: This is an experimental API. + * + * @see setPaused + */ + bool isPaused() const noexcept; + /** * Pause or resume rendering thread. * diff --git a/filament/src/Engine.cpp b/filament/src/Engine.cpp index d0042f63320..ca370bc85cc 100644 --- a/filament/src/Engine.cpp +++ b/filament/src/Engine.cpp @@ -308,6 +308,11 @@ utils::JobSystem& Engine::getJobSystem() noexcept { return downcast(this)->getJobSystem(); } +bool Engine::isPaused() const noexcept { + ASSERT_PRECONDITION(UTILS_HAS_THREADING, "Pause is meant for multi-threaded platforms."); + return downcast(this)->isPaused(); +} + void Engine::setPaused(bool paused) { ASSERT_PRECONDITION(UTILS_HAS_THREADING, "Pause is meant for multi-threaded platforms."); downcast(this)->setPaused(paused); diff --git a/filament/src/details/Engine.cpp b/filament/src/details/Engine.cpp index 7a97aedd7e6..d57df19550a 100644 --- a/filament/src/details/Engine.cpp +++ b/filament/src/details/Engine.cpp @@ -581,6 +581,8 @@ void FEngine::flush() { } void FEngine::flushAndWait() { + ASSERT_PRECONDITION(!mCommandBufferQueue.isPaused(), + "Cannot call flushAndWait() when rendering thread is paused!"); #if defined(__ANDROID__) @@ -1218,6 +1220,10 @@ void FEngine::destroy(FEngine* engine) { } } +bool FEngine::isPaused() const noexcept { + return mCommandBufferQueue.isPaused(); +} + void FEngine::setPaused(bool paused) { mCommandBufferQueue.setPaused(paused); } diff --git a/filament/src/details/Engine.h b/filament/src/details/Engine.h index c31549e9da5..88434071a5e 100644 --- a/filament/src/details/Engine.h +++ b/filament/src/details/Engine.h @@ -344,6 +344,7 @@ class FEngine : public Engine { void destroy(utils::Entity e); + bool isPaused() const noexcept; void setPaused(bool paused); void flushAndWait(); From bf8bcef35ca2e7b04a557864ec24de42007f2ad2 Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Thu, 25 Apr 2024 16:42:55 -0700 Subject: [PATCH 14/31] Add option to define a backend debug flag (#7798) To allow easy enabling/disabling of 
debug options like vulkan validation, android systrace, debug printing, and others, we introduce an option to add a preprocessor flag so that a backend can (optionally) use it to manage debug options. --- CMakeLists.txt | 10 ++++++++++ build.sh | 24 ++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f67e61d4ce6..348815f54cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,10 @@ set(FILAMENT_METAL_HANDLE_ARENA_SIZE_IN_MB "8" CACHE STRING "Size of the Metal handle arena, default 8." ) +set(FILAMENT_BACKEND_DEBUG_FLAG "" CACHE STRING + "A debug flag meant for enabling/disabling backend debugging paths" +) + # Enable exceptions by default in spirv-cross. set(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS OFF) @@ -548,6 +552,12 @@ if (FILAMENT_SAMPLES_STEREO_TYPE STREQUAL "multiview") set(FILAMENT_ENABLE_MULTIVIEW ON) endif () +# Define backend flag for debug only +if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT FILAMENT_BACKEND_DEBUG_FLAG STREQUAL "") + add_definitions(-DFILAMENT_BACKEND_DEBUG_FLAG=${FILAMENT_BACKEND_DEBUG_FLAG}) + unset(FILAMENT_BACKEND_DEBUG_FLAG) +endif() + # ================================================================================================== # Material compilation flags # ================================================================================================== diff --git a/build.sh b/build.sh index 56562cc1c6c..434a0045fa3 100755 --- a/build.sh +++ b/build.sh @@ -61,6 +61,11 @@ function print_help { echo " -b" echo " Enable Address and Undefined Behavior Sanitizers (asan/ubsan) for debugging." echo " This is only for the desktop build." + echo " -x value" + echo " Define a preprocessor flag FILAMENT_BACKEND_DEBUG_FLAG with [value]. This is useful for" + echo " enabling debug paths in the backend from the build script. For example, make a" + echo " systrace-enabled build without directly changing #defines. 
Remember to add -f when" + echo " changing this option." echo "" echo "Build types:" echo " release" @@ -172,6 +177,8 @@ MATOPT_GRADLE_OPTION="" ASAN_UBSAN_OPTION="" +BACKEND_DEBUG_FLAG_OPTION="" + IOS_BUILD_SIMULATOR=false BUILD_UNIVERSAL_LIBRARIES=false @@ -231,6 +238,7 @@ function build_desktop_target { ${MATDBG_OPTION} \ ${MATOPT_OPTION} \ ${ASAN_UBSAN_OPTION} \ + ${BACKEND_DEBUG_FLAG_OPTION} \ ${architectures} \ ../.. ln -sf "out/cmake-${lc_target}/compile_commands.json" \ @@ -289,6 +297,7 @@ function build_webgl_with_target { -DCMAKE_BUILD_TYPE="$1" \ -DCMAKE_INSTALL_PREFIX="../webgl-${lc_target}/filament" \ -DWEBGL=1 \ + ${BACKEND_DEBUG_FLAG_OPTION} \ ../.. ln -sf "out/cmake-webgl-${lc_target}/compile_commands.json" \ ../../compile_commands.json @@ -363,6 +372,7 @@ function build_android_target { ${MATDBG_OPTION} \ ${MATOPT_OPTION} \ ${VULKAN_ANDROID_OPTION} \ + ${BACKEND_DEBUG_FLAG_OPTION} \ ../.. ln -sf "out/cmake-android-${lc_target}-${arch}/compile_commands.json" \ ../../compile_commands.json @@ -597,6 +607,7 @@ function build_ios_target { -DCMAKE_TOOLCHAIN_FILE=../../third_party/clang/iOS.cmake \ ${MATDBG_OPTION} \ ${MATOPT_OPTION} \ + ${BACKEND_DEBUG_FLAG_OPTION} \ ../.. ln -sf "out/cmake-ios-${lc_target}-${arch}/compile_commands.json" \ ../../compile_commands.json @@ -730,6 +741,13 @@ function validate_build_command { exit 1 fi fi + + # Make sure FILAMENT_BACKEND_DEBUG_FLAG is only meant for debug builds + if [[ "${ISSUE_DEBUG_BUILD}" != "true" ]] && [[ ! 
-z "${BACKEND_DEBUG_FLAG_OPTION}" ]]; then + echo "Error: cannot specify FILAMENT_BACKEND_DEBUG_FLAG in non-debug build" + exit 1 + fi + set -e } @@ -776,7 +794,7 @@ function check_debug_release_build { pushd "$(dirname "$0")" > /dev/null -while getopts ":hacCfgijmp:q:uvslwtedk:b" opt; do +while getopts ":hacCfgijmp:q:uvslwtedk:bx:" opt; do case ${opt} in h) print_help @@ -840,7 +858,7 @@ while getopts ":hacCfgijmp:q:uvslwtedk:b" opt; do echo "Platform must be one of [desktop|android|ios|webgl|all]" echo "" exit 1 - ;; + ;; esac done ;; @@ -918,6 +936,8 @@ while getopts ":hacCfgijmp:q:uvslwtedk:b" opt; do b) ASAN_UBSAN_OPTION="-DFILAMENT_ENABLE_ASAN_UBSAN=ON" echo "Enabled ASAN/UBSAN" ;; + x) BACKEND_DEBUG_FLAG_OPTION="-DFILAMENT_BACKEND_DEBUG_FLAG=${OPTARG}" + ;; \?) echo "Invalid option: -${OPTARG}" >&2 echo "" From 420f06bef33604a08a0be3e50d45f1be2727e950 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Fri, 26 Apr 2024 11:15:51 -0700 Subject: [PATCH 15/31] implement 16 and 8 bits clz, ctz and popcount This is needed for utils::bitset8 and bitset16. 
--- libs/utils/include/utils/algorithm.h | 82 ++++++++++++++++++++++++---- libs/utils/test/test_algorithm.cpp | 26 +++++++++ 2 files changed, 97 insertions(+), 11 deletions(-) diff --git a/libs/utils/include/utils/algorithm.h b/libs/utils/include/utils/algorithm.h index ea5ca44fd5c..7a747b84ceb 100644 --- a/libs/utils/include/utils/algorithm.h +++ b/libs/utils/include/utils/algorithm.h @@ -22,6 +22,7 @@ #include // for std::enable_if #include +#include #include namespace utils { @@ -43,9 +44,15 @@ constexpr inline T clz(T x) noexcept { static_assert(sizeof(T) * CHAR_BIT <= 128, "details::clz() only support up to 128 bits"); x |= (x >> 1u); x |= (x >> 2u); - x |= (x >> 4u); - x |= (x >> 8u); - x |= (x >> 16u); + if constexpr (sizeof(T) * CHAR_BIT >= 8) { // just to silence compiler warning + x |= (x >> 4u); + } + if constexpr (sizeof(T) * CHAR_BIT >= 16) { // just to silence compiler warning + x |= (x >> 8u); + } + if constexpr (sizeof(T) * CHAR_BIT >= 32) { // just to silence compiler warning + x |= (x >> 16u); + } if constexpr (sizeof(T) * CHAR_BIT >= 64) { // just to silence compiler warning x |= (x >> 32u); } @@ -67,11 +74,15 @@ constexpr inline T ctz(T x) noexcept { x &= -x; #endif if (x) c--; - if (sizeof(T) * CHAR_BIT >= 64) { + if constexpr (sizeof(T) * CHAR_BIT >= 64) { if (x & T(0x00000000FFFFFFFF)) c -= 32; } - if (x & T(0x0000FFFF0000FFFF)) c -= 16; - if (x & T(0x00FF00FF00FF00FF)) c -= 8; + if constexpr (sizeof(T) * CHAR_BIT >= 32) { + if (x & T(0x0000FFFF0000FFFF)) c -= 16; + } + if constexpr (sizeof(T) * CHAR_BIT >= 16) { + if (x & T(0x00FF00FF00FF00FF)) c -= 8; + } if (x & T(0x0F0F0F0F0F0F0F0F)) c -= 4; if (x & T(0x3333333333333333)) c -= 2; if (x & T(0x5555555555555555)) c -= 1; @@ -80,6 +91,24 @@ constexpr inline T ctz(T x) noexcept { } // namespace details +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE clz(unsigned char x) noexcept { +#if __has_builtin(__builtin_clz) + return __builtin_clz((unsigned int)x) - 24; +#else 
+ return details::clz(x); +#endif +} + +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE clz(unsigned short x) noexcept { +#if __has_builtin(__builtin_clz) + return __builtin_clz((unsigned int)x) - 16; +#else + return details::clz(x); +#endif +} + constexpr inline UTILS_PUBLIC UTILS_PURE unsigned int UTILS_ALWAYS_INLINE clz(unsigned int x) noexcept { #if __has_builtin(__builtin_clz) @@ -107,6 +136,24 @@ unsigned long long UTILS_ALWAYS_INLINE clz(unsigned long long x) noexcept { #endif } +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE ctz(unsigned char x) noexcept { +#if __has_builtin(__builtin_ctz) + return __builtin_ctz(x); +#else + return details::ctz(x); +#endif +} + +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE ctz(unsigned short x) noexcept { +#if __has_builtin(__builtin_ctz) + return __builtin_ctz(x); +#else + return details::ctz(x); +#endif +} + constexpr inline UTILS_PUBLIC UTILS_PURE unsigned int UTILS_ALWAYS_INLINE ctz(unsigned int x) noexcept { #if __has_builtin(__builtin_ctz) @@ -134,6 +181,24 @@ unsigned long long UTILS_ALWAYS_INLINE ctz(unsigned long long x) noexcept { #endif } +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE popcount(unsigned char x) noexcept { +#if __has_builtin(__builtin_popcount) + return __builtin_popcount(x); +#else + return details::popcount(x); +#endif +} + +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE popcount(unsigned short x) noexcept { +#if __has_builtin(__builtin_popcount) + return __builtin_popcount(x); +#else + return details::popcount(x); +#endif +} + constexpr inline UTILS_PUBLIC UTILS_PURE unsigned int UTILS_ALWAYS_INLINE popcount(unsigned int x) noexcept { #if __has_builtin(__builtin_popcount) @@ -161,11 +226,6 @@ unsigned long long UTILS_ALWAYS_INLINE popcount(unsigned long long x) noexcept { #endif } -constexpr inline UTILS_PUBLIC UTILS_PURE -uint8_t UTILS_ALWAYS_INLINE 
popcount(uint8_t x) noexcept { - return (uint8_t)popcount((unsigned int)x); -} - template::value && std::is_unsigned::value>> constexpr inline UTILS_PUBLIC UTILS_PURE diff --git a/libs/utils/test/test_algorithm.cpp b/libs/utils/test/test_algorithm.cpp index fa36aff0dce..83dcc7b65bc 100644 --- a/libs/utils/test/test_algorithm.cpp +++ b/libs/utils/test/test_algorithm.cpp @@ -20,6 +20,8 @@ #include +#include + using namespace utils; template @@ -58,6 +60,20 @@ TEST(AlgorithmTest, clz) { EXPECT_EQ(j, details::clz(i|1)); EXPECT_EQ(j, count_leading_zeros(i)); } + for (uint16_t i = 1, j = 15; j < 16; i *= 2, j--) { + EXPECT_EQ(j, clz(i)); + EXPECT_EQ(j, clz(uint16_t(i|1))); + EXPECT_EQ(j, details::clz(i)); + EXPECT_EQ(j, details::clz(uint16_t(i|1))); + EXPECT_EQ(j, count_leading_zeros(i)); + } + for (uint8_t i = 1, j = 7; j < 8; i *= 2, j--) { + EXPECT_EQ(j, clz(i)); + EXPECT_EQ(j, clz(uint8_t(i|1))); + EXPECT_EQ(j, details::clz(i)); + EXPECT_EQ(j, details::clz(uint8_t(i|1))); + EXPECT_EQ(j, count_leading_zeros(i)); + } } TEST(AlgorithmTest, details_ctz) { @@ -80,6 +96,16 @@ TEST(AlgorithmTest, ctz) { EXPECT_EQ(j, details::ctz(i)); EXPECT_EQ(j, count_trailing_zeros(i)); } + for (uint16_t i = 1, j = 0; j < 16; i *= 2, j++) { + EXPECT_EQ(j, ctz(i)); + EXPECT_EQ(j, details::ctz(i)); + EXPECT_EQ(j, count_trailing_zeros(i)); + } + for (uint8_t i = 1, j = 0; j < 8; i *= 2, j++) { + EXPECT_EQ(j, ctz(i)); + EXPECT_EQ(j, details::ctz(i)); + EXPECT_EQ(j, count_trailing_zeros(i)); + } } TEST(AlgorithmTest, details_popcount) { From 22d99bac3d0492c0284f7654fa0de0694643e220 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Wed, 24 Apr 2024 11:30:11 -0700 Subject: [PATCH 16/31] simplify generateCommands() It's been a while that generateCommands() can only generate either the depth or color pass but not both at once; we can simplify/de-dup the code by leveraging that. 
--- filament/src/RenderPass.cpp | 303 ++++++++++++++++-------------------- filament/src/RenderPass.h | 2 +- 2 files changed, 132 insertions(+), 173 deletions(-) diff --git a/filament/src/RenderPass.cpp b/filament/src/RenderPass.cpp index 2b49fd15956..03ad3037162 100644 --- a/filament/src/RenderPass.cpp +++ b/filament/src/RenderPass.cpp @@ -213,8 +213,8 @@ void RenderPass::appendCommands(FEngine& engine, // This must be done from the main thread. for (Command const* first = curr, *last = curr + commandCount ; first != last ; ++first) { if (UTILS_LIKELY((first->key & CUSTOM_MASK) == uint64_t(CustomCommand::PASS))) { - auto ma = first->primitive.mi->getMaterial(); - ma->prepareProgram(first->primitive.materialVariant); + auto ma = first->info.mi->getMaterial(); + ma->prepareProgram(first->info.materialVariant); } } } @@ -294,18 +294,18 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { // because the morphing/skinning data for comparison is not easily accessible. // Additionally, we can't have a different skinning/morphing per instance anyway. Command const* e = curr + 1; - if (UTILS_LIKELY(!curr->primitive.hasSkinning && !curr->primitive.hasMorphing)) { + if (UTILS_LIKELY(!curr->info.hasSkinning && !curr->info.hasMorphing)) { // we can't have nice things! No more than maxInstanceCount due to UBO size limits e = std::find_if_not(curr, std::min(last, curr + maxInstanceCount), [lhs = *curr](Command const& rhs) { // primitives must be identical to be instanced. // Currently, instancing doesn't support skinning/morphing. 
- return lhs.primitive.mi == rhs.primitive.mi && - lhs.primitive.rph == rhs.primitive.rph && - lhs.primitive.vbih == rhs.primitive.vbih && - lhs.primitive.indexOffset == rhs.primitive.indexOffset && - lhs.primitive.indexCount == rhs.primitive.indexCount && - lhs.primitive.rasterState == rhs.primitive.rasterState; + return lhs.info.mi == rhs.info.mi && + lhs.info.rph == rhs.info.rph && + lhs.info.vbih == rhs.info.vbih && + lhs.info.indexOffset == rhs.info.indexOffset && + lhs.info.indexCount == rhs.info.indexCount && + lhs.info.rasterState == rhs.info.rasterState; }); } @@ -331,12 +331,12 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { assert_invariant(instancedPrimitiveOffset + instanceCount <= stagingBufferSize / sizeof(PerRenderableData)); for (uint32_t i = 0; i < instanceCount; i++) { - stagingBuffer[instancedPrimitiveOffset + i] = uboData[curr[i].primitive.index]; + stagingBuffer[instancedPrimitiveOffset + i] = uboData[curr[i].info.index]; } // make the first command instanced - curr[0].primitive.instanceCount = instanceCount; - curr[0].primitive.index = instancedPrimitiveOffset; + curr[0].info.instanceCount = instanceCount; + curr[0].info.index = instancedPrimitiveOffset; instancedPrimitiveOffset += instanceCount; // cancel commands that are now instances @@ -414,22 +414,22 @@ void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant, keyDraw |= makeField(ma->getRasterState().alphaToCoverage, BLENDING_MASK, BLENDING_SHIFT); cmdDraw.key = isBlendingCommand ? keyBlending : keyDraw; - cmdDraw.primitive.rasterState = ma->getRasterState(); + cmdDraw.info.rasterState = ma->getRasterState(); // for SSR pass, the blending mode of opaques (including MASKED) must be off // see Material.cpp. const bool blendingMustBeOff = !isBlendingCommand && Variant::isSSRVariant(variant); - cmdDraw.primitive.rasterState.blendFunctionSrcAlpha = blendingMustBeOff ? 
- BlendFunction::ONE : cmdDraw.primitive.rasterState.blendFunctionSrcAlpha; - cmdDraw.primitive.rasterState.blendFunctionDstAlpha = blendingMustBeOff ? - BlendFunction::ZERO : cmdDraw.primitive.rasterState.blendFunctionDstAlpha; - - cmdDraw.primitive.rasterState.inverseFrontFaces = inverseFrontFaces; - cmdDraw.primitive.rasterState.culling = mi->getCullingMode(); - cmdDraw.primitive.rasterState.colorWrite = mi->isColorWriteEnabled(); - cmdDraw.primitive.rasterState.depthWrite = mi->isDepthWriteEnabled(); - cmdDraw.primitive.rasterState.depthFunc = mi->getDepthFunc(); - cmdDraw.primitive.materialVariant = variant; + cmdDraw.info.rasterState.blendFunctionSrcAlpha = blendingMustBeOff ? + BlendFunction::ONE : cmdDraw.info.rasterState.blendFunctionSrcAlpha; + cmdDraw.info.rasterState.blendFunctionDstAlpha = blendingMustBeOff ? + BlendFunction::ZERO : cmdDraw.info.rasterState.blendFunctionDstAlpha; + + cmdDraw.info.rasterState.inverseFrontFaces = inverseFrontFaces; + cmdDraw.info.rasterState.culling = mi->getCullingMode(); + cmdDraw.info.rasterState.colorWrite = mi->isColorWriteEnabled(); + cmdDraw.info.rasterState.depthWrite = mi->isDepthWriteEnabled(); + cmdDraw.info.rasterState.depthFunc = mi->getDepthFunc(); + cmdDraw.info.materialVariant = variant; // we keep "RasterState::colorWrite" to the value set by material (could be disabled) } @@ -501,54 +501,55 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla Variant const variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward, uint8_t stereoEyeCount) noexcept { - // generateCommands() writes both the draw and depth commands simultaneously such that - // we go throw the list of renderables just once. 
- // (in principle, we could have split this method into two, at the cost of going through - // the list twice) - constexpr bool isColorPass = bool(commandTypeFlags & CommandTypeFlags::COLOR); constexpr bool isDepthPass = bool(commandTypeFlags & CommandTypeFlags::DEPTH); - static_assert(isColorPass != isDepthPass, "only color or depth pass supported"); - const bool depthContainsShadowCasters = bool(extraFlags & CommandTypeFlags::DEPTH_CONTAINS_SHADOW_CASTERS); - const bool depthFilterAlphaMaskedObjects = bool(extraFlags & CommandTypeFlags::DEPTH_FILTER_ALPHA_MASKED_OBJECTS); - const bool filterTranslucentObjects = bool(extraFlags & CommandTypeFlags::FILTER_TRANSLUCENT_OBJECTS); + bool const depthContainsShadowCasters = + bool(extraFlags & CommandTypeFlags::DEPTH_CONTAINS_SHADOW_CASTERS); + + bool const depthFilterAlphaMaskedObjects = + bool(extraFlags & CommandTypeFlags::DEPTH_FILTER_ALPHA_MASKED_OBJECTS); - auto const* const UTILS_RESTRICT soaWorldAABBCenter = soa.data(); - auto const* const UTILS_RESTRICT soaVisibility = soa.data(); - auto const* const UTILS_RESTRICT soaPrimitives = soa.data(); - auto const* const UTILS_RESTRICT soaSkinning = soa.data(); - auto const* const UTILS_RESTRICT soaMorphing = soa.data(); - auto const* const UTILS_RESTRICT soaVisibilityMask = soa.data(); - auto const* const UTILS_RESTRICT soaInstanceInfo = soa.data(); + bool const filterTranslucentObjects = + bool(extraFlags & CommandTypeFlags::FILTER_TRANSLUCENT_OBJECTS); - const bool hasShadowing = renderFlags & HAS_SHADOWING; - const bool viewInverseFrontFaces = renderFlags & HAS_INVERSE_FRONT_FACES; - const bool hasInstancedStereo = renderFlags & IS_INSTANCED_STEREOSCOPIC; + bool const hasShadowing = + renderFlags & HAS_SHADOWING; - Command cmdColor; + bool const viewInverseFrontFaces = + renderFlags & HAS_INVERSE_FRONT_FACES; + + bool const hasInstancedStereo = + renderFlags & IS_INSTANCED_STEREOSCOPIC; + + float const cameraPositionDotCameraForward = dot(cameraPosition, 
cameraForward); + + auto const* const UTILS_RESTRICT soaWorldAABBCenter = soa.data(); + auto const* const UTILS_RESTRICT soaVisibility = soa.data(); + auto const* const UTILS_RESTRICT soaPrimitives = soa.data(); + auto const* const UTILS_RESTRICT soaSkinning = soa.data(); + auto const* const UTILS_RESTRICT soaMorphing = soa.data(); + auto const* const UTILS_RESTRICT soaVisibilityMask = soa.data(); + auto const* const UTILS_RESTRICT soaInstanceInfo = soa.data(); + + Command cmd; - Command cmdDepth; if constexpr (isDepthPass) { - cmdDepth.primitive.materialVariant = variant; - cmdDepth.primitive.rasterState = {}; - cmdDepth.primitive.rasterState.colorWrite = Variant::isPickingVariant(variant) || Variant::isVSMVariant(variant); - cmdDepth.primitive.rasterState.depthWrite = true; - cmdDepth.primitive.rasterState.depthFunc = RasterState::DepthFunc::GE; - cmdDepth.primitive.rasterState.alphaToCoverage = false; + cmd.info.materialVariant = variant; + cmd.info.rasterState = {}; + cmd.info.rasterState.colorWrite = Variant::isPickingVariant(variant) || Variant::isVSMVariant(variant); + cmd.info.rasterState.depthWrite = true; + cmd.info.rasterState.depthFunc = RasterState::DepthFunc::GE; + cmd.info.rasterState.alphaToCoverage = false; } - const float cameraPositionDotCameraForward = dot(cameraPosition, cameraForward); - for (uint32_t i = range.first; i < range.last; ++i) { // Check if this renderable passes the visibilityMask. if (UTILS_UNLIKELY(!(soaVisibilityMask[i] & visibilityMask))) { continue; } - Variant renderableVariant = variant; - // Signed distance from camera plane to object's center. Positive distances are in front of // the camera. Some objects with a center behind the camera can still be visible // so their distance will be negative (this happens a lot for the shadow map). 
@@ -563,7 +564,6 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // float3 d = soaWorldAABBCenter[i] - cameraPosition; // float distance = dot(d, cameraForward); // but saves a couple of instruction, because part of the math is done outside the loop. - float distance = dot(soaWorldAABBCenter[i], cameraForward) - cameraPositionDotCameraForward; // We negate the distance to the camera in order to create a bit pattern that will // be sorted properly, this works because: @@ -575,66 +575,52 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // Here, objects close to the camera (but behind) will be drawn first. // An alternative that keeps the mathematical ordering is given here: // distanceBits ^= ((int32_t(distanceBits) >> 31) | 0x80000000u); - distance = -distance; - const uint32_t distanceBits = reinterpret_cast(distance); + float const distance = -dot(soaWorldAABBCenter[i], cameraForward) - cameraPositionDotCameraForward; + uint32_t const distanceBits = reinterpret_cast(distance); // calculate the per-primitive face winding order inversion - const bool inverseFrontFaces = viewInverseFrontFaces ^ soaVisibility[i].reversedWindingOrder; - const bool hasMorphing = soaVisibility[i].morphing; - const bool hasSkinning = soaVisibility[i].skinning; - const bool hasSkinningOrMorphing = hasSkinning || hasMorphing; - - cmdColor.key = makeField(soaVisibility[i].priority, PRIORITY_MASK, PRIORITY_SHIFT); - cmdColor.key |= makeField(soaVisibility[i].channel, CHANNEL_MASK, CHANNEL_SHIFT); - cmdColor.primitive.index = i; - cmdColor.primitive.instanceCount = - soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK; - cmdColor.primitive.hasHybridInstancing = (bool)soaInstanceInfo[i].handle; - - // soaInstanceInfo[i].count is the number of instances the user has requested, either for - // manual or hybrid instancing. Instanced stereo multiplies the number of instances by the - // eye count. 
- if (UTILS_UNLIKELY(hasInstancedStereo)) { - cmdColor.primitive.instanceCount = - (soaInstanceInfo[i].count * stereoEyeCount) | - PrimitiveInfo::USER_INSTANCE_MASK; - } + bool const inverseFrontFaces = viewInverseFrontFaces ^ soaVisibility[i].reversedWindingOrder; + bool const hasMorphing = soaVisibility[i].morphing; + bool const hasSkinning = soaVisibility[i].skinning; + bool const hasSkinningOrMorphing = hasSkinning || hasMorphing; // if we are already an SSR variant, the SRE bit is already set, // there is no harm setting it again static_assert(Variant::SPECIAL_SSR & Variant::SRE); + Variant renderableVariant = variant; renderableVariant.setShadowReceiver( Variant::isSSRVariant(variant) || (soaVisibility[i].receiveShadows & hasShadowing)); renderableVariant.setSkinning(hasSkinningOrMorphing); - const FRenderableManager::SkinningBindingInfo& skinning = soaSkinning[i]; - const FRenderableManager::MorphingBindingInfo& morphing = soaMorphing[i]; - - if constexpr (isDepthPass) { - cmdDepth.key = uint64_t(Pass::DEPTH); - cmdDepth.key |= uint64_t(CustomCommand::PASS); - cmdDepth.key |= makeField(soaVisibility[i].priority, PRIORITY_MASK, PRIORITY_SHIFT); - cmdDepth.key |= makeField(soaVisibility[i].channel, CHANNEL_MASK, CHANNEL_SHIFT); - cmdDepth.key |= makeField(distanceBits >> 22u, Z_BUCKET_MASK, Z_BUCKET_SHIFT); - cmdDepth.primitive.index = i; - cmdDepth.primitive.instanceCount = - soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK; - cmdDepth.primitive.hasHybridInstancing = (bool)soaInstanceInfo[i].handle; - cmdDepth.primitive.materialVariant.setSkinning(hasSkinningOrMorphing); - cmdDepth.primitive.rasterState.inverseFrontFaces = inverseFrontFaces; - cmdDepth.primitive.hasMorphing = (bool)morphing.handle; - cmdDepth.primitive.hasSkinning = (bool)skinning.handle; - - if (UTILS_UNLIKELY(hasInstancedStereo)) { - cmdColor.primitive.instanceCount = - (soaInstanceInfo[i].count * stereoEyeCount) | - PrimitiveInfo::USER_INSTANCE_MASK; - } - } + 
FRenderableManager::SkinningBindingInfo const& skinning = soaSkinning[i]; + FRenderableManager::MorphingBindingInfo const& morphing = soaMorphing[i]; + if constexpr (isColorPass) { renderableVariant.setFog(soaVisibility[i].fog && Variant::isFogVariant(variant)); - cmdColor.primitive.hasMorphing = (bool)morphing.handle; - cmdColor.primitive.hasSkinning = (bool)skinning.handle; + cmd.key = uint64_t(Pass::COLOR); + } else if constexpr (isDepthPass) { + cmd.key = uint64_t(Pass::DEPTH); + cmd.key |= uint64_t(CustomCommand::PASS); + cmd.key |= makeField(distanceBits >> 22u, Z_BUCKET_MASK, Z_BUCKET_SHIFT); + cmd.info.materialVariant.setSkinning(hasSkinningOrMorphing); + cmd.info.rasterState.inverseFrontFaces = inverseFrontFaces; + } + + cmd.key |= makeField(soaVisibility[i].priority, PRIORITY_MASK, PRIORITY_SHIFT); + cmd.key |= makeField(soaVisibility[i].channel, CHANNEL_MASK, CHANNEL_SHIFT); + cmd.info.index = i; + cmd.info.instanceCount = soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK; + cmd.info.hasHybridInstancing = (bool)soaInstanceInfo[i].handle; + cmd.info.hasMorphing = (bool)morphing.handle; + cmd.info.hasSkinning = (bool)skinning.handle; + + // soaInstanceInfo[i].count is the number of instances the user has requested, either for + // manual or hybrid instancing. Instanced stereo multiplies the number of instances by the + // eye count. + if (UTILS_UNLIKELY(hasInstancedStereo)) { + cmd.info.instanceCount = + (soaInstanceInfo[i].count * stereoEyeCount) | + PrimitiveInfo::USER_INSTANCE_MASK; } const bool shadowCaster = soaVisibility[i].castShadows & hasShadowing; @@ -654,19 +640,17 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // TODO: we should disable the SKN variant if this primitive doesn't have either // skinning or morphing. 
- if constexpr (isColorPass) { - cmdColor.primitive.mi = mi; - cmdColor.primitive.rph = primitive.getHwHandle(); - cmdColor.primitive.vbih = primitive.getVertexBufferInfoHandle(); - cmdColor.primitive.indexOffset = primitive.getIndexOffset(); - cmdColor.primitive.indexCount = primitive.getIndexCount(); - cmdColor.primitive.type = primitive.getPrimitiveType(); - - RenderPass::setupColorCommand(cmdColor, renderableVariant, mi, inverseFrontFaces); - - cmdColor.primitive.morphTargetBuffer = morphTargets.buffer->getHwHandle(); + cmd.info.mi = mi; + cmd.info.rph = primitive.getHwHandle(); + cmd.info.vbih = primitive.getVertexBufferInfoHandle(); + cmd.info.indexOffset = primitive.getIndexOffset(); + cmd.info.indexCount = primitive.getIndexCount(); + cmd.info.type = primitive.getPrimitiveType(); + cmd.info.morphTargetBuffer = morphTargets.buffer->getHwHandle(); - const bool blendPass = Pass(cmdColor.key & PASS_MASK) == Pass::BLENDED; + if constexpr (isColorPass) { + RenderPass::setupColorCommand(cmd, renderableVariant, mi, inverseFrontFaces); + const bool blendPass = Pass(cmd.key & PASS_MASK) == Pass::BLENDED; if (blendPass) { // TODO: at least for transparent objects, AABB should be per primitive // but that would break the "local" blend-order, which relies on @@ -674,16 +658,16 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // blend pass: // This will sort back-to-front for blended, and honor explicit ordering // for a given Z value, or globally. 
- cmdColor.key &= ~BLEND_ORDER_MASK; - cmdColor.key &= ~BLEND_DISTANCE_MASK; + cmd.key &= ~BLEND_ORDER_MASK; + cmd.key &= ~BLEND_DISTANCE_MASK; // write the distance - cmdColor.key |= makeField(~distanceBits, + cmd.key |= makeField(~distanceBits, BLEND_DISTANCE_MASK, BLEND_DISTANCE_SHIFT); // clear the distance if global ordering is enabled - cmdColor.key &= ~select(primitive.isGlobalBlendOrderEnabled(), + cmd.key &= ~select(primitive.isGlobalBlendOrderEnabled(), BLEND_DISTANCE_MASK); // write blend order - cmdColor.key |= makeField(primitive.getBlendOrder(), + cmd.key |= makeField(primitive.getBlendOrder(), BLEND_ORDER_MASK, BLEND_ORDER_SHIFT); @@ -700,11 +684,11 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // In this mode, we override the user's culling mode. // TWO_PASSES_TWO_SIDES: this command will be issued 2nd, draw front faces - cmdColor.primitive.rasterState.culling = + cmd.info.rasterState.culling = (mode == TransparencyMode::TWO_PASSES_TWO_SIDES) ? - CullingMode::BACK : cmdColor.primitive.rasterState.culling; + CullingMode::BACK : cmd.info.rasterState.culling; - uint64_t key = cmdColor.key; + uint64_t key = cmd.key; // draw this command AFTER THE NEXT ONE key |= makeField(1, BLEND_TWO_PASS_MASK, BLEND_TWO_PASS_SHIFT); @@ -718,76 +702,51 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // cancel command if both front and back faces are culled key |= select(mi->getCullingMode() == CullingMode::FRONT_AND_BACK); - *curr = cmdColor; + *curr = cmd; curr->key = key; ++curr; // TWO_PASSES_TWO_SIDES: this command will be issued first, draw back sides (i.e. cull front) - cmdColor.primitive.rasterState.culling = + cmd.info.rasterState.culling = (mode == TransparencyMode::TWO_PASSES_TWO_SIDES) ? 
- CullingMode::FRONT : cmdColor.primitive.rasterState.culling; + CullingMode::FRONT : cmd.info.rasterState.culling; // TWO_PASSES_ONE_SIDE: this command will be issued first, draw (back side) in depth buffer only - cmdColor.primitive.rasterState.depthWrite |= select(mode == TransparencyMode::TWO_PASSES_ONE_SIDE); - cmdColor.primitive.rasterState.colorWrite &= ~select(mode == TransparencyMode::TWO_PASSES_ONE_SIDE); - cmdColor.primitive.rasterState.depthFunc = + cmd.info.rasterState.depthWrite |= select(mode == TransparencyMode::TWO_PASSES_ONE_SIDE); + cmd.info.rasterState.colorWrite &= ~select(mode == TransparencyMode::TWO_PASSES_ONE_SIDE); + cmd.info.rasterState.depthFunc = (mode == TransparencyMode::TWO_PASSES_ONE_SIDE) ? - SamplerCompareFunc::GE : cmdColor.primitive.rasterState.depthFunc; - + SamplerCompareFunc::GE : cmd.info.rasterState.depthFunc; } else { // color pass: // This will bucket objects by Z, front-to-back and then sort by material // in each buckets. We use the top 10 bits of the distance, which // bucketizes the depth by its log2 and in 4 linear chunks in each bucket. - cmdColor.key &= ~Z_BUCKET_MASK; - cmdColor.key |= makeField(distanceBits >> 22u, Z_BUCKET_MASK, Z_BUCKET_SHIFT); + cmd.key &= ~Z_BUCKET_MASK; + cmd.key |= makeField(distanceBits >> 22u, Z_BUCKET_MASK, Z_BUCKET_SHIFT); } - - *curr = cmdColor; - - // cancel command if both front and back faces are culled - curr->key |= select(mi->getCullingMode() == CullingMode::FRONT_AND_BACK); - - ++curr; - } - - if constexpr (isDepthPass) { + } else if constexpr (isDepthPass) { const RasterState rs = ma->getRasterState(); const TransparencyMode mode = mi->getTransparencyMode(); const BlendingMode blendingMode = ma->getBlendingMode(); const bool translucent = (blendingMode != BlendingMode::OPAQUE && blendingMode != BlendingMode::MASKED); - // TODO: we should disable the SKN variant if this primitive doesn't have either - // skinning or morphing. 
- - cmdDepth.key |= mi->getSortingKey(); // already all set-up for direct or'ing - - // unconditionally write the command - cmdDepth.primitive.mi = mi; - cmdDepth.primitive.rph = primitive.getHwHandle(); - cmdDepth.primitive.vbih = primitive.getVertexBufferInfoHandle(); - cmdDepth.primitive.indexOffset = primitive.getIndexOffset(); - cmdDepth.primitive.indexCount = primitive.getIndexCount(); - cmdDepth.primitive.type = primitive.getPrimitiveType(); - - cmdDepth.primitive.rasterState.culling = mi->getCullingMode(); - cmdDepth.primitive.morphTargetBuffer = morphTargets.buffer->getHwHandle(); + cmd.key |= mi->getSortingKey(); // already all set-up for direct or'ing + cmd.info.rasterState.culling = mi->getCullingMode(); // FIXME: should writeDepthForShadowCasters take precedence over mi->getDepthWrite()? - cmdDepth.primitive.rasterState.depthWrite = (1 // only keep bit 0 + cmd.info.rasterState.depthWrite = (1 // only keep bit 0 & (mi->isDepthWriteEnabled() | (mode == TransparencyMode::TWO_PASSES_ONE_SIDE)) - & !(filterTranslucentObjects & translucent) - & !(depthFilterAlphaMaskedObjects & rs.alphaToCoverage)) - | writeDepthForShadowCasters; - - *curr = cmdDepth; - - // cancel command if both front and back faces are culled - curr->key |= select(mi->getCullingMode() == CullingMode::FRONT_AND_BACK); - - ++curr; + & !(filterTranslucentObjects & translucent) + & !(depthFilterAlphaMaskedObjects & rs.alphaToCoverage)) + | writeDepthForShadowCasters; } + + *curr = cmd; + // cancel command if both front and back faces are culled + curr->key |= select(mi->getCullingMode() == CullingMode::FRONT_AND_BACK); + ++curr; } } return curr; @@ -926,12 +885,12 @@ void RenderPass::Executor::execute(FEngine& engine, } // primitiveHandle may be invalid if no geometry was set on the renderable. 
- if (UTILS_UNLIKELY(!first->primitive.rph)) { + if (UTILS_UNLIKELY(!first->info.rph)) { continue; } // per-renderable uniform - PrimitiveInfo const info = first->primitive; + PrimitiveInfo const info = first->info; pipeline.rasterState = info.rasterState; pipeline.vertexBufferInfo = info.vbih; pipeline.primitiveType = info.type; diff --git a/filament/src/RenderPass.h b/filament/src/RenderPass.h index 4a45d057da0..183c7ca7092 100644 --- a/filament/src/RenderPass.h +++ b/filament/src/RenderPass.h @@ -266,7 +266,7 @@ class RenderPass { struct alignas(8) Command { // 64 bytes CommandKey key = 0; // 8 bytes - PrimitiveInfo primitive; // 56 bytes + PrimitiveInfo info; // 56 bytes bool operator < (Command const& rhs) const noexcept { return key < rhs.key; } // placement new declared as "throw" to avoid the compiler's null-check inline void* operator new (size_t, void* ptr) { From 254cf15f0e1e425fa332de29d03d20565980a079 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Wed, 24 Apr 2024 12:32:25 -0700 Subject: [PATCH 17/31] a couple of significant improvements to instancing The main goal of this change is to avoid having to select the "per renderable" UBO at execution time. Instead we want the PrimitiveInfo structure to already know which UBO will be used. Concretely, this means that this determination must be done when the RenderPass is created. When automatic instancing is used, the RenderPass creates a temporary UBO to store the instance info. This UBO's life-time is dictated by both the life-time of the RenderPass and the Executors that were created from it. For this reason we introduce SharedHandle<> to correctly account for the owner's lifetime. This fixes a potential bug where that handle could have been destroyed and used later; in practice this bug didn't happen however. A couple other changes: - RenderPass has a bunch of fields that were actually temporary, so we removed those.
- The canonical "per-renderable" UBO was owned by View but accessed through Scene. This was confusing, it's now accessed through View. --- filament/CMakeLists.txt | 1 + filament/src/RenderPass.cpp | 163 ++++++++++++++++-------------- filament/src/RenderPass.h | 76 ++++++++------ filament/src/RendererUtils.cpp | 4 +- filament/src/RendererUtils.h | 2 +- filament/src/ShadowMapManager.cpp | 3 +- filament/src/SharedHandle.h | 126 +++++++++++++++++++++++ filament/src/details/Renderer.cpp | 3 +- filament/src/details/Scene.cpp | 5 - filament/src/details/Scene.h | 5 - filament/src/details/View.cpp | 3 +- filament/src/details/View.h | 5 +- 12 files changed, 276 insertions(+), 120 deletions(-) create mode 100644 filament/src/SharedHandle.h diff --git a/filament/CMakeLists.txt b/filament/CMakeLists.txt index 12e5ca76691..796acedb7d2 100644 --- a/filament/CMakeLists.txt +++ b/filament/CMakeLists.txt @@ -163,6 +163,7 @@ set(PRIVATE_HDRS src/ResourceList.h src/ShadowMap.h src/ShadowMapManager.h + src/SharedHandle.h src/TypedUniformBuffer.h src/UniformBuffer.h src/components/CameraManager.h diff --git a/filament/src/RenderPass.cpp b/filament/src/RenderPass.cpp index 03ad3037162..c091f34113e 100644 --- a/filament/src/RenderPass.cpp +++ b/filament/src/RenderPass.cpp @@ -18,8 +18,8 @@ #include "RenderPrimitive.h" #include "ShadowMap.h" +#include "SharedHandle.h" -#include "details/Camera.h" #include "details/Material.h" #include "details/MaterialInstance.h" #include "details/View.h" @@ -88,24 +88,26 @@ RenderPass RenderPassBuilder::build(FEngine& engine) { // ------------------------------------------------------------------------------------------------ +void RenderPass::BufferObjectHandleDeleter::operator()( + backend::BufferObjectHandle handle) noexcept { + if (handle) { + driver.get().destroyBufferObject(handle); + } +} + +// ------------------------------------------------------------------------------------------------ + RenderPass::RenderPass(FEngine& engine, RenderPassBuilder 
const& builder) noexcept : mRenderableSoa(*builder.mRenderableSoa), - mVisibleRenderables(builder.mVisibleRenderables), - mUboHandle(builder.mUboHandle), - mCameraPosition(builder.mCameraPosition), - mCameraForwardVector(builder.mCameraForwardVector), - mFlags(builder.mFlags), - mVariant(builder.mVariant), - mVisibilityMask(builder.mVisibilityMask), mScissorViewport(builder.mScissorViewport), mCustomCommands(engine.getPerRenderPassArena()) { // compute the number of commands we need updateSummedPrimitiveCounts( - const_cast(mRenderableSoa), mVisibleRenderables); + const_cast(mRenderableSoa), builder.mVisibleRenderables); uint32_t commandCount = - FScene::getPrimitiveCount(mRenderableSoa, mVisibleRenderables.last); + FScene::getPrimitiveCount(mRenderableSoa, builder.mVisibleRenderables.last); const bool colorPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::COLOR); const bool depthPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::DEPTH); commandCount *= uint32_t(colorPass * 2 + depthPass); @@ -129,7 +131,15 @@ RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexce mCommandBegin = curr; mCommandEnd = curr + commandCount + customCommandCount; - appendCommands(engine, { curr, commandCount }, builder.mCommandTypeFlags); + appendCommands(engine, { curr, commandCount }, + builder.mUboHandle, + builder.mVisibleRenderables, + builder.mCommandTypeFlags, + builder.mFlags, + builder.mVisibilityMask, + builder.mVariant, + builder.mCameraPosition, + builder.mCameraForwardVector); if (builder.mCustomCommands.has_value()) { Command* p = curr + commandCount; @@ -147,7 +157,8 @@ RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexce } // this destructor is actually heavy because it inlines ~vector<> -RenderPass::~RenderPass() noexcept = default; +RenderPass::~RenderPass() noexcept { +} void RenderPass::resize(Arena& arena, size_t count) noexcept { if (mCommandBegin) { @@ -157,11 +168,18 @@ void 
RenderPass::resize(Arena& arena, size_t count) noexcept { } void RenderPass::appendCommands(FEngine& engine, - Slice commands, CommandTypeFlags const commandTypeFlags) noexcept { + Slice commands, + backend::BufferObjectHandle const uboHandle, + utils::Range const vr, + CommandTypeFlags const commandTypeFlags, + RenderFlags const renderFlags, + FScene::VisibleMaskType const visibilityMask, + Variant const variant, + float3 const cameraPosition, + float3 const cameraForwardVector) noexcept { SYSTRACE_CALL(); SYSTRACE_CONTEXT(); - utils::Range const vr = mVisibleRenderables; // trace the number of visible renderables SYSTRACE_VALUE32("visibleRenderables", vr.size()); if (UTILS_UNLIKELY(vr.empty())) { @@ -174,9 +192,6 @@ void RenderPass::appendCommands(FEngine& engine, } JobSystem& js = engine.getJobSystem(); - const RenderFlags renderFlags = mFlags; - const Variant variant = mVariant; - const FScene::VisibleMaskType visibilityMask = mVisibilityMask; // up-to-date summed primitive counts needed for generateCommands() FScene::RenderableSoa const& soa = mRenderableSoa; @@ -186,13 +201,14 @@ void RenderPass::appendCommands(FEngine& engine, auto stereoscopicEyeCount = engine.getConfig().stereoscopicEyeCount; - const float3 cameraPosition(mCameraPosition); - const float3 cameraForwardVector(mCameraForwardVector); - auto work = [commandTypeFlags, curr, &soa, variant, renderFlags, visibilityMask, cameraPosition, - cameraForwardVector, stereoscopicEyeCount] + auto work = [commandTypeFlags, curr, &soa, + boh = uboHandle, + variant, renderFlags, visibilityMask, + cameraPosition, cameraForwardVector, stereoscopicEyeCount] (uint32_t startIndex, uint32_t indexCount) { RenderPass::generateCommands(commandTypeFlags, curr, - soa, { startIndex, startIndex + indexCount }, variant, renderFlags, visibilityMask, + soa, { startIndex, startIndex + indexCount }, boh, + variant, renderFlags, visibilityMask, cameraPosition, cameraForwardVector, stereoscopicEyeCount); }; @@ -204,8 +220,8 @@ 
void RenderPass::appendCommands(FEngine& engine, js.runAndWait(jobCommandsParallel); } - // always add an "eof" command - // "eof" command. these commands are guaranteed to be sorted last in the + // Always add an "eof" command + // "eof" command. These commands are guaranteed to be sorted last in the // command buffer. curr[commandCount - 1].key = uint64_t(Pass::SENTINEL); @@ -293,6 +309,8 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { // Currently, if we have skinnning or morphing, we can't use auto instancing. This is // because the morphing/skinning data for comparison is not easily accessible. // Additionally, we can't have a different skinning/morphing per instance anyway. + // And thirdly, the info.index meaning changes with instancing, it is the index into + // the instancing buffer no longer the index into the soa. Command const* e = curr + 1; if (UTILS_LIKELY(!curr->info.hasSkinning && !curr->info.hasMorphing)) { // we can't have nice things! No more than maxInstanceCount due to UBO size limits @@ -318,6 +336,15 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { // allocate our staging buffer only if needed if (UTILS_UNLIKELY(!stagingBuffer)) { + + // create a temporary UBO for instancing + size_t const count = mCommandEnd - mCommandBegin; + mInstancedUboHandle = BufferObjectSharedHandle{ + engine.getDriverApi().createBufferObject( + count * sizeof(PerRenderableData) + sizeof(PerRenderableUib), + BufferObjectBinding::UNIFORM, BufferUsage::STATIC), + engine.getDriverApi() }; + // TODO: use stream inline buffer for small sizes // TODO: use a pool for larger heap buffers // buffer large enough for all instances data @@ -337,6 +364,8 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { // make the first command instanced curr[0].info.instanceCount = instanceCount; curr[0].info.index = instancedPrimitiveOffset; + curr[0].info.boh = mInstancedUboHandle; + instancedPrimitiveOffset += 
instanceCount; // cancel commands that are now instances @@ -356,12 +385,6 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { // we have instanced primitives DriverApi& driver = engine.getDriverApi(); - // TODO: maybe use a pool? so we can reuse the buffer. - // create a ubo to hold the instanced primitive data - mInstancedUboHandle = driver.createBufferObject( - sizeof(PerRenderableData) * instancedPrimitiveOffset + sizeof(PerRenderableUib), - BufferObjectBinding::UNIFORM, backend::BufferUsage::STATIC); - // copy our instanced ubo data driver.updateBufferObjectUnsynchronized(mInstancedUboHandle, { stagingBuffer, sizeof(PerRenderableData) * instancedPrimitiveOffset, @@ -437,6 +460,7 @@ void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant, UTILS_NOINLINE void RenderPass::generateCommands(CommandTypeFlags commandTypeFlags, Command* const commands, FScene::RenderableSoa const& soa, Range range, + backend::BufferObjectHandle renderablesUbo, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward, uint8_t stereoEyeCount) noexcept { @@ -470,12 +494,14 @@ void RenderPass::generateCommands(CommandTypeFlags commandTypeFlags, Command* co switch (commandTypeFlags & (CommandTypeFlags::COLOR | CommandTypeFlags::DEPTH)) { case CommandTypeFlags::COLOR: curr = generateCommandsImpl(commandTypeFlags, curr, - soa, range, variant, renderFlags, visibilityMask, cameraPosition, cameraForward, + soa, range, renderablesUbo, + variant, renderFlags, visibilityMask, cameraPosition, cameraForward, stereoEyeCount); break; case CommandTypeFlags::DEPTH: curr = generateCommandsImpl(commandTypeFlags, curr, - soa, range, variant, renderFlags, visibilityMask, cameraPosition, cameraForward, + soa, range, renderablesUbo, + variant, renderFlags, visibilityMask, cameraPosition, cameraForward, stereoEyeCount); break; default: @@ -498,6 +524,7 @@ UTILS_NOINLINE RenderPass::Command* 
RenderPass::generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags, Command* UTILS_RESTRICT curr, FScene::RenderableSoa const& UTILS_RESTRICT soa, Range range, + backend::BufferObjectHandle renderablesUbo, Variant const variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward, uint8_t stereoEyeCount) noexcept { @@ -609,18 +636,25 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla cmd.key |= makeField(soaVisibility[i].priority, PRIORITY_MASK, PRIORITY_SHIFT); cmd.key |= makeField(soaVisibility[i].channel, CHANNEL_MASK, CHANNEL_SHIFT); cmd.info.index = i; - cmd.info.instanceCount = soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK; cmd.info.hasHybridInstancing = (bool)soaInstanceInfo[i].handle; + cmd.info.instanceCount = soaInstanceInfo[i].count; cmd.info.hasMorphing = (bool)morphing.handle; cmd.info.hasSkinning = (bool)skinning.handle; // soaInstanceInfo[i].count is the number of instances the user has requested, either for // manual or hybrid instancing. Instanced stereo multiplies the number of instances by the // eye count. 
- if (UTILS_UNLIKELY(hasInstancedStereo)) { - cmd.info.instanceCount = - (soaInstanceInfo[i].count * stereoEyeCount) | - PrimitiveInfo::USER_INSTANCE_MASK; + if (hasInstancedStereo) { + cmd.info.instanceCount *= stereoEyeCount; + } + + if (cmd.info.hasHybridInstancing) { + // with hybrid instancing, we already know which UBO to use + cmd.info.boh = soaInstanceInfo[i].handle; + } else { + // with no- or user- instancing, we can only know after instanceify() + assert_invariant(cmd.info.instanceCount <= 1); + cmd.info.boh = renderablesUbo; } const bool shadowCaster = soaVisibility[i].castShadows & hasShadowing; @@ -926,37 +960,16 @@ void RenderPass::Executor::execute(FEngine& engine, assert_invariant(ma); pipeline.program = ma->getProgram(info.materialVariant); - uint16_t const instanceCount = - info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK; - auto getPerObjectUboHandle = - [this, &info, &instanceCount]() -> std::pair, uint32_t> { - if (info.hasHybridInstancing) { - FScene::RenderableSoa const& soa = *mRenderableSoa; - // "hybrid" instancing -- instanceBufferHandle takes the place of the UBO - return { soa.elementAt(info.index).handle, 0 }; - } - bool const userInstancing = - (info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u; - if (!userInstancing && instanceCount > 1) { - // automatic instancing - return { - mInstancedUboHandle, - info.index * sizeof(PerRenderableData) }; - } else { - // manual instancing - return { mUboHandle, info.index * sizeof(PerRenderableData) }; - } - }; - // Bind per-renderable uniform block. There is no need to attempt to skip this command // because the backends already do this. - auto const [perObjectUboHandle, offset] = getPerObjectUboHandle(); - assert_invariant(perObjectUboHandle); + size_t const offset = info.hasHybridInstancing ? 
+ 0 : info.index * sizeof(PerRenderableData); + + assert_invariant(info.boh); + driver.bindBufferRange(BufferObjectBinding::UNIFORM, +UniformBindingPoints::PER_RENDERABLE, - perObjectUboHandle, - offset, - sizeof(PerRenderableUib)); + info.boh, offset, sizeof(PerRenderableUib)); if (UTILS_UNLIKELY(info.hasSkinning)) { @@ -1020,7 +1033,7 @@ void RenderPass::Executor::execute(FEngine& engine, driver.bindRenderPrimitive(info.rph); } - driver.draw2(info.indexOffset, info.indexCount, instanceCount); + driver.draw2(info.indexOffset, info.indexCount, info.instanceCount); } } @@ -1030,21 +1043,16 @@ void RenderPass::Executor::execute(FEngine& engine, engine.flush(); } } - - if (mInstancedUboHandle) { - driver.destroyBufferObject(mInstancedUboHandle); - } - } // ------------------------------------------------------------------------------------------------ -RenderPass::Executor::Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept +RenderPass::Executor::Executor(RenderPass const* pass, Command const* b, Command const* e, + BufferObjectSharedHandle instancedUbo) noexcept : mRenderableSoa(&pass->mRenderableSoa), mCommands(b, e), mCustomCommands(pass->mCustomCommands.data(), pass->mCustomCommands.size()), - mUboHandle(pass->mUboHandle), - mInstancedUboHandle(pass->mInstancedUboHandle), + mInstancedUboHandle(std::move(instancedUbo)), mScissorViewport(pass->mScissorViewport), mPolygonOffsetOverride(false), mScissorOverride(false) { @@ -1052,7 +1060,14 @@ RenderPass::Executor::Executor(RenderPass const* pass, Command const* b, Command assert_invariant(e <= pass->end()); } -RenderPass::Executor::Executor(Executor const& rhs) = default; +RenderPass::Executor::Executor() noexcept + : mPolygonOffsetOverride(false), + mScissorOverride(false) { +} + +RenderPass::Executor::Executor(Executor&& rhs) noexcept = default; + +RenderPass::Executor& RenderPass::Executor::operator=(Executor&& rhs) noexcept = default; // this destructor is actually heavy because it 
inlines ~vector<> RenderPass::Executor::~Executor() noexcept = default; diff --git a/filament/src/RenderPass.h b/filament/src/RenderPass.h index 183c7ca7092..3ab81d02e4b 100644 --- a/filament/src/RenderPass.h +++ b/filament/src/RenderPass.h @@ -19,16 +19,19 @@ #include "Allocators.h" +#include "SharedHandle.h" + #include "details/Camera.h" #include "details/Scene.h" #include "private/filament/Variant.h" -#include "utils/BitmaskEnum.h" +#include #include #include #include +#include #include #include #include @@ -243,6 +246,7 @@ class RenderPass { }; backend::RenderPrimitiveHandle rph; // 4 bytes backend::VertexBufferInfoHandle vbih; // 4 bytes + backend::BufferObjectHandle boh; // 4 bytes uint32_t indexOffset; // 4 bytes uint32_t indexCount; // 4 bytes uint32_t index = 0; // 4 bytes @@ -257,10 +261,7 @@ class RenderPass { bool hasMorphing : 1; // 1 bit bool hasHybridInstancing : 1; // 1 bit - uint64_t rfu[2]; // 16 bytes - - static const uint16_t USER_INSTANCE_MASK = 0x8000u; - static const uint16_t INSTANCE_COUNT_MASK = 0x7fffu; + uint32_t rfu[3]; // 16 bytes }; static_assert(sizeof(PrimitiveInfo) == 56); @@ -292,11 +293,11 @@ class RenderPass { // RenderPass can only be moved RenderPass(RenderPass&& rhs) = default; + RenderPass& operator=(RenderPass&& rhs) = delete; // could be supported if needed // RenderPass can't be copied RenderPass(RenderPass const& rhs) = delete; RenderPass& operator=(RenderPass const& rhs) = delete; - RenderPass& operator=(RenderPass&& rhs) = delete; // allocated commands ARE NOT freed, they're owned by the Arena ~RenderPass() noexcept; @@ -311,6 +312,17 @@ class RenderPass { backend::Handle renderTarget, backend::RenderPassParams params) noexcept; + + class BufferObjectHandleDeleter { + std::reference_wrapper driver; + public: + explicit BufferObjectHandleDeleter(backend::DriverApi& driver) noexcept : driver(driver) { } + void operator()(backend::BufferObjectHandle handle) noexcept; + }; + + using BufferObjectSharedHandle = SharedHandle< 
+ backend::HwBufferObject, BufferObjectHandleDeleter>; + /* * Executor holds the range of commands to execute for a given pass */ @@ -323,8 +335,7 @@ class RenderPass { FScene::RenderableSoa const* mRenderableSoa = nullptr; utils::Slice mCommands; utils::Slice mCustomCommands; - backend::Handle mUboHandle; - backend::Handle mInstancedUboHandle; + BufferObjectSharedHandle mInstancedUboHandle; backend::Viewport mScissorViewport; backend::Viewport mScissor{}; // value of scissor override @@ -332,7 +343,8 @@ class RenderPass { bool mPolygonOffsetOverride : 1; // whether to override the polygon offset setting bool mScissorOverride : 1; // whether to override the polygon offset setting - Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept; + Executor(RenderPass const* pass, Command const* b, Command const* e, + BufferObjectSharedHandle instancedUbo) noexcept; void execute(FEngine& engine, const Command* first, const Command* last) const noexcept; @@ -341,9 +353,16 @@ class RenderPass { backend::Viewport const& scissor) noexcept; public: - Executor() = default; - Executor(Executor const& rhs); - Executor& operator=(Executor const& rhs) = default; + Executor() noexcept; + + // can't be copied + Executor(Executor const& rhs) noexcept = delete; + Executor& operator=(Executor const& rhs) noexcept = delete; + + // can be moved + Executor(Executor&& rhs) noexcept; + Executor& operator=(Executor&& rhs) noexcept; + ~Executor() noexcept; // if non-null, overrides the material's polygon offset @@ -358,11 +377,11 @@ class RenderPass { // returns a new executor for this pass Executor getExecutor() const { - return { this, mCommandBegin, mCommandEnd }; + return getExecutor(mCommandBegin, mCommandEnd); } Executor getExecutor(Command const* b, Command const* e) const { - return { this, b, e }; + return { this, b, e, mInstancedUboHandle }; } private: @@ -373,7 +392,15 @@ class RenderPass { // This is the main function of this class, this appends commands to the 
pass using // the current camera, geometry and flags set. This can be called multiple times if needed. void appendCommands(FEngine& engine, - utils::Slice commands, CommandTypeFlags commandTypeFlags) noexcept; + utils::Slice commands, + backend::BufferObjectHandle uboHandle, + utils::Range const visibleRenderables, + CommandTypeFlags commandTypeFlags, + RenderFlags renderFlags, + FScene::VisibleMaskType visibilityMask, + Variant variant, + math::float3 cameraPosition, + math::float3 cameraForwardVector) noexcept; // Appends a custom command. void appendCustomCommand(Command* commands, @@ -399,6 +426,7 @@ class RenderPass { static inline void generateCommands(CommandTypeFlags commandTypeFlags, Command* commands, FScene::RenderableSoa const& soa, utils::Range range, + backend::BufferObjectHandle renderablesUbo, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, math::float3 cameraPosition, math::float3 cameraForward, @@ -407,6 +435,7 @@ class RenderPass { template static inline Command* generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags, Command* curr, FScene::RenderableSoa const& soa, utils::Range range, + backend::BufferObjectHandle renderablesUbo, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, math::float3 cameraPosition, math::float3 cameraForward, uint8_t instancedStereoEyeCount) noexcept; @@ -417,23 +446,12 @@ class RenderPass { static void updateSummedPrimitiveCounts( FScene::RenderableSoa& renderableData, utils::Range vr) noexcept; - FScene::RenderableSoa const& mRenderableSoa; - utils::Range const mVisibleRenderables; - backend::Handle const mUboHandle; - math::float3 const mCameraPosition; - math::float3 const mCameraForwardVector; - RenderFlags const mFlags; - Variant const mVariant; - FScene::VisibleMaskType const mVisibilityMask; backend::Viewport const mScissorViewport; - - // Pointer to the first command - Command* mCommandBegin = nullptr; - // Pointer to one past the last 
command - Command* mCommandEnd = nullptr; + Command* mCommandBegin = nullptr; // Pointer to the first command + Command* mCommandEnd = nullptr; // Pointer to one past the last command // a UBO for instanced primitives - backend::Handle mInstancedUboHandle; + BufferObjectSharedHandle mInstancedUboHandle; // a vector for our custom commands using CustomCommandVector = std::vector>; diff --git a/filament/src/RendererUtils.cpp b/filament/src/RendererUtils.cpp index 3e1cafdbc9d..e5b29567fe4 100644 --- a/filament/src/RendererUtils.cpp +++ b/filament/src/RendererUtils.cpp @@ -54,7 +54,7 @@ FrameGraphId RendererUtils::colorPass( FrameGraph& fg, const char* name, FEngine& engine, FView const& view, FrameGraphTexture::Descriptor const& colorBufferDesc, ColorPassConfig const& config, PostProcessManager::ColorGradingConfig colorGradingConfig, - RenderPass::Executor const& passExecutor) noexcept { + RenderPass::Executor passExecutor) noexcept { struct ColorPassData { FrameGraphId shadows; @@ -200,7 +200,7 @@ FrameGraphId RendererUtils::colorPass( .clearFlags = clearColorFlags | clearDepthFlags | clearStencilFlags}); blackboard["depth"] = data.depth; }, - [=, &view, &engine](FrameGraphResources const& resources, + [=, passExecutor = std::move(passExecutor), &view, &engine](FrameGraphResources const& resources, ColorPassData const& data, DriverApi& driver) { auto out = resources.getRenderPassInfo(); diff --git a/filament/src/RendererUtils.h b/filament/src/RendererUtils.h index 0a5f7b2808b..9b7e93cbc60 100644 --- a/filament/src/RendererUtils.h +++ b/filament/src/RendererUtils.h @@ -76,7 +76,7 @@ class RendererUtils { FrameGraphTexture::Descriptor const& colorBufferDesc, ColorPassConfig const& config, PostProcessManager::ColorGradingConfig colorGradingConfig, - RenderPass::Executor const& passExecutor) noexcept; + RenderPass::Executor passExecutor) noexcept; static std::pair, bool> refractionPass( FrameGraph& fg, FEngine& engine, FView const& view, diff --git 
a/filament/src/ShadowMapManager.cpp b/filament/src/ShadowMapManager.cpp index 4f971450fbe..8943371855c 100644 --- a/filament/src/ShadowMapManager.cpp +++ b/filament/src/ShadowMapManager.cpp @@ -365,7 +365,8 @@ FrameGraphId ShadowMapManager::render(FEngine& engine, FrameG .camera(cameraInfo) .visibilityMask(entry.visibilityMask) .geometry(scene->getRenderableData(), - entry.range, scene->getRenderableUBO()) + entry.range, + view.getRenderableUBO()) .commandTypeFlags(RenderPass::CommandTypeFlags::SHADOW) .build(engine); diff --git a/filament/src/SharedHandle.h b/filament/src/SharedHandle.h new file mode 100644 index 00000000000..8d793f31e43 --- /dev/null +++ b/filament/src/SharedHandle.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TNT_FILAMENT_SHARED_HANDLE_H +#define TNT_FILAMENT_SHARED_HANDLE_H + +#include + +#include + +namespace filament { + +/* + * SharedHandle is a bit like shared_ptr<> but for Handle<>, the destruction is + * performed by a Deleter functor that needs to be provided. We only support strong + * references for now. + * + * caveat: The current implementation is not thread-safe. 
+ */ +template +struct SharedHandle { + SharedHandle() noexcept = default; + + ~SharedHandle() noexcept { + dec(mControlBlockPtr); + } + + SharedHandle(SharedHandle const& rhs) noexcept + : mControlBlockPtr(inc(rhs.mControlBlockPtr)) { + } + + SharedHandle(SharedHandle&& rhs) noexcept { + std::swap(mControlBlockPtr, rhs.mControlBlockPtr); + } + + SharedHandle& operator=(SharedHandle const& rhs) noexcept { + if (this != &rhs) { + inc(rhs.mControlBlockPtr); // add a reference to other control block + dec(mControlBlockPtr); // drop a reference from ours (possibly destroying it) + mControlBlockPtr = rhs.mControlBlockPtr; // adopt the new control block + } + return *this; + } + + SharedHandle& operator=(SharedHandle&& rhs) noexcept { + if (this != &rhs) { + std::swap(mControlBlockPtr, rhs.mControlBlockPtr); + } + return *this; + } + + // initialize the SharedHandle and provide a Deleter + template + explicit SharedHandle(backend::Handle const& rhs, ARGS&& ... args) noexcept + : mControlBlockPtr(new ControlBlock(rhs, std::forward(args)...)) { + } + + // initialize the SharedHandle and provide a Deleter + template + explicit SharedHandle(backend::Handle&& rhs, ARGS&& ... args) noexcept + : mControlBlockPtr(new ControlBlock(rhs, std::forward(args)...)) { + } + + // automatically converts to Handle + operator backend::Handle() const noexcept { // NOLINT(*-explicit-constructor) + return mControlBlockPtr ? mControlBlockPtr->handle : backend::Handle{}; + } + + explicit operator bool() const noexcept { + return mControlBlockPtr ? (bool)mControlBlockPtr->handle : false; + } + + void clear() noexcept { dec(mControlBlockPtr); } + +private: + struct ControlBlock { + template + explicit ControlBlock(backend::Handle handle, ARGS&& ... 
args) noexcept + : deleter(std::forward(args)...), handle(std::move(handle)) { + } + void inc() noexcept { + ++count; + } + void dec() noexcept { + if (--count == 0) { + deleter(handle); + delete this; + } + } + Deleter deleter; + int32_t count = 1; + backend::Handle handle; + }; + + ControlBlock* inc(ControlBlock* const ctrlBlk) noexcept { + if (ctrlBlk) { + ctrlBlk->inc(); + } + return ctrlBlk; + } + + void dec(ControlBlock* const ctrlBlk) noexcept { + if (ctrlBlk) { + ctrlBlk->dec(); + } + } + + ControlBlock* mControlBlockPtr = nullptr; +}; + +} // namespace filament + +#endif // TNT_FILAMENT_SHARED_HANDLE_H diff --git a/filament/src/details/Renderer.cpp b/filament/src/details/Renderer.cpp index cc4994e6190..8fc05cf537d 100644 --- a/filament/src/details/Renderer.cpp +++ b/filament/src/details/Renderer.cpp @@ -840,7 +840,8 @@ void FRenderer::renderJob(RootArenaScope& rootArenaScope, FView& view) { passBuilder.camera(cameraInfo); passBuilder.geometry(scene.getRenderableData(), - view.getVisibleRenderables(), scene.getRenderableUBO()); + view.getVisibleRenderables(), + view.getRenderableUBO()); // view set-ups that need to happen before rendering fg.addTrivialSideEffectPass("Prepare View Uniforms", diff --git a/filament/src/details/Scene.cpp b/filament/src/details/Scene.cpp index ff6af6293a5..01888e89da1 100644 --- a/filament/src/details/Scene.cpp +++ b/filament/src/details/Scene.cpp @@ -388,9 +388,6 @@ void FScene::updateUBOs( SYSTRACE_CALL(); FEngine::DriverApi& driver = mEngine.getDriverApi(); - // store the UBO handle - mRenderableViewUbh = renderableUbh; - // don't allocate more than 16 KiB directly into the render stream static constexpr size_t MAX_STREAM_ALLOCATION_COUNT = 64; // 16 KiB const size_t count = visibleRenderables.size(); @@ -450,8 +447,6 @@ void FScene::updateUBOs( } void FScene::terminate(FEngine&) { - // DO NOT destroy this UBO, it's owned by the View - mRenderableViewUbh.clear(); } void FScene::prepareDynamicLights(const CameraInfo& camera, 
diff --git a/filament/src/details/Scene.h b/filament/src/details/Scene.h index 490d115af3c..9690e6386e2 100644 --- a/filament/src/details/Scene.h +++ b/filament/src/details/Scene.h @@ -80,10 +80,6 @@ class FScene : public Scene { void prepareDynamicLights(const CameraInfo& camera, backend::Handle lightUbh) noexcept; - backend::Handle getRenderableUBO() const noexcept { - return mRenderableViewUbh; - } - /* * Storage for per-frame renderable data */ @@ -228,7 +224,6 @@ class FScene : public Scene { */ RenderableSoa mRenderableData; LightSoa mLightData; - backend::Handle mRenderableViewUbh; // This is actually owned by the view. bool mHasContactShadows = false; // State shared between Scene and driver callbacks. diff --git a/filament/src/details/View.cpp b/filament/src/details/View.cpp index 9ce255332d1..19b50ce7762 100644 --- a/filament/src/details/View.cpp +++ b/filament/src/details/View.cpp @@ -649,7 +649,8 @@ void FView::prepare(FEngine& engine, DriverApi& driver, RootArenaScope& rootAren const size_t count = std::max(size_t(16u), (4u * merged.size() + 2u) / 3u); mRenderableUBOSize = uint32_t(count * sizeof(PerRenderableData)); driver.destroyBufferObject(mRenderableUbh); - mRenderableUbh = driver.createBufferObject(mRenderableUBOSize + sizeof(PerRenderableUib), + mRenderableUbh = driver.createBufferObject( + mRenderableUBOSize + sizeof(PerRenderableUib), BufferObjectBinding::UNIFORM, BufferUsage::DYNAMIC); } else { // TODO: should we shrink the underlying UBO at some point? 
diff --git a/filament/src/details/View.h b/filament/src/details/View.h index be15af9e520..c2fb5bb58b5 100644 --- a/filament/src/details/View.h +++ b/filament/src/details/View.h @@ -437,8 +437,11 @@ class FView : public View { return mFogEntity; } -private: + backend::Handle getRenderableUBO() const noexcept { + return mRenderableUbh; + } +private: struct FPickingQuery : public PickingQuery { private: FPickingQuery(uint32_t x, uint32_t y, From 08d6d57ad0613a3916e3c1f0d73b264fee48f5cf Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Fri, 26 Apr 2024 14:48:15 -0700 Subject: [PATCH 18/31] vk: simplify VulkanAttachment (#7801) --- filament/backend/src/vulkan/VulkanBlitter.cpp | 34 +++---------------- filament/backend/src/vulkan/VulkanContext.cpp | 12 ++++--- filament/backend/src/vulkan/VulkanContext.h | 6 ++-- filament/backend/src/vulkan/VulkanDriver.cpp | 16 ++++----- .../backend/src/vulkan/VulkanReadPixels.cpp | 5 ++- .../backend/src/vulkan/VulkanReadPixels.h | 6 ++-- .../caching/VulkanDescriptorSetManager.cpp | 2 +- 7 files changed, 29 insertions(+), 52 deletions(-) diff --git a/filament/backend/src/vulkan/VulkanBlitter.cpp b/filament/backend/src/vulkan/VulkanBlitter.cpp index cde45ad0d87..215003e7c11 100644 --- a/filament/backend/src/vulkan/VulkanBlitter.cpp +++ b/filament/backend/src/vulkan/VulkanBlitter.cpp @@ -45,21 +45,8 @@ inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, << " layout=" << dst.getLayout() << utils::io::endl; } - const VkImageSubresourceRange srcRange = { - .aspectMask = aspect, - .baseMipLevel = src.level, - .levelCount = 1, - .baseArrayLayer = src.layer, - .layerCount = 1, - }; - - const VkImageSubresourceRange dstRange = { - .aspectMask = aspect, - .baseMipLevel = dst.level, - .levelCount = 1, - .baseArrayLayer = dst.layer, - .layerCount = 1, - }; + VkImageSubresourceRange const srcRange = src.getSubresourceRange(); + VkImageSubresourceRange const dstRange = dst.getSubresourceRange(); VulkanLayout oldSrcLayout = 
src.getLayout(); VulkanLayout oldDstLayout = dst.getLayout(); @@ -97,21 +84,8 @@ inline void resolveFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspe << " layout=" << dst.getLayout() << utils::io::endl; } - const VkImageSubresourceRange srcRange = { - .aspectMask = aspect, - .baseMipLevel = src.level, - .levelCount = 1, - .baseArrayLayer = src.layer, - .layerCount = 1, - }; - - const VkImageSubresourceRange dstRange = { - .aspectMask = aspect, - .baseMipLevel = dst.level, - .levelCount = 1, - .baseArrayLayer = dst.layer, - .layerCount = 1, - }; + VkImageSubresourceRange const srcRange = src.getSubresourceRange(); + VkImageSubresourceRange const dstRange = dst.getSubresourceRange(); VulkanLayout oldSrcLayout = src.getLayout(); VulkanLayout oldDstLayout = dst.getLayout(); diff --git a/filament/backend/src/vulkan/VulkanContext.cpp b/filament/backend/src/vulkan/VulkanContext.cpp index 1fc2e46c77c..c590977af2a 100644 --- a/filament/backend/src/vulkan/VulkanContext.cpp +++ b/filament/backend/src/vulkan/VulkanContext.cpp @@ -57,15 +57,19 @@ VkExtent2D VulkanAttachment::getExtent2D() const { return { std::max(1u, texture->width >> level), std::max(1u, texture->height >> level) }; } -VkImageView VulkanAttachment::getImageView(VkImageAspectFlags aspect) { +VkImageView VulkanAttachment::getImageView() { assert_invariant(texture); - return texture->getAttachmentView(getSubresourceRange(aspect)); + return texture->getAttachmentView(getSubresourceRange()); } -VkImageSubresourceRange VulkanAttachment::getSubresourceRange(VkImageAspectFlags aspect) const { +bool VulkanAttachment::isDepth() const { + return texture->getImageAspect() & VK_IMAGE_ASPECT_DEPTH_BIT; +} + +VkImageSubresourceRange VulkanAttachment::getSubresourceRange() const { assert_invariant(texture); return { - .aspectMask = aspect, + .aspectMask = texture->getImageAspect(), .baseMipLevel = uint32_t(level), .levelCount = 1, .baseArrayLayer = uint32_t(layer), diff --git 
a/filament/backend/src/vulkan/VulkanContext.h b/filament/backend/src/vulkan/VulkanContext.h index bdc3af691a9..995175ce943 100644 --- a/filament/backend/src/vulkan/VulkanContext.h +++ b/filament/backend/src/vulkan/VulkanContext.h @@ -44,13 +44,15 @@ struct VulkanAttachment { VulkanTexture* texture = nullptr; uint8_t level = 0; uint16_t layer = 0; + + bool isDepth() const; VkImage getImage() const; VkFormat getFormat() const; VulkanLayout getLayout() const; VkExtent2D getExtent2D() const; - VkImageView getImageView(VkImageAspectFlags aspect); + VkImageView getImageView(); // TODO: maybe embed aspect into the attachment or texture itself. - VkImageSubresourceRange getSubresourceRange(VkImageAspectFlags aspect) const; + VkImageSubresourceRange getSubresourceRange() const; }; class VulkanTimestamps { diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index dada1d695cd..5d015a7a41a 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -1253,7 +1253,7 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP discardEndVal &= ~TargetBufferFlags::DEPTH; clearVal &= ~TargetBufferFlags::DEPTH; } - auto const attachmentSubresourceRange = depth.getSubresourceRange(VK_IMAGE_ASPECT_DEPTH_BIT); + auto const attachmentSubresourceRange = depth.getSubresourceRange(); depth.texture->setLayout(attachmentSubresourceRange, VulkanLayout::DEPTH_ATTACHMENT); } @@ -1279,9 +1279,9 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP rpkey.needsResolveMask |= (1 << i); } if (info.texture->getPrimaryImageLayout() != VulkanLayout::COLOR_ATTACHMENT) { - ((VulkanTexture*) info.texture)->transitionLayout(cmdbuffer, - info.getSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT), - VulkanLayout::COLOR_ATTACHMENT); + ((VulkanTexture*) info.texture) + ->transitionLayout(cmdbuffer, info.getSubresourceRange(), + VulkanLayout::COLOR_ATTACHMENT); } } else { rpkey.colorFormat[i] = 
VK_FORMAT_UNDEFINED; @@ -1304,21 +1304,21 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP fbkey.color[i] = VK_NULL_HANDLE; fbkey.resolve[i] = VK_NULL_HANDLE; } else if (fbkey.samples == 1) { - fbkey.color[i] = rt->getColor(i).getImageView(VK_IMAGE_ASPECT_COLOR_BIT); + fbkey.color[i] = rt->getColor(i).getImageView(); fbkey.resolve[i] = VK_NULL_HANDLE; assert_invariant(fbkey.color[i]); } else { - fbkey.color[i] = rt->getMsaaColor(i).getImageView(VK_IMAGE_ASPECT_COLOR_BIT); + fbkey.color[i] = rt->getMsaaColor(i).getImageView(); VulkanTexture* texture = (VulkanTexture*) rt->getColor(i).texture; if (texture->samples == 1) { - fbkey.resolve[i] = rt->getColor(i).getImageView(VK_IMAGE_ASPECT_COLOR_BIT); + fbkey.resolve[i] = rt->getColor(i).getImageView(); assert_invariant(fbkey.resolve[i]); } assert_invariant(fbkey.color[i]); } } if (depth.texture) { - fbkey.depth = depth.getImageView(VK_IMAGE_ASPECT_DEPTH_BIT); + fbkey.depth = depth.getImageView(); assert_invariant(fbkey.depth); // Vulkan 1.1 does not support multisampled depth resolve, so let's check here diff --git a/filament/backend/src/vulkan/VulkanReadPixels.cpp b/filament/backend/src/vulkan/VulkanReadPixels.cpp index 83e341c2768..4fdebaadedd 100644 --- a/filament/backend/src/vulkan/VulkanReadPixels.cpp +++ b/filament/backend/src/vulkan/VulkanReadPixels.cpp @@ -118,7 +118,7 @@ void VulkanReadPixels::terminate() noexcept { VulkanReadPixels::VulkanReadPixels(VkDevice device) : mDevice(device) {} -void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x, uint32_t const y, +void VulkanReadPixels::run(VulkanRenderTarget* srcTarget, uint32_t const x, uint32_t const y, uint32_t const width, uint32_t const height, uint32_t const graphicsQueueFamilyIndex, PixelBufferDescriptor&& pbd, SelecteMemoryFunction const& selectMemoryFunc, OnReadCompleteFunction const& readCompleteFunc) { @@ -232,8 +232,7 @@ void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x }); 
VulkanAttachment const srcAttachment = srcTarget->getColor(0); - const VkImageSubresourceRange srcRange - = srcAttachment.getSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT); + VkImageSubresourceRange const srcRange = srcAttachment.getSubresourceRange(); srcTexture->transitionLayout(cmdbuffer, srcRange, VulkanLayout::TRANSFER_SRC); VkImageCopy const imageCopyRegion = { diff --git a/filament/backend/src/vulkan/VulkanReadPixels.h b/filament/backend/src/vulkan/VulkanReadPixels.h index 4c3e3297e94..e5dd2a31bb9 100644 --- a/filament/backend/src/vulkan/VulkanReadPixels.h +++ b/filament/backend/src/vulkan/VulkanReadPixels.h @@ -26,9 +26,7 @@ #include #include #include -#include #include -#include namespace filament::backend { @@ -74,8 +72,8 @@ class VulkanReadPixels { void terminate() noexcept; - void run(VulkanRenderTarget const* srcTarget, uint32_t x, uint32_t y, uint32_t width, - uint32_t height, uint32_t graphicsQueueFamilyIndex, PixelBufferDescriptor&& pbd, + void run(VulkanRenderTarget* srcTarget, uint32_t x, uint32_t y, uint32_t width, uint32_t height, + uint32_t graphicsQueueFamilyIndex, PixelBufferDescriptor&& pbd, SelecteMemoryFunction const& selectMemoryFunc, OnReadCompleteFunction const& readCompleteFunc); diff --git a/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp b/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp index cad4339a1b0..b316cdb3c21 100644 --- a/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp +++ b/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp @@ -1017,7 +1017,7 @@ class VulkanDescriptorSetManager::Impl { void updateInputAttachment(Handle, VulkanAttachment attachment) noexcept { VkDescriptorImageInfo info = { - .imageView = attachment.getImageView(VK_IMAGE_ASPECT_COLOR_BIT), + .imageView = attachment.getImageView(), .imageLayout = ImgUtil::getVkLayout(attachment.getLayout()), }; mInputAttachment = {attachment, info}; From 65c794881f18bd6ee816857f1ad53ce5f079a5ad Mon Sep 17 
00:00:00 2001 From: Powei Feng Date: Fri, 26 Apr 2024 15:26:16 -0700 Subject: [PATCH 19/31] vk: various clean up (#7805) - use namespace for ImageUtility - use FILAMENT_BACKEND_DEBUG_FLAG - remove unused usage flag type - add const list of VKFormat for iteration - fix sampler name debug --- filament/backend/src/vulkan/VulkanBlitter.cpp | 18 +- filament/backend/src/vulkan/VulkanConstants.h | 16 +- filament/backend/src/vulkan/VulkanDriver.cpp | 10 +- .../backend/src/vulkan/VulkanFboCache.cpp | 22 +- filament/backend/src/vulkan/VulkanHandles.cpp | 17 - filament/backend/src/vulkan/VulkanHandles.h | 8 - .../backend/src/vulkan/VulkanImageUtility.cpp | 65 +--- .../backend/src/vulkan/VulkanImageUtility.h | 107 ++++-- .../backend/src/vulkan/VulkanReadPixels.cpp | 7 +- .../backend/src/vulkan/VulkanStagePool.cpp | 3 +- filament/backend/src/vulkan/VulkanTexture.cpp | 19 +- filament/backend/src/vulkan/VulkanUtility.cpp | 2 - filament/backend/src/vulkan/VulkanUtility.h | 321 +++++++++++++++++- .../caching/VulkanDescriptorSetManager.cpp | 8 +- .../src/vulkan/platform/VulkanPlatform.cpp | 16 +- 15 files changed, 446 insertions(+), 193 deletions(-) diff --git a/filament/backend/src/vulkan/VulkanBlitter.cpp b/filament/backend/src/vulkan/VulkanBlitter.cpp index 215003e7c11..967337c769b 100644 --- a/filament/backend/src/vulkan/VulkanBlitter.cpp +++ b/filament/backend/src/vulkan/VulkanBlitter.cpp @@ -31,8 +31,6 @@ using namespace utils; namespace filament::backend { -using ImgUtil = VulkanImageUtility; - namespace { inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, VkFilter filter, @@ -61,15 +59,15 @@ inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, .dstOffsets = { dstRect[0], dstRect[1] }, }}; vkCmdBlitImage(cmdbuffer, - src.getImage(), ImgUtil::getVkLayout(VulkanLayout::TRANSFER_SRC), - dst.getImage(), ImgUtil::getVkLayout(VulkanLayout::TRANSFER_DST), + src.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC), + 
dst.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_DST), 1, blitRegions, filter); if (oldSrcLayout == VulkanLayout::UNDEFINED) { - oldSrcLayout = ImgUtil::getDefaultLayout(src.texture->usage); + oldSrcLayout = imgutil::getDefaultLayout(src.texture->usage); } if (oldDstLayout == VulkanLayout::UNDEFINED) { - oldDstLayout = ImgUtil::getDefaultLayout(dst.texture->usage); + oldDstLayout = imgutil::getDefaultLayout(dst.texture->usage); } src.texture->transitionLayout(cmdbuffer, srcRange, oldSrcLayout); dst.texture->transitionLayout(cmdbuffer, dstRange, oldDstLayout); @@ -103,15 +101,15 @@ inline void resolveFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspe .extent = { src.getExtent2D().width, src.getExtent2D().height, 1 }, }}; vkCmdResolveImage(cmdbuffer, - src.getImage(), ImgUtil::getVkLayout(VulkanLayout::TRANSFER_SRC), - dst.getImage(), ImgUtil::getVkLayout(VulkanLayout::TRANSFER_DST), + src.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC), + dst.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_DST), 1, resolveRegions); if (oldSrcLayout == VulkanLayout::UNDEFINED) { - oldSrcLayout = ImgUtil::getDefaultLayout(src.texture->usage); + oldSrcLayout = imgutil::getDefaultLayout(src.texture->usage); } if (oldDstLayout == VulkanLayout::UNDEFINED) { - oldDstLayout = ImgUtil::getDefaultLayout(dst.texture->usage); + oldDstLayout = imgutil::getDefaultLayout(dst.texture->usage); } src.texture->transitionLayout(cmdbuffer, srcRange, oldSrcLayout); dst.texture->transitionLayout(cmdbuffer, dstRange, oldDstLayout); diff --git a/filament/backend/src/vulkan/VulkanConstants.h b/filament/backend/src/vulkan/VulkanConstants.h index 03736d558d5..f870302e723 100644 --- a/filament/backend/src/vulkan/VulkanConstants.h +++ b/filament/backend/src/vulkan/VulkanConstants.h @@ -75,18 +75,14 @@ #define FVK_DEBUG_PERFORMANCE \ FVK_DEBUG_SYSTRACE -#define FVK_DEBUG_CORRECTNESS \ - FVK_DEBUG_VALIDATION | \ - FVK_DEBUG_SHADER_MODULE | \ - FVK_DEBUG_TEXTURE | \ - 
FVK_DEBUG_LAYOUT_TRANSITION - -#define FVK_DEBUG_RENDER_PASSES \ - FVK_DEBUG_GROUP_MARKERS | \ - FVK_DEBUG_PRINT_GROUP_MARKERS +#if defined(FILAMENT_BACKEND_DEBUG_FLAG) +#define FVK_DEBUG_FORWARDED_FLAG (FILAMENT_BACKEND_DEBUG_FLAG & FVK_DEBUG_EVERYTHING) +#else +#define FVK_DEBUG_FORWARDED_FLAG 0 +#endif #ifndef NDEBUG -#define FVK_DEBUG_FLAGS (FVK_DEBUG_PERFORMANCE) +#define FVK_DEBUG_FLAGS (FVK_DEBUG_PERFORMANCE | FVK_DEBUG_FORWARDED_FLAG) #else #define FVK_DEBUG_FLAGS 0 #endif diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index 5d015a7a41a..d039d3ff267 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -22,7 +22,6 @@ #include "VulkanCommands.h" #include "VulkanDriverFactory.h" #include "VulkanHandles.h" -#include "VulkanImageUtility.h" #include "VulkanMemory.h" #include "VulkanTexture.h" @@ -206,8 +205,6 @@ void DebugUtils::setName(VkObjectType type, uint64_t handle, char const* name) { } #endif // FVK_EANBLED(FVK_DEBUG_DEBUG_UTILS) -using ImgUtil = VulkanImageUtility; - Dispatcher VulkanDriver::getDispatcher() const noexcept { return ConcreteDispatcher::make(); } @@ -1820,15 +1817,12 @@ void VulkanDriver::bindPipeline(PipelineState pipelineState) { texture = mEmptyTexture; } + VkSampler const vksampler = mSamplerCache.getSampler(boundSampler->s); + #if FVK_ENABLED_DEBUG_SAMPLER_NAME VulkanDriver::DebugUtils::setName(VK_OBJECT_TYPE_SAMPLER, reinterpret_cast(vksampler), bindingToName[binding].c_str()); - VulkanDriver::DebugUtils::setName(VK_OBJECT_TYPE_SAMPLER, - reinterpret_cast(samplerInfo.sampler), bindingToName[binding].c_str()); #endif - - VkSampler const vksampler = mSamplerCache.getSampler(boundSampler->s); - mDescriptorSetManager.updateSampler({}, binding, texture, vksampler); } diff --git a/filament/backend/src/vulkan/VulkanFboCache.cpp b/filament/backend/src/vulkan/VulkanFboCache.cpp index f4b222b1545..d8dc804fa05 100644 --- 
a/filament/backend/src/vulkan/VulkanFboCache.cpp +++ b/filament/backend/src/vulkan/VulkanFboCache.cpp @@ -29,8 +29,6 @@ using namespace bluevk; namespace filament::backend { -using ImgUtil = VulkanImageUtility; - bool VulkanFboCache::RenderPassEq::operator()(const RenderPassKey& k1, const RenderPassKey& k2) const { if (k1.initialColorLayoutMask != k2.initialColorLayoutMask) return false; @@ -197,7 +195,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { if (config.colorFormat[i] == VK_FORMAT_UNDEFINED) { continue; } - const VkImageLayout subpassLayout = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT); + const VkImageLayout subpassLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT); uint32_t index; if (!hasSubpasses) { @@ -243,9 +241,9 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .stencilLoadOp = kDontCare, .stencilStoreOp = kDisableStore, .initialLayout = ((!discard && config.initialColorLayoutMask & (1 << i)) || clear) - ? ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT) - : ImgUtil::getVkLayout(VulkanLayout::UNDEFINED), - .finalLayout = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), + ? 
imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT) + : imgutil::getVkLayout(VulkanLayout::UNDEFINED), + .finalLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), }; } @@ -272,7 +270,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { pResolveAttachment->attachment = attachmentIndex; pResolveAttachment->layout - = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT_RESOLVE); + = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT_RESOLVE); ++pResolveAttachment; attachments[attachmentIndex++] = { @@ -282,8 +280,8 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .storeOp = kEnableStore, .stencilLoadOp = kDontCare, .stencilStoreOp = kDisableStore, - .initialLayout = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), - .finalLayout = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), + .initialLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), + .finalLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), }; } @@ -292,7 +290,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { const bool clear = any(config.clear & TargetBufferFlags::DEPTH); const bool discardStart = any(config.discardStart & TargetBufferFlags::DEPTH); const bool discardEnd = any(config.discardEnd & TargetBufferFlags::DEPTH); - depthAttachmentRef.layout = ImgUtil::getVkLayout(config.renderPassDepthLayout); + depthAttachmentRef.layout = imgutil::getVkLayout(config.renderPassDepthLayout); depthAttachmentRef.attachment = attachmentIndex; attachments[attachmentIndex++] = { .format = config.depthFormat, @@ -301,8 +299,8 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .storeOp = discardEnd ? 
kDisableStore : kEnableStore, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = ImgUtil::getVkLayout(config.initialDepthLayout), - .finalLayout = ImgUtil::getVkLayout(config.finalDepthLayout), + .initialLayout = imgutil::getVkLayout(config.initialDepthLayout), + .finalLayout = imgutil::getVkLayout(config.finalDepthLayout), }; } renderPassInfo.attachmentCount = attachmentIndex; diff --git a/filament/backend/src/vulkan/VulkanHandles.cpp b/filament/backend/src/vulkan/VulkanHandles.cpp index 9d615ce5151..5c951ed5b2e 100644 --- a/filament/backend/src/vulkan/VulkanHandles.cpp +++ b/filament/backend/src/vulkan/VulkanHandles.cpp @@ -65,21 +65,6 @@ static constexpr Bitmask fromStageFlags(ShaderStageFlags2 flags, uint8_t binding return ret; } -UsageFlags getUsageFlags(uint16_t binding, ShaderStageFlags flags, UsageFlags src) { - // NOTE: if you modify this function, you also need to modify getShaderStageFlags. - assert_invariant(binding < MAX_SAMPLER_COUNT); - if (any(flags & ShaderStageFlags::VERTEX)) { - src.set(binding); - } - if (any(flags & ShaderStageFlags::FRAGMENT)) { - src.set(MAX_SAMPLER_COUNT + binding); - } - // TODO: add support for compute by extending SHADER_MODULE_COUNT and ensuring UsageFlags - // has 186 bits (MAX_SAMPLER_COUNT * 3) - // assert_invariant(!any(flags & ~(ShaderStageFlags::VERTEX | ShaderStageFlags::FRAGMENT))); - return src; -} - constexpr decltype(VulkanProgram::MAX_SHADER_MODULES) MAX_SHADER_MODULES = VulkanProgram::MAX_SHADER_MODULES; @@ -236,7 +221,6 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept auto& groupInfo = builder.getSamplerGroupInfo(); auto& bindingToSamplerIndex = mInfo->bindingToSamplerIndex; auto& bindings = mInfo->bindings; - auto& usage = mInfo->usage; for (uint8_t groupInd = 0; groupInd < Program::SAMPLER_BINDING_COUNT; groupInd++) { auto const& group = groupInfo[groupInd]; auto const& samplers = group.samplers; @@ 
-245,7 +229,6 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept bindingToSamplerIndex[binding] = (groupInd << 8) | (0xff & i); assert_invariant(bindings.find(binding) == bindings.end()); bindings.insert(binding); - usage = getUsageFlags(binding, group.stageFlags, usage); #if FVK_ENABLED_DEBUG_SAMPLER_NAME bindingToName[binding] = samplers[i].name.c_str(); diff --git a/filament/backend/src/vulkan/VulkanHandles.h b/filament/backend/src/vulkan/VulkanHandles.h index 8abdccfd968..111e1299a3f 100644 --- a/filament/backend/src/vulkan/VulkanHandles.h +++ b/filament/backend/src/vulkan/VulkanHandles.h @@ -198,10 +198,6 @@ struct VulkanProgram : public HwProgram, VulkanResource { return mInfo->bindingToSamplerIndex; } - inline UsageFlags getUsage() const { - return mInfo->usage; - } - // Get a list of the sampler binding indices so that we don't have to loop through all possible // samplers. inline BindingList const& getBindings() const { return mInfo->bindings; } @@ -235,10 +231,6 @@ struct VulkanProgram : public HwProgram, VulkanResource { #endif {} - // This bitset maps to each of the sampler in the sampler groups associated with this - // program, and whether each sampler is used in which shader (i.e. vert, frag, compute). - UsageFlags usage; - BindingList bindings; // We store the samplerGroupIndex as the top 8-bit and the index within each group as the lower 8-bit. 
diff --git a/filament/backend/src/vulkan/VulkanImageUtility.cpp b/filament/backend/src/vulkan/VulkanImageUtility.cpp index ada9de0ae26..6c038e429f0 100644 --- a/filament/backend/src/vulkan/VulkanImageUtility.cpp +++ b/filament/backend/src/vulkan/VulkanImageUtility.cpp @@ -26,38 +26,10 @@ using namespace bluevk; -namespace filament::backend { +namespace filament::backend::imgutil { namespace { -inline VkImageLayout getVkImageLayout(VulkanLayout layout) { - switch (layout) { - case VulkanLayout::UNDEFINED: - return VK_IMAGE_LAYOUT_UNDEFINED; - case VulkanLayout::READ_WRITE: - return VK_IMAGE_LAYOUT_GENERAL; - case VulkanLayout::READ_ONLY: - return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - case VulkanLayout::TRANSFER_SRC: - return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - case VulkanLayout::TRANSFER_DST: - return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - case VulkanLayout::DEPTH_ATTACHMENT: - return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - case VulkanLayout::DEPTH_SAMPLER: - return VK_IMAGE_LAYOUT_GENERAL; - case VulkanLayout::PRESENT: - return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - // Filament sometimes samples from one miplevel while writing to another level in the same - // texture (e.g. bloom does this). Moreover we'd like to avoid lots of expensive layout - // transitions. So, keep it simple and use GENERAL for all color-attachable textures. 
- case VulkanLayout::COLOR_ATTACHMENT: - return VK_IMAGE_LAYOUT_GENERAL; - case VulkanLayout::COLOR_ATTACHMENT_RESOLVE: - return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - } -} - inline std::tuple getVkTransition(const VulkanLayoutTransition& transition) { @@ -70,11 +42,11 @@ getVkTransition(const VulkanLayoutTransition& transition) { srcStage = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; break; case VulkanLayout::COLOR_ATTACHMENT: - srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT - | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT - | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - srcStage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT - | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcStage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; break; case VulkanLayout::READ_WRITE: srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; @@ -154,27 +126,12 @@ getVkTransition(const VulkanLayoutTransition& transition) { } return std::make_tuple(srcAccessMask, dstAccessMask, srcStage, dstStage, - getVkImageLayout(transition.oldLayout), getVkImageLayout(transition.newLayout)); + getVkLayout(transition.oldLayout), getVkLayout(transition.newLayout)); } }// anonymous namespace -VkImageViewType VulkanImageUtility::getViewType(SamplerType target) { - switch (target) { - case SamplerType::SAMPLER_CUBEMAP: - return VK_IMAGE_VIEW_TYPE_CUBE; - case SamplerType::SAMPLER_2D_ARRAY: - return VK_IMAGE_VIEW_TYPE_2D_ARRAY; - case SamplerType::SAMPLER_CUBEMAP_ARRAY: - return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; - case SamplerType::SAMPLER_3D: - return VK_IMAGE_VIEW_TYPE_3D; - default: - return VK_IMAGE_VIEW_TYPE_2D; - } -} - -void VulkanImageUtility::transitionLayout(VkCommandBuffer cmdbuffer, +void 
transitionLayout(VkCommandBuffer cmdbuffer, VulkanLayoutTransition transition) { if (transition.oldLayout == transition.newLayout) { return; @@ -197,10 +154,6 @@ void VulkanImageUtility::transitionLayout(VkCommandBuffer cmdbuffer, vkCmdPipelineBarrier(cmdbuffer, srcStage, dstStage, 0, 0, nullptr, 0, nullptr, 1, &barrier); } -VkImageLayout VulkanImageUtility::getVkLayout(VulkanLayout layout) { - return getVkImageLayout(layout); -} - }// namespace filament::backend bool operator<(const VkImageSubresourceRange& a, const VkImageSubresourceRange& b) { @@ -222,7 +175,7 @@ bool operator<(const VkImageSubresourceRange& a, const VkImageSubresourceRange& case filament::backend::VulkanLayout::VALUE: { \ out << #VALUE; \ out << " [" \ - << filament::backend::VulkanImageUtility::getVkLayout( \ + << filament::backend::imgutil::getVkLayout( \ filament::backend::VulkanLayout::VALUE) \ << "]"; \ break; \ diff --git a/filament/backend/src/vulkan/VulkanImageUtility.h b/filament/backend/src/vulkan/VulkanImageUtility.h index efb3506471b..92aaac96ea4 100644 --- a/filament/backend/src/vulkan/VulkanImageUtility.h +++ b/filament/backend/src/vulkan/VulkanImageUtility.h @@ -59,44 +59,85 @@ struct VulkanLayoutTransition { VkImageSubresourceRange subresources; }; -class VulkanImageUtility { -public: - static VkImageViewType getViewType(SamplerType target); - - inline static VulkanLayout getDefaultLayout(TextureUsage usage) { - if (any(usage & TextureUsage::DEPTH_ATTACHMENT)) { - if (any(usage & TextureUsage::SAMPLEABLE)) { - return VulkanLayout::DEPTH_SAMPLER; - } else { - return VulkanLayout::DEPTH_ATTACHMENT; - } - } - - if (any(usage & TextureUsage::COLOR_ATTACHMENT)) { - return VulkanLayout::COLOR_ATTACHMENT; +namespace imgutil { + +inline VkImageViewType getViewType(SamplerType target) { + switch (target) { + case SamplerType::SAMPLER_CUBEMAP: + return VK_IMAGE_VIEW_TYPE_CUBE; + case SamplerType::SAMPLER_2D_ARRAY: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case 
SamplerType::SAMPLER_CUBEMAP_ARRAY: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case SamplerType::SAMPLER_3D: + return VK_IMAGE_VIEW_TYPE_3D; + default: + return VK_IMAGE_VIEW_TYPE_2D; + } +} + +inline VulkanLayout getDefaultLayout(TextureUsage usage) { + if (any(usage & TextureUsage::DEPTH_ATTACHMENT)) { + if (any(usage & TextureUsage::SAMPLEABLE)) { + return VulkanLayout::DEPTH_SAMPLER; + } else { + return VulkanLayout::DEPTH_ATTACHMENT; } - // Finally, the layout for an immutable texture is optimal read-only. - return VulkanLayout::READ_ONLY; } - inline static VulkanLayout getDefaultLayout(VkImageUsageFlags vkusage) { - TextureUsage usage {}; - if (vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - usage = usage | TextureUsage::DEPTH_ATTACHMENT; - } - if (vkusage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - usage = usage | TextureUsage::COLOR_ATTACHMENT; - } - if (vkusage & VK_IMAGE_USAGE_SAMPLED_BIT) { - usage = usage | TextureUsage::SAMPLEABLE; - } - return getDefaultLayout(usage); + if (any(usage & TextureUsage::COLOR_ATTACHMENT)) { + return VulkanLayout::COLOR_ATTACHMENT; } + // Finally, the layout for an immutable texture is optimal read-only. 
+ return VulkanLayout::READ_ONLY; +} + +inline VulkanLayout getDefaultLayout(VkImageUsageFlags vkusage) { + TextureUsage usage{}; + if (vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + usage = usage | TextureUsage::DEPTH_ATTACHMENT; + } + if (vkusage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + usage = usage | TextureUsage::COLOR_ATTACHMENT; + } + if (vkusage & VK_IMAGE_USAGE_SAMPLED_BIT) { + usage = usage | TextureUsage::SAMPLEABLE; + } + return getDefaultLayout(usage); +} + +constexpr inline VkImageLayout getVkLayout(VulkanLayout layout) { + switch (layout) { + case VulkanLayout::UNDEFINED: + return VK_IMAGE_LAYOUT_UNDEFINED; + case VulkanLayout::READ_WRITE: + return VK_IMAGE_LAYOUT_GENERAL; + case VulkanLayout::READ_ONLY: + return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + case VulkanLayout::TRANSFER_SRC: + return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + case VulkanLayout::TRANSFER_DST: + return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + case VulkanLayout::DEPTH_ATTACHMENT: + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + case VulkanLayout::DEPTH_SAMPLER: + return VK_IMAGE_LAYOUT_GENERAL; + case VulkanLayout::PRESENT: + return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + // Filament sometimes samples from one miplevel while writing to another level in the + // same texture (e.g. bloom does this). Moreover we'd like to avoid lots of expensive + // layout transitions. So, keep it simple and use GENERAL for all color-attachable + // textures. 
+ case VulkanLayout::COLOR_ATTACHMENT: + return VK_IMAGE_LAYOUT_GENERAL; + case VulkanLayout::COLOR_ATTACHMENT_RESOLVE: + return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } +} - static VkImageLayout getVkLayout(VulkanLayout layout); - - static void transitionLayout(VkCommandBuffer cmdbuffer, VulkanLayoutTransition transition); -}; +void transitionLayout(VkCommandBuffer cmdbuffer, VulkanLayoutTransition transition); + +} // namespace imgutil } // namespace filament::backend diff --git a/filament/backend/src/vulkan/VulkanReadPixels.cpp b/filament/backend/src/vulkan/VulkanReadPixels.cpp index 4fdebaadedd..2fc7ee2e090 100644 --- a/filament/backend/src/vulkan/VulkanReadPixels.cpp +++ b/filament/backend/src/vulkan/VulkanReadPixels.cpp @@ -28,7 +28,6 @@ using namespace bluevk; namespace filament::backend { -using ImgUtil = VulkanImageUtility; using TaskHandler = VulkanReadPixels::TaskHandler; using WorkloadFunc = TaskHandler::WorkloadFunc; using OnCompleteFunc = TaskHandler::OnCompleteFunc; @@ -218,7 +217,7 @@ void VulkanReadPixels::run(VulkanRenderTarget* srcTarget, uint32_t const x, uint }; vkBeginCommandBuffer(cmdbuffer, &binfo); - ImgUtil::transitionLayout(cmdbuffer, { + imgutil::transitionLayout(cmdbuffer, { .image = stagingImage, .oldLayout = VulkanLayout::UNDEFINED, .newLayout = VulkanLayout::TRANSFER_DST, @@ -265,8 +264,8 @@ void VulkanReadPixels::run(VulkanRenderTarget* srcTarget, uint32_t const x, uint imageCopyRegion.srcOffset.y + imageCopyRegion.extent.height <= srcExtent.height); vkCmdCopyImage(cmdbuffer, srcAttachment.getImage(), - ImgUtil::getVkLayout(VulkanLayout::TRANSFER_SRC), stagingImage, - ImgUtil::getVkLayout(VulkanLayout::TRANSFER_DST), 1, &imageCopyRegion); + imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC), stagingImage, + imgutil::getVkLayout(VulkanLayout::TRANSFER_DST), 1, &imageCopyRegion); // Restore the source image layout. 
srcTexture->transitionLayout(cmdbuffer, srcRange, VulkanLayout::COLOR_ATTACHMENT); diff --git a/filament/backend/src/vulkan/VulkanStagePool.cpp b/filament/backend/src/vulkan/VulkanStagePool.cpp index 4c21104d003..05c843f0bae 100644 --- a/filament/backend/src/vulkan/VulkanStagePool.cpp +++ b/filament/backend/src/vulkan/VulkanStagePool.cpp @@ -17,6 +17,7 @@ #include "VulkanStagePool.h" #include "VulkanConstants.h" +#include "VulkanImageUtility.h" #include "VulkanMemory.h" #include "VulkanUtility.h" @@ -118,7 +119,7 @@ VulkanStageImage const* VulkanStagePool::acquireImage(PixelDataFormat format, Pi // VK_IMAGE_LAYOUT_PREINITIALIZED or VK_IMAGE_LAYOUT_GENERAL layout. Calling // vkGetImageSubresourceLayout for a linear image returns a subresource layout mapping that is // valid for either of those image layouts." - VulkanImageUtility::transitionLayout(cmdbuffer, { + imgutil::transitionLayout(cmdbuffer, { .image = image->image, .oldLayout = VulkanLayout::UNDEFINED, .newLayout = VulkanLayout::READ_WRITE, // (= VK_IMAGE_LAYOUT_GENERAL) diff --git a/filament/backend/src/vulkan/VulkanTexture.cpp b/filament/backend/src/vulkan/VulkanTexture.cpp index f611f40aac7..7819c00cedc 100644 --- a/filament/backend/src/vulkan/VulkanTexture.cpp +++ b/filament/backend/src/vulkan/VulkanTexture.cpp @@ -28,7 +28,6 @@ using namespace bluevk; namespace filament::backend { -using ImgUtil = VulkanImageUtility; VulkanTexture::VulkanTexture(VkDevice device, VmaAllocator allocator, VulkanCommands* commands, VkImage image, VkFormat format, uint8_t samples, uint32_t width, uint32_t height, TextureUsage tusage, VulkanStagePool& stagePool, bool heapAllocated) @@ -37,7 +36,7 @@ VulkanTexture::VulkanTexture(VkDevice device, VmaAllocator allocator, VulkanComm VulkanResource( heapAllocated ? 
VulkanResourceType::HEAP_ALLOCATED : VulkanResourceType::TEXTURE), mVkFormat(format), - mViewType(ImgUtil::getViewType(target)), + mViewType(imgutil::getViewType(target)), mSwizzle({}), mTextureImage(image), mFullViewRange{ @@ -62,7 +61,7 @@ VulkanTexture::VulkanTexture(VkDevice device, VkPhysicalDevice physicalDevice, VulkanResource( heapAllocated ? VulkanResourceType::HEAP_ALLOCATED : VulkanResourceType::TEXTURE), mVkFormat(backend::getVkFormat(tformat)), - mViewType(ImgUtil::getViewType(target)), + mViewType(imgutil::getViewType(target)), mSwizzle(swizzle), mStagePool(stagePool), mDevice(device), @@ -236,7 +235,7 @@ VulkanTexture::VulkanTexture(VkDevice device, VkPhysicalDevice physicalDevice, VulkanCommandBuffer& commands = mCommands->get(); VkCommandBuffer const cmdbuf = commands.buffer(); commands.acquire(this); - transitionLayout(cmdbuf, mFullViewRange, ImgUtil::getDefaultLayout(imageInfo.usage)); + transitionLayout(cmdbuf, mFullViewRange, imgutil::getDefaultLayout(imageInfo.usage)); } } @@ -327,10 +326,10 @@ void VulkanTexture::updateImage(const PixelBufferDescriptor& data, uint32_t widt VulkanLayout const newLayout = VulkanLayout::TRANSFER_DST; VulkanLayout nextLayout = getLayout(transitionRange.baseArrayLayer, miplevel); - VkImageLayout const newVkLayout = ImgUtil::getVkLayout(newLayout); + VkImageLayout const newVkLayout = imgutil::getVkLayout(newLayout); if (nextLayout == VulkanLayout::UNDEFINED) { - nextLayout = ImgUtil::getDefaultLayout(this->usage); + nextLayout = imgutil::getDefaultLayout(this->usage); } transitionLayout(cmdbuf, transitionRange, newLayout); @@ -374,8 +373,8 @@ void VulkanTexture::updateImageWithBlit(const PixelBufferDescriptor& hostData, u VulkanLayout const oldLayout = getLayout(layer, miplevel); transitionLayout(cmdbuf, range, newLayout); - vkCmdBlitImage(cmdbuf, stage->image, ImgUtil::getVkLayout(VulkanLayout::TRANSFER_SRC), - mTextureImage, ImgUtil::getVkLayout(newLayout), 1, blitRegions, VK_FILTER_NEAREST); + 
vkCmdBlitImage(cmdbuf, stage->image, imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC), + mTextureImage, imgutil::getVkLayout(newLayout), 1, blitRegions, VK_FILTER_NEAREST); transitionLayout(cmdbuf, range, oldLayout); } @@ -467,7 +466,7 @@ void VulkanTexture::transitionLayout(VkCommandBuffer cmdbuf, const VkImageSubres for (uint32_t i = firstLayer; i < lastLayer; ++i) { for (uint32_t j = firstLevel; j < lastLevel; ++j) { VulkanLayout const layout = getLayout(i, j); - ImgUtil::transitionLayout(cmdbuf, { + imgutil::transitionLayout(cmdbuf, { .image = mTextureImage, .oldLayout = layout, .newLayout = newLayout, @@ -482,7 +481,7 @@ void VulkanTexture::transitionLayout(VkCommandBuffer cmdbuf, const VkImageSubres } } } else { - ImgUtil::transitionLayout(cmdbuf, { + imgutil::transitionLayout(cmdbuf, { .image = mTextureImage, .oldLayout = oldLayout, .newLayout = newLayout, diff --git a/filament/backend/src/vulkan/VulkanUtility.cpp b/filament/backend/src/vulkan/VulkanUtility.cpp index 4b60b92f4d8..612acb20e5d 100644 --- a/filament/backend/src/vulkan/VulkanUtility.cpp +++ b/filament/backend/src/vulkan/VulkanUtility.cpp @@ -655,6 +655,4 @@ uint8_t reduceSampleCount(uint8_t sampleCount, VkSampleCountFlags mask) { return mostSignificantBit((sampleCount - 1) & mask); } -_BitCountHelper _BitCountHelper::BitCounter = {}; - } // namespace filament::backend diff --git a/filament/backend/src/vulkan/VulkanUtility.h b/filament/backend/src/vulkan/VulkanUtility.h index a5e053633fa..4dd40359110 100644 --- a/filament/backend/src/vulkan/VulkanUtility.h +++ b/filament/backend/src/vulkan/VulkanUtility.h @@ -90,12 +90,321 @@ utils::FixedCapacityVector enumerate( #undef EXPAND_ENUM_NO_ARGS #undef EXPAND_ENUM_ARGS -// Used across pipeline related classes. 
-using UsageFlags = utils::bitset128; - // Useful shorthands using VkFormatList = utils::FixedCapacityVector; +// Copied from +// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkFormat.html +constexpr VkFormat ALL_VK_FORMATS[] = { + VK_FORMAT_UNDEFINED, + VK_FORMAT_R4G4_UNORM_PACK8, + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_B4G4R4A4_UNORM_PACK16, + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_R5G5B5A1_UNORM_PACK16, + VK_FORMAT_B5G5R5A1_UNORM_PACK16, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SNORM, + VK_FORMAT_R8_USCALED, + VK_FORMAT_R8_SSCALED, + VK_FORMAT_R8_UINT, + VK_FORMAT_R8_SINT, + VK_FORMAT_R8_SRGB, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_USCALED, + VK_FORMAT_R8G8_SSCALED, + VK_FORMAT_R8G8_UINT, + VK_FORMAT_R8G8_SINT, + VK_FORMAT_R8G8_SRGB, + VK_FORMAT_R8G8B8_UNORM, + VK_FORMAT_R8G8B8_SNORM, + VK_FORMAT_R8G8B8_USCALED, + VK_FORMAT_R8G8B8_SSCALED, + VK_FORMAT_R8G8B8_UINT, + VK_FORMAT_R8G8B8_SINT, + VK_FORMAT_R8G8B8_SRGB, + VK_FORMAT_B8G8R8_UNORM, + VK_FORMAT_B8G8R8_SNORM, + VK_FORMAT_B8G8R8_USCALED, + VK_FORMAT_B8G8R8_SSCALED, + VK_FORMAT_B8G8R8_UINT, + VK_FORMAT_B8G8R8_SINT, + VK_FORMAT_B8G8R8_SRGB, + VK_FORMAT_R8G8B8A8_UNORM, + VK_FORMAT_R8G8B8A8_SNORM, + VK_FORMAT_R8G8B8A8_USCALED, + VK_FORMAT_R8G8B8A8_SSCALED, + VK_FORMAT_R8G8B8A8_UINT, + VK_FORMAT_R8G8B8A8_SINT, + VK_FORMAT_R8G8B8A8_SRGB, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SNORM, + VK_FORMAT_B8G8R8A8_USCALED, + VK_FORMAT_B8G8R8A8_SSCALED, + VK_FORMAT_B8G8R8A8_UINT, + VK_FORMAT_B8G8R8A8_SINT, + VK_FORMAT_B8G8R8A8_SRGB, + VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_USCALED_PACK32, + VK_FORMAT_A8B8G8R8_SSCALED_PACK32, + VK_FORMAT_A8B8G8R8_UINT_PACK32, + VK_FORMAT_A8B8G8R8_SINT_PACK32, + VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VK_FORMAT_A2R10G10B10_UNORM_PACK32, + VK_FORMAT_A2R10G10B10_SNORM_PACK32, + VK_FORMAT_A2R10G10B10_USCALED_PACK32, + 
VK_FORMAT_A2R10G10B10_SSCALED_PACK32, + VK_FORMAT_A2R10G10B10_UINT_PACK32, + VK_FORMAT_A2R10G10B10_SINT_PACK32, + VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A2B10G10R10_SNORM_PACK32, + VK_FORMAT_A2B10G10R10_USCALED_PACK32, + VK_FORMAT_A2B10G10R10_SSCALED_PACK32, + VK_FORMAT_A2B10G10R10_UINT_PACK32, + VK_FORMAT_A2B10G10R10_SINT_PACK32, + VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_SNORM, + VK_FORMAT_R16_USCALED, + VK_FORMAT_R16_SSCALED, + VK_FORMAT_R16_UINT, + VK_FORMAT_R16_SINT, + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_R16G16_USCALED, + VK_FORMAT_R16G16_SSCALED, + VK_FORMAT_R16G16_UINT, + VK_FORMAT_R16G16_SINT, + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16G16B16_UNORM, + VK_FORMAT_R16G16B16_SNORM, + VK_FORMAT_R16G16B16_USCALED, + VK_FORMAT_R16G16B16_SSCALED, + VK_FORMAT_R16G16B16_UINT, + VK_FORMAT_R16G16B16_SINT, + VK_FORMAT_R16G16B16_SFLOAT, + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_R16G16B16A16_USCALED, + VK_FORMAT_R16G16B16A16_SSCALED, + VK_FORMAT_R16G16B16A16_UINT, + VK_FORMAT_R16G16B16A16_SINT, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_R32_UINT, + VK_FORMAT_R32_SINT, + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32G32_UINT, + VK_FORMAT_R32G32_SINT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32B32_UINT, + VK_FORMAT_R32G32B32_SINT, + VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_UINT, + VK_FORMAT_R32G32B32A32_SINT, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R64_UINT, + VK_FORMAT_R64_SINT, + VK_FORMAT_R64_SFLOAT, + VK_FORMAT_R64G64_UINT, + VK_FORMAT_R64G64_SINT, + VK_FORMAT_R64G64_SFLOAT, + VK_FORMAT_R64G64B64_UINT, + VK_FORMAT_R64G64B64_SINT, + VK_FORMAT_R64G64B64_SFLOAT, + VK_FORMAT_R64G64B64A64_UINT, + VK_FORMAT_R64G64B64A64_SINT, + VK_FORMAT_R64G64B64A64_SFLOAT, + VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, + VK_FORMAT_D16_UNORM, + VK_FORMAT_X8_D24_UNORM_PACK32, + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_S8_UINT, + VK_FORMAT_D16_UNORM_S8_UINT, + 
VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_BC1_RGB_UNORM_BLOCK, + VK_FORMAT_BC1_RGB_SRGB_BLOCK, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, + VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, + VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, + VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, + VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, + VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, + VK_FORMAT_EAC_R11_UNORM_BLOCK, + VK_FORMAT_EAC_R11_SNORM_BLOCK, + VK_FORMAT_EAC_R11G11_UNORM_BLOCK, + VK_FORMAT_EAC_R11G11_SNORM_BLOCK, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_G8B8G8R8_422_UNORM, + VK_FORMAT_B8G8R8G8_422_UNORM, + VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, + 
VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, + VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, + VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, + VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, + VK_FORMAT_R10X6_UNORM_PACK16, + VK_FORMAT_R10X6G10X6_UNORM_2PACK16, + VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16, + VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, + VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16, + VK_FORMAT_R12X4_UNORM_PACK16, + VK_FORMAT_R12X4G12X4_UNORM_2PACK16, + VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, + VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, + VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16, + VK_FORMAT_G16B16G16R16_422_UNORM, + VK_FORMAT_B16G16R16G16_422_UNORM, + VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, + VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, + VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, + VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, + VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, + VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, + VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, + VK_FORMAT_A4R4G4B4_UNORM_PACK16, + VK_FORMAT_A4B4G4R4_UNORM_PACK16, + VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK, + VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK, + VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK, + VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK, + VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK, + VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK, + VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK, + VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK, + 
VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK, + VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK, + VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK, + VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK, + VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK, + VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK, + VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, + VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, + VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG, + VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG, + VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG, + VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG, + VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG, + VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG, +// Not supported (yet) by bluevk +// VK_FORMAT_R16G16_SFIXED5_NV, +// VK_FORMAT_A1B5G5R5_UNORM_PACK16_KHR, +// VK_FORMAT_A8_UNORM_KHR, +// VK_FORMAT_A8_UNORM, + VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT, + VK_FORMAT_G8B8G8R8_422_UNORM_KHR, + VK_FORMAT_B8G8R8G8_422_UNORM_KHR, + VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR, + VK_FORMAT_G8_B8R8_2PLANE_420_UNORM_KHR, + VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM_KHR, + VK_FORMAT_G8_B8R8_2PLANE_422_UNORM_KHR, + VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM_KHR, + VK_FORMAT_R10X6_UNORM_PACK16_KHR, + VK_FORMAT_R10X6G10X6_UNORM_2PACK16_KHR, + VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16_KHR, + VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16_KHR, + VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16_KHR, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16_KHR, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16_KHR, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16_KHR, + 
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16_KHR, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16_KHR, + VK_FORMAT_R12X4_UNORM_PACK16_KHR, + VK_FORMAT_R12X4G12X4_UNORM_2PACK16_KHR, + VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16_KHR, + VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16_KHR, + VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16_KHR, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16_KHR, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16_KHR, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16_KHR, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16_KHR, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16_KHR, + VK_FORMAT_G16B16G16R16_422_UNORM_KHR, + VK_FORMAT_B16G16R16G16_422_UNORM_KHR, + VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM_KHR, + VK_FORMAT_G16_B16R16_2PLANE_420_UNORM_KHR, + VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR, + VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR, + VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR, + VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT, + VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT, + VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, + VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, + VK_FORMAT_R16G16_S10_5_NV, +}; + // An Array that will be fixed capacity, but the "size" (as in user added elements) is variable. // Note that this class is movable. template @@ -281,6 +590,7 @@ struct DescriptorSetLayout { } // namespace descset +namespace { // Use constexpr to statically generate a bit count table for 8-bit numbers. 
struct _BitCountHelper { constexpr _BitCountHelper() : data{} { @@ -303,14 +613,15 @@ struct _BitCountHelper { return count; } - static _BitCountHelper BitCounter; private: uint8_t data[256]; }; +} // namespace anonymous template inline uint8_t countBits(MaskType num) { - return _BitCountHelper::BitCounter.count(num); + static _BitCountHelper BitCounter = {}; + return BitCounter.count(num); } // This is useful for counting the total number of descriptors for both vertex and fragment stages. diff --git a/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp b/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp index b316cdb3c21..35228e95e6a 100644 --- a/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp +++ b/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp @@ -42,8 +42,6 @@ constexpr uint8_t MAX_INPUT_ATTACHMENT_BINDING = 1; constexpr uint8_t MAX_BINDINGS = MAX_SAMPLER_BINDING + MAX_UBO_BINDING + MAX_INPUT_ATTACHMENT_BINDING; -using ImgUtil = VulkanImageUtility; - using Bitmask = VulkanDescriptorSetLayout::Bitmask; using DescriptorCount = VulkanDescriptorSetLayout::Count; using UBOMap = std::array, MAX_UBO_BINDING>; @@ -1010,7 +1008,7 @@ class VulkanDescriptorSetManager::Impl { } else { info.imageView = texture->getViewForType(range, expectedType); } - info.imageLayout = ImgUtil::getVkLayout(texture->getPrimaryImageLayout()); + info.imageLayout = imgutil::getVkLayout(texture->getPrimaryImageLayout()); mSamplerMap[binding] = {info, texture}; mResources.acquire(texture); } @@ -1018,7 +1016,7 @@ class VulkanDescriptorSetManager::Impl { void updateInputAttachment(Handle, VulkanAttachment attachment) noexcept { VkDescriptorImageInfo info = { .imageView = attachment.getImageView(), - .imageLayout = ImgUtil::getVkLayout(attachment.getLayout()), + .imageLayout = imgutil::getVkLayout(attachment.getLayout()), }; mInputAttachment = {attachment, info}; mResources.acquire(attachment.texture); @@ -1042,7 +1040,7 @@ 
class VulkanDescriptorSetManager::Impl { mPlaceHolderImageInfo = { .sampler = sampler, .imageView = texture->getPrimaryImageView(), - .imageLayout = ImgUtil::getVkLayout(texture->getPrimaryImageLayout()), + .imageLayout = imgutil::getVkLayout(texture->getPrimaryImageLayout()), }; } diff --git a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp index c58e5d19d0a..687887a6988 100644 --- a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp +++ b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp @@ -137,18 +137,14 @@ void printDepthFormats(VkPhysicalDevice device) { const VkFormatFeatureFlags required = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; utils::slog.i << "Sampleable depth formats: "; - for (VkFormat format = (VkFormat) 1;;) { + for (VkFormat format : ALL_VK_FORMATS) { VkFormatProperties props; vkGetPhysicalDeviceFormatProperties(device, format, &props); if ((props.optimalTilingFeatures & required) == required) { utils::slog.i << format << " "; } - if (format == VK_FORMAT_ASTC_12x12_SRGB_BLOCK) { - utils::slog.i << utils::io::endl; - break; - } - format = (VkFormat) (1 + (int) format); } + utils::slog.i << utils::io::endl; } #endif @@ -405,7 +401,7 @@ inline int deviceTypeOrder(VkPhysicalDeviceType deviceType) { case VK_PHYSICAL_DEVICE_TYPE_OTHER: return 1; default: - utils::slog.w << "devcieTypeOrder: Unexpected deviceType: " << deviceType + utils::slog.w << "deviceTypeOrder: Unexpected deviceType: " << deviceType << utils::io::endl; return -1; } @@ -528,7 +524,7 @@ VkFormatList findBlittableDepthStencilFormats(VkPhysicalDevice device) { std::vector selectedFormats; VkFormatFeatureFlags const required = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - for (VkFormat format = (VkFormat) 1;;) { + for (VkFormat format : ALL_VK_FORMATS) { if (isVkDepthFormat(format)) { 
VkFormatProperties props; vkGetPhysicalDeviceFormatProperties(device, format, &props); @@ -536,10 +532,6 @@ VkFormatList findBlittableDepthStencilFormats(VkPhysicalDevice device) { selectedFormats.push_back(format); } } - format = (VkFormat) (1 + (int) format); - if (format == VK_FORMAT_ASTC_12x12_SRGB_BLOCK) { - break; - } } VkFormatList ret(selectedFormats.size()); std::copy(selectedFormats.begin(), selectedFormats.end(), ret.begin()); From 8ab85253592c809da842d7db5fbe9de95864b8e8 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Fri, 26 Apr 2024 14:13:10 -0700 Subject: [PATCH 20/31] fix a missing ubo upload in bloom set-up --- filament/src/PostProcessManager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/filament/src/PostProcessManager.cpp b/filament/src/PostProcessManager.cpp index 9dd8b9d5ff9..59520e0c143 100644 --- a/filament/src/PostProcessManager.cpp +++ b/filament/src/PostProcessManager.cpp @@ -2110,6 +2110,7 @@ PostProcessManager::BloomPassOutput PostProcessManager::bloom(FrameGraph& fg, mi->setParameter("source", hwOut, { .filterMag = SamplerMagFilter::LINEAR, .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST}); + mi->commit(driver); mi->use(driver); auto pipeline = material.getPipelineState(mEngine); From 4c75e6e9a294ddcc8e46043f8108b7a4f18fb47a Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Fri, 26 Apr 2024 17:28:08 -0700 Subject: [PATCH 21/31] gltfio: enable extended implementation (#7776) This change will enable proper flat-shading and MikkTSpace. 
Caveats: - Only for disk-local glTF resources - iOS, Web, Android do not work as of now Fixes #6358, #7444 --- libs/gltfio/include/gltfio/AssetLoader.h | 21 +++++ libs/gltfio/src/AssetLoader.cpp | 79 ++++++++++++++++--- libs/gltfio/src/FFilamentAsset.h | 75 ++++++++++++++---- libs/gltfio/src/ResourceLoader.cpp | 61 ++++++++------ .../src/extended/AssetLoaderExtended.cpp | 5 +- .../gltfio/src/extended/AssetLoaderExtended.h | 30 +------ .../src/extended/ResourceLoaderExtended.h | 4 +- .../src/extended/TangentSpaceMeshWrapper.cpp | 28 ++++--- .../src/extended/TangentSpaceMeshWrapper.h | 12 ++- 9 files changed, 218 insertions(+), 97 deletions(-) diff --git a/libs/gltfio/include/gltfio/AssetLoader.h b/libs/gltfio/include/gltfio/AssetLoader.h index f516166a800..bf650f69477 100644 --- a/libs/gltfio/include/gltfio/AssetLoader.h +++ b/libs/gltfio/include/gltfio/AssetLoader.h @@ -38,6 +38,23 @@ namespace filament::gltfio { class NodeManager; +// Use this struct to enable mikktspace-based tangent-space computation. +/** + * \struct AssetConfigurationExtended AssetLoader.h gltfio/AssetLoader.h + * \brief extends struct AssetConfiguration + * Useful if client needs mikktspace tangent space computation. + * NOTE: Android, iOS, Web are not supported. And only disk-local glTF resources are supported. + */ +struct AssetConfigurationExtended { + //! Optional The same parameter as provided to \struct ResourceConfiguration ResourceLoader.h + //! gltfio/ResourceLoader.h + char const* gltfPath; + + //! Client can use this method to check if the extended implementation is supported on their + //! platform or not. + static bool isSupported(); +}; + /** * \struct AssetConfiguration AssetLoader.h gltfio/AssetLoader.h * \brief Construction parameters for AssetLoader. @@ -62,6 +79,10 @@ struct AssetConfiguration { //! Optional default node name for anonymous nodes char* defaultNodeName = nullptr; + + //! Optional to enable mikktspace tangents. 
Lifetime of struct only needs to be maintained for + // the duration of the constructor of AssetLoader. + AssetConfigurationExtended* ext = nullptr; }; /** diff --git a/libs/gltfio/src/AssetLoader.cpp b/libs/gltfio/src/AssetLoader.cpp index fa73eb27417..61a2c6ed589 100644 --- a/libs/gltfio/src/AssetLoader.cpp +++ b/libs/gltfio/src/AssetLoader.cpp @@ -24,6 +24,7 @@ #include "FTrsTransformManager.h" #include "GltfEnums.h" #include "Utility.h" +#include "extended/AssetLoaderExtended.h" #include #include @@ -57,6 +58,8 @@ #include "downcast.h" +#include + using namespace filament; using namespace filament::math; using namespace utils; @@ -202,14 +205,21 @@ class MaterialInstanceCache { }; struct FAssetLoader : public AssetLoader { - FAssetLoader(const AssetConfiguration& config) : + FAssetLoader(AssetConfiguration const& config) : mEntityManager(config.entities ? *config.entities : EntityManager::get()), mRenderableManager(config.engine->getRenderableManager()), mNameManager(config.names), mTransformManager(config.engine->getTransformManager()), mMaterials(*config.materials), mEngine(*config.engine), - mDefaultNodeName(config.defaultNodeName) {} + mDefaultNodeName(config.defaultNodeName) { + if (config.ext) { + ASSERT_PRECONDITION(AssetConfigurationExtended::isSupported(), + "Extend asset loading is not supported on this platform"); + mLoaderExtended = std::make_unique( + *config.ext, config.engine, mMaterials); + } + } FFilamentAsset* createAsset(const uint8_t* bytes, uint32_t nbytes); FFilamentAsset* createInstancedAsset(const uint8_t* bytes, uint32_t numBytes, @@ -292,6 +302,9 @@ struct FAssetLoader : public AssetLoader { // Weak reference to the largest dummy buffer so far in the current loading phase. 
BufferObject* mDummyBufferObject = nullptr; + +public: + std::unique_ptr mLoaderExtended; }; FILAMENT_DOWNCAST(AssetLoader) @@ -422,7 +435,7 @@ FFilamentAsset* FAssetLoader::createRootAsset(const cgltf_data* srcAsset) { mDummyBufferObject = nullptr; FFilamentAsset* fAsset = new FFilamentAsset(&mEngine, mNameManager, &mEntityManager, - &mNodeManager, &mTrsTransformManager, srcAsset); + &mNodeManager, &mTrsTransformManager, srcAsset, (bool) mLoaderExtended); // It is not an error for a glTF file to have zero scenes. fAsset->mScenes.clear(); @@ -620,12 +633,41 @@ void FAssetLoader::createPrimitives(const cgltf_node* node, const char* name, for (cgltf_size index = 0, n = mesh->primitives_count; index < n; ++index) { Primitive& outputPrim = prims[index]; - const cgltf_primitive& inputPrim = mesh->primitives[index]; - - // Create a Filament VertexBuffer and IndexBuffer for this prim if we haven't already. - if (!outputPrim.vertices && !createPrimitive(inputPrim, name, &outputPrim, fAsset)) { - mError = true; - return; + cgltf_primitive& inputPrim = mesh->primitives[index]; + + if (!outputPrim.vertices) { + if (mLoaderExtended) { + auto& resourceInfo = std::get(fAsset->mResourceInfo); + resourceInfo.uriDataCache = mLoaderExtended->getUriDataCache(); + AssetLoaderExtended::Input input{ + .gltf = gltf, + .prim = &inputPrim, + .name = name, + .dracoCache = &fAsset->mSourceAsset->dracoCache, + .material = getMaterial(gltf, inputPrim.material, &outputPrim.uvmap, + utility::primitiveHasVertexColor(&inputPrim)), + }; + + mError = !mLoaderExtended->createPrimitive(&input, &outputPrim, resourceInfo.slots); + if (!mError) { + if (outputPrim.vertices) { + fAsset->mVertexBuffers.push_back(outputPrim.vertices); + } + if (outputPrim.indices) { + fAsset->mIndexBuffers.push_back(outputPrim.indices); + } + if (outputPrim.targets) { + fAsset->mMorphTargetBuffers.push_back(outputPrim.targets); + } + } + } else { + // Create a Filament VertexBuffer and IndexBuffer for this prim if we haven't 
+ // already. + mError = !createPrimitive(inputPrim, name, &outputPrim, fAsset); + } + if (mError) { + return; + } } // Expand the object-space bounding box. @@ -777,6 +819,8 @@ void FAssetLoader::createMaterialVariants(const cgltf_mesh* mesh, Entity entity, bool FAssetLoader::createPrimitive(const cgltf_primitive& inPrim, const char* name, Primitive* outPrim, FFilamentAsset* fAsset) { + using BufferSlot = FFilamentAsset::ResourceInfo::BufferSlot; + Material* material = getMaterial(fAsset->mSourceAsset->hierarchy, inPrim.material, &outPrim->uvmap, primitiveHasVertexColor(inPrim)); AttributeBitset requiredAttributes = material->getRequiredAttributes(); @@ -787,8 +831,8 @@ bool FAssetLoader::createPrimitive(const cgltf_primitive& inPrim, const char* na // request from Google. // Create a little lambda that appends to the asset's vertex buffer slots. - auto slots = &fAsset->mBufferSlots; - auto addBufferSlot = [slots](BufferSlot entry) { + auto slots = &std::get(fAsset->mResourceInfo).mBufferSlots; + auto addBufferSlot = [slots](FFilamentAsset::ResourceInfo::BufferSlot entry) { slots->push_back(entry); }; @@ -807,7 +851,7 @@ bool FAssetLoader::createPrimitive(const cgltf_primitive& inPrim, const char* na .bufferType(indexType) .build(mEngine); - BufferSlot slot = { accessor }; + FFilamentAsset::ResourceInfo::BufferSlot slot = { accessor }; slot.indexBuffer = indices; addBufferSlot(slot); } else if (inPrim.attributes_count > 0) { @@ -1049,7 +1093,8 @@ bool FAssetLoader::createPrimitive(const cgltf_primitive& inPrim, const char* na outPrim->indices = indices; outPrim->vertices = vertices; - fAsset->mPrimitives.push_back({&inPrim, vertices}); + auto& primitives = std::get(fAsset->mResourceInfo).mPrimitives; + primitives.push_back({&inPrim, vertices}); fAsset->mVertexBuffers.push_back(vertices); for (size_t i = firstSlot; i < slots->size(); ++i) { @@ -1549,6 +1594,14 @@ void FAssetLoader::importSkins(FFilamentInstance* instance, const cgltf_data* gl } } +bool 
AssetConfigurationExtended::isSupported() { +#if defined(__ANDROID__) || defined(IOS) || defined(__EMSCRIPTEN__) + return false; +#else + return true; +#endif +} + AssetLoader* AssetLoader::create(const AssetConfiguration& config) { return new FAssetLoader(config); } diff --git a/libs/gltfio/src/FFilamentAsset.h b/libs/gltfio/src/FFilamentAsset.h index a29c016d24e..55cdad2c926 100644 --- a/libs/gltfio/src/FFilamentAsset.h +++ b/libs/gltfio/src/FFilamentAsset.h @@ -49,6 +49,7 @@ #include +#include #include #ifdef NDEBUG @@ -74,16 +75,6 @@ namespace filament::gltfio { struct Wireframe; -// Encapsulates VertexBuffer::setBufferAt() or IndexBuffer::setBuffer(). -struct BufferSlot { - const cgltf_accessor* accessor; - cgltf_attribute_type attribute; - int bufferIndex; // for vertex buffer and morph target buffer only - VertexBuffer* vertexBuffer; - IndexBuffer* indexBuffer; - MorphTargetBuffer* morphTargetBuffer; -}; - // Stores a connection between Texture and MaterialInstance; consumed by resource loader so that it // can call "setParameter" on the given MaterialInstance after the Texture has been created. 
// Since material instances are not typically shared between FilamentInstance, the slots are a @@ -110,14 +101,24 @@ struct Primitive { using MeshCache = utils::FixedCapacityVector>; struct FFilamentAsset : public FilamentAsset { + struct ResourceInfo; + struct ResourceInfoExtended; + FFilamentAsset(Engine* engine, utils::NameComponentManager* names, utils::EntityManager* entityManager, NodeManager* nodeManager, - TrsTransformManager* trsTransformManager, const cgltf_data* srcAsset) : + TrsTransformManager* trsTransformManager, const cgltf_data* srcAsset, + bool useExtendedAlgo) : mEngine(engine), mNameManager(names), mEntityManager(entityManager), mNodeManager(nodeManager), mTrsTransformManager(trsTransformManager), mSourceAsset(new SourceAsset {(cgltf_data*)srcAsset}), mTextures(srcAsset->textures_count), - mMeshCache(srcAsset->meshes_count) {} + mMeshCache(srcAsset->meshes_count) { + if (!useExtendedAlgo) { + mResourceInfo = ResourceInfo{}; + } else { + mResourceInfo = ResourceInfoExtended{}; + } + } ~FFilamentAsset(); @@ -228,6 +229,10 @@ struct FFilamentAsset : public FilamentAsset { mDetachedFilamentComponents = true; } + bool isUsingExtendedAlgorithm() { + return std::holds_alternative(mResourceInfo); + } + // end public API // If a Filament Texture for the given args already exists, calls setParameter() and returns @@ -315,8 +320,50 @@ struct FFilamentAsset : public FilamentAsset { MeshCache mMeshCache; // Asset information that is produced by AssetLoader and consumed by ResourceLoader: - std::vector mBufferSlots; - std::vector > mPrimitives; + struct ResourceInfo { + // Encapsulates VertexBuffer::setBufferAt() or IndexBuffer::setBuffer(). 
+ struct BufferSlot { + const cgltf_accessor* accessor; + cgltf_attribute_type attribute; + int bufferIndex;// for vertex buffer and morph target buffer only + VertexBuffer* vertexBuffer; + IndexBuffer* indexBuffer; + MorphTargetBuffer* morphTargetBuffer; + }; + + std::vector mBufferSlots; + std::vector> mPrimitives; + }; + struct ResourceInfoExtended { + // Used to denote a generated buffer. Set as `index in `CgltfAttribute`. + static constexpr int const GENERATED_0_INDEX = -1; + static constexpr int const GENERATED_1_INDEX = -2; + + struct BufferSlot { + VertexBuffer* vertices = nullptr; + IndexBuffer* indices = nullptr; + MorphTargetBuffer* target = nullptr; + int slot = -1; + size_t sizeInBytes = 0; + + void* data = nullptr; + + // MorphTarget-only data; + struct { + short4* tbn = nullptr; + float3* positions = nullptr; + } targetData; + }; + + std::vector slots; + + // This is to workaround the fact that the original ResourceLoader owns the UriDataCache. In + // the extended implementation, we create it in AssetLoader. We pass it along to + // ResourceLoader here. + UriDataCacheHandle uriDataCache; + }; + + std::variant mResourceInfo; }; FILAMENT_DOWNCAST(FilamentAsset) diff --git a/libs/gltfio/src/ResourceLoader.cpp b/libs/gltfio/src/ResourceLoader.cpp index 135cca7c0e9..9b077e94357 100644 --- a/libs/gltfio/src/ResourceLoader.cpp +++ b/libs/gltfio/src/ResourceLoader.cpp @@ -22,6 +22,7 @@ #include "TangentsJob.h" #include "downcast.h" #include "Utility.h" +#include "extended/ResourceLoaderExtended.h" #include #include @@ -229,8 +230,8 @@ inline void createSkins(cgltf_data const* gltf, bool normalize, inline void uploadBuffers(FFilamentAsset* asset, Engine& engine, UriDataCacheHandle uriDataCache) { // Upload VertexBuffer and IndexBuffer data to the GPU. 
- auto& slots = asset->mBufferSlots; - for (auto slot: slots) { + auto& slots = std::get(asset->mResourceInfo).mBufferSlots; + for (auto const& slot: slots) { const cgltf_accessor* accessor = slot.accessor; if (!accessor->buffer_view) { continue; @@ -390,6 +391,11 @@ bool ResourceLoader::loadResources(FilamentAsset* asset) { // This is a workaround in case of using extended algo, please see description in // FFilamentAsset.h + if (fasset->isUsingExtendedAlgorithm()) { + pImpl->mUriDataCache = + std::get(fasset->mResourceInfo).uriDataCache; + } + return loadResources(fasset, false); } @@ -402,6 +408,8 @@ bool ResourceLoader::loadResources(FFilamentAsset* asset, bool async) { } asset->mResourcesLoaded = true; + bool const isExtendedAlgo = asset->isUsingExtendedAlgorithm(); + // At this point, any entities that are created in the future (i.e. dynamically added instances) // will not need the progressive feature to be enabled. This simplifies the dependency graph and // prevents it from growing. @@ -414,30 +422,35 @@ bool ResourceLoader::loadResources(FFilamentAsset* asset, bool async) { cgltf_data const* gltf = asset->mSourceAsset->hierarchy; - utility::loadCgltfBuffers(gltf, pImpl->mGltfPath.c_str(), pImpl->mUriDataCache); + if (!isExtendedAlgo) { + utility::loadCgltfBuffers(gltf, pImpl->mGltfPath.c_str(), pImpl->mUriDataCache); - // Decompress Draco meshes early on, which allows us to exploit subsequent processing such - // as tangent generation. - DracoCache* dracoCache = &asset->mSourceAsset->dracoCache; - auto& primitives = asset->mPrimitives; - // Go through every primitive and check if it has a Draco mesh. - for (auto& [prim, vertexBuffer]: primitives) { - if (!prim->has_draco_mesh_compression) { - continue; + // Decompress Draco meshes early on, which allows us to exploit subsequent processing such + // as tangent generation. 
+ DracoCache* dracoCache = &asset->mSourceAsset->dracoCache; + auto& primitives = std::get(asset->mResourceInfo).mPrimitives; + // Go through every primitive and check if it has a Draco mesh. + for (auto& [prim, vertexBuffer]: primitives) { + if (!prim->has_draco_mesh_compression) { + continue; + } + utility::decodeDracoMeshes(gltf, prim, dracoCache); } - utility::decodeDracoMeshes(gltf, prim, dracoCache); - } - utility::decodeMeshoptCompression((cgltf_data*) gltf); + utility::decodeMeshoptCompression((cgltf_data*) gltf); - uploadBuffers(asset, *pImpl->mEngine, pImpl->mUriDataCache); + uploadBuffers(asset, *pImpl->mEngine, pImpl->mUriDataCache); - // Compute surface orientation quaternions if necessary. This is similar to sparse data in - // that we need to generate the contents of a GPU buffer by processing one or more CPU - // buffer(s). - pImpl->computeTangents(asset); + // Compute surface orientation quaternions if necessary. This is similar to sparse data in + // that we need to generate the contents of a GPU buffer by processing one or more CPU + // buffer(s). + pImpl->computeTangents(asset); - asset->mBufferSlots.clear(); - asset->mPrimitives.clear(); + std::get(asset->mResourceInfo).mBufferSlots.clear(); + std::get(asset->mResourceInfo).mPrimitives.clear(); + } else { + auto& slots = std::get(asset->mResourceInfo).slots; + ResourceLoaderExtended::loadResources(slots, pImpl->mEngine, asset->mBufferObjects); + } createSkins(gltf, pImpl->mNormalizeSkinningWeights, asset->mSkins); @@ -663,7 +676,9 @@ void ResourceLoader::Impl::computeTangents(FFilamentAsset* asset) { // Collect all TANGENT vertex attribute slots that need to be populated. 
tsl::robin_map baseTangents; - for (auto slot : asset->mBufferSlots) { + auto& slots = std::get(asset->mResourceInfo).mBufferSlots; + auto& primitives = std::get(asset->mResourceInfo).mPrimitives; + for (auto const& slot: slots) { if (slot.accessor != kGenerateTangents && slot.accessor != kGenerateNormals) { continue; } @@ -673,7 +688,7 @@ void ResourceLoader::Impl::computeTangents(FFilamentAsset* asset) { // Create a job description for each triangle-based primitive. using Params = TangentsJob::Params; std::vector jobParams; - for (auto [prim, vb] : asset->mPrimitives) { + for (auto const& [prim, vb] : primitives) { if (UTILS_UNLIKELY(prim->type != cgltf_primitive_type_triangles)) { continue; } diff --git a/libs/gltfio/src/extended/AssetLoaderExtended.cpp b/libs/gltfio/src/extended/AssetLoaderExtended.cpp index 08cf85fd9d1..7261abfbf02 100644 --- a/libs/gltfio/src/extended/AssetLoaderExtended.cpp +++ b/libs/gltfio/src/extended/AssetLoaderExtended.cpp @@ -41,9 +41,8 @@ constexpr uint8_t const VERTEX_JOB = 0x1; constexpr uint8_t const INDEX_JOB = 0x2; constexpr uint8_t const MORPH_TARGET_JOB = 0x4; -// TODO: will be updated on proper integration -constexpr int const GENERATED_0 = TmpBufferSlot::GENERATED_0_INDEX; -constexpr int const GENERATED_1 = TmpBufferSlot::GENERATED_1_INDEX; +constexpr int const GENERATED_0 = FFilamentAsset::ResourceInfoExtended::GENERATED_0_INDEX; +constexpr int const GENERATED_1 = FFilamentAsset::ResourceInfoExtended::GENERATED_1_INDEX; using BufferSlot = AssetLoaderExtended::BufferSlot; using BufferType = std::variant; diff --git a/libs/gltfio/src/extended/AssetLoaderExtended.h b/libs/gltfio/src/extended/AssetLoaderExtended.h index 0d9e3cce0e4..7af8054409c 100644 --- a/libs/gltfio/src/extended/AssetLoaderExtended.h +++ b/libs/gltfio/src/extended/AssetLoaderExtended.h @@ -47,33 +47,6 @@ struct FilamentAttribute { int slot; }; -// This will be removed when the extended classes are properly integrated. 
-struct TmpBufferSlot { - // Used to denote a generated buffer. Set as `index in `CgltfAttribute`. - static constexpr int const GENERATED_0_INDEX = -1; - static constexpr int const GENERATED_1_INDEX = -2; - - VertexBuffer* vertices = nullptr; - IndexBuffer* indices = nullptr; - MorphTargetBuffer* target = nullptr; - int slot = -1; - size_t sizeInBytes = 0; - - void* data = nullptr; - - // MorphTarget-only data; - struct { - short4* tbn = nullptr; - float3* positions = nullptr; - } targetData; -}; - -// This will be removed when the extended classes are properly integrated. -struct TmpAssetConfigurationExtended { - //! gltfio/ResourceLoader.h - char const* gltfPath; -}; - // AssetLoaderExtended performs the same task as AssetLoader. Specifically, it takes the data from // cgltf and store them in CPU memory. These buffers are then forwarded to ResourceLoader for // uplaoding to GPU. The difference between this class AssetLoader is that tangent space computation @@ -81,8 +54,7 @@ struct TmpAssetConfigurationExtended { // TangentSpaceMesh will remesh the input and possibly change the indices, vertex count, and // triangle counts, and so those changes must be resolved before the buffers are sent to the GPU. 
struct AssetLoaderExtended { - using BufferSlot = TmpBufferSlot; - using AssetConfigurationExtended = TmpAssetConfigurationExtended; + using BufferSlot = FFilamentAsset::ResourceInfoExtended::BufferSlot; using Output = Primitive; struct Input { diff --git a/libs/gltfio/src/extended/ResourceLoaderExtended.h b/libs/gltfio/src/extended/ResourceLoaderExtended.h index 13fba7354f8..c095a046f52 100644 --- a/libs/gltfio/src/extended/ResourceLoaderExtended.h +++ b/libs/gltfio/src/extended/ResourceLoaderExtended.h @@ -17,8 +17,6 @@ #ifndef GLTFIO_RESOURCELOADEREXTENDED_H #define GLTFIO_RESOURCELOADEREXTENDED_H - -#include "AssetLoaderExtended.h" #include "../FFilamentAsset.h" #include @@ -26,7 +24,7 @@ namespace filament::gltfio { struct ResourceLoaderExtended { - using BufferSlot = AssetLoaderExtended::BufferSlot; + using BufferSlot = FFilamentAsset::ResourceInfoExtended::BufferSlot; static void loadResources( std::vector const& slots, filament::Engine* engine, std::vector& bufferObjects); diff --git a/libs/gltfio/src/extended/TangentSpaceMeshWrapper.cpp b/libs/gltfio/src/extended/TangentSpaceMeshWrapper.cpp index 365bd75260a..33ab6f04dbe 100644 --- a/libs/gltfio/src/extended/TangentSpaceMeshWrapper.cpp +++ b/libs/gltfio/src/extended/TangentSpaceMeshWrapper.cpp @@ -28,6 +28,9 @@ namespace { using AuxType = TangentSpaceMeshWrapper::AuxType; using Builder = TangentSpaceMeshWrapper::Builder; +template +using is_supported_aux_t = TangentSpaceMeshWrapper::is_supported_aux_t; + struct Passthrough { static constexpr int POSITION = 256; static constexpr int UV0 = 257; @@ -184,18 +187,11 @@ struct TangentSpaceMeshWrapper::Impl { return data; } - template - using is_supported_aux_t = - typename std::enable_if::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value>::type; template> T getAux(AuxType attribute) noexcept { size_t const nbytes = getVertexCount() * sizeof(std::remove_pointer_t); auto data = (T) malloc(nbytes); - 
DO_MESH_IMPL(getAux, data); + DO_MESH_IMPL(getAux, attribute, data); return data; } @@ -234,7 +230,7 @@ struct TangentSpaceMeshWrapper::Builder::Impl { void triangles(uint3 const* triangles) noexcept { DO_BUILDER_IMPL(triangles, triangles); } void triangleCount(size_t count) noexcept { DO_BUILDER_IMPL(triangleCount, count); } - template + template> void aux(AuxType type, T data) { DO_BUILDER_IMPL(aux, type, data); } @@ -295,7 +291,13 @@ Builder& Builder::triangles(uint3 const* triangles) noexcept { return *this; } -template +template Builder& Builder::aux(AuxType attribute, float2* data); +template Builder& Builder::aux(AuxType attribute, float3* data); +template Builder& Builder::aux(AuxType attribute, float4* data); +template Builder& Builder::aux(AuxType attribute, ushort3* data); +template Builder& Builder::aux(AuxType attribute, ushort4* data); + +template Builder& Builder::aux(AuxType type, T data) { mImpl->aux(type, data); return *this; @@ -318,6 +320,12 @@ short4* TangentSpaceMeshWrapper::getQuats() noexcept { return mImpl->getQuats(); uint3* TangentSpaceMeshWrapper::getTriangles() { return mImpl->getTriangles(); } size_t TangentSpaceMeshWrapper::getVertexCount() const noexcept { return mImpl->getVertexCount(); } +template float2* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; +template float3* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; +template float4* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; +template ushort3* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; +template ushort4* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; + template T TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept { return mImpl->getAux(attribute); diff --git a/libs/gltfio/src/extended/TangentSpaceMeshWrapper.h b/libs/gltfio/src/extended/TangentSpaceMeshWrapper.h index 6aab1574726..2dc2d5e8ddb 100644 --- a/libs/gltfio/src/extended/TangentSpaceMeshWrapper.h +++ 
b/libs/gltfio/src/extended/TangentSpaceMeshWrapper.h @@ -30,6 +30,12 @@ using namespace math; struct TangentSpaceMeshWrapper { using AuxType = geometry::TangentSpaceMesh::AuxAttribute; + template + using is_supported_aux_t = typename std::enable_if< + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value>::type; + struct Builder { struct Impl; @@ -42,8 +48,10 @@ struct TangentSpaceMeshWrapper { Builder& positions(float3 const* positions) noexcept; Builder& triangleCount(size_t triangleCount) noexcept; Builder& triangles(uint3 const* triangles) noexcept; - template + + template> Builder& aux(AuxType type, T data); + TangentSpaceMeshWrapper* build(); private: @@ -51,7 +59,7 @@ struct TangentSpaceMeshWrapper { }; explicit TangentSpaceMeshWrapper() = default; - + static void destroy(TangentSpaceMeshWrapper* mesh); float3* getPositions() noexcept; From e7feee7d5b64badc711112f3a37960b7d7870436 Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Mon, 29 Apr 2024 11:18:28 -0600 Subject: [PATCH 22/31] Switch setFrameScheduledCallback to use utils::Invocable (#7792) --- NEW_RELEASE_NOTES.md | 3 ++ .../backend/include/backend/DriverEnums.h | 3 +- .../backend/include/backend/PresentCallable.h | 4 +- .../include/private/backend/DriverAPI.inc | 4 +- filament/backend/src/metal/MetalDriver.mm | 6 +-- filament/backend/src/metal/MetalHandles.h | 25 +++++++----- filament/backend/src/metal/MetalHandles.mm | 40 ++++++++++++++----- filament/backend/src/noop/NoopDriver.cpp | 2 +- filament/backend/src/opengl/OpenGLDriver.cpp | 2 +- filament/backend/src/vulkan/VulkanDriver.cpp | 2 +- filament/include/filament/SwapChain.h | 40 +++++++++++-------- filament/src/SwapChain.cpp | 9 +++-- filament/src/details/SwapChain.cpp | 11 ++--- filament/src/details/SwapChain.h | 7 ++-- 14 files changed, 96 insertions(+), 62 deletions(-) diff --git a/NEW_RELEASE_NOTES.md b/NEW_RELEASE_NOTES.md index 494e8046aa6..b78d8a399e9 100644 --- 
a/NEW_RELEASE_NOTES.md +++ b/NEW_RELEASE_NOTES.md @@ -8,4 +8,7 @@ appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md). ## Release notes for next branch cut +- Add new matedit tool +- filagui: Support rendering `GL_TEXTURE_EXTERNAL_OES` textures. +- `setFrameScheduledCallback` now takes a `utils::Invocable`. - engine: Add `isPaused()` diff --git a/filament/backend/include/backend/DriverEnums.h b/filament/backend/include/backend/DriverEnums.h index 411aa65a99d..969632c327a 100644 --- a/filament/backend/include/backend/DriverEnums.h +++ b/filament/backend/include/backend/DriverEnums.h @@ -24,6 +24,7 @@ #include +#include #include #include @@ -1224,7 +1225,7 @@ static_assert(sizeof(StencilState::StencilOperations) == 5u, static_assert(sizeof(StencilState) == 12u, "StencilState size not what was intended"); -using FrameScheduledCallback = void(*)(PresentCallable callable, void* user); +using FrameScheduledCallback = utils::Invocable; enum class Workaround : uint16_t { // The EASU pass must split because shader compiler flattens early-exit branch diff --git a/filament/backend/include/backend/PresentCallable.h b/filament/backend/include/backend/PresentCallable.h index 4402f22266d..f37d7704b49 100644 --- a/filament/backend/include/backend/PresentCallable.h +++ b/filament/backend/include/backend/PresentCallable.h @@ -48,7 +48,7 @@ namespace filament::backend { * and optional user data: * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * swapChain->setFrameScheduledCallback(myFrameScheduledCallback, nullptr); + * swapChain->setFrameScheduledCallback(nullptr, myFrameScheduledCallback); * if (renderer->beginFrame(swapChain)) { * renderer->render(view); * renderer->endFrame(); @@ -58,8 +58,6 @@ namespace filament::backend { * @remark Only Filament's Metal backend supports PresentCallables and frame callbacks. Other * backends ignore the callback (which will never be called) and proceed normally. 
* - * @remark The SwapChain::FrameScheduledCallback is called on an arbitrary thread. - * * Applications *must* call each PresentCallable they receive. Each PresentCallable represents a * frame that is waiting to be presented. If an application fails to call a PresentCallable, a * memory leak could occur. To "cancel" the presentation of a frame, pass false to the diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc index 2e2f81d9d37..f729a370258 100644 --- a/filament/backend/include/private/backend/DriverAPI.inc +++ b/filament/backend/include/private/backend/DriverAPI.inc @@ -138,8 +138,8 @@ DECL_DRIVER_API_N(beginFrame, DECL_DRIVER_API_N(setFrameScheduledCallback, backend::SwapChainHandle, sch, - backend::FrameScheduledCallback, callback, - void*, user) + backend::CallbackHandler*, handler, + backend::FrameScheduledCallback&&, callback) DECL_DRIVER_API_N(setFrameCompletedCallback, backend::SwapChainHandle, sch, diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index 65d1c918033..83c858bad20 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -222,10 +222,10 @@ } } -void MetalDriver::setFrameScheduledCallback(Handle sch, - FrameScheduledCallback callback, void* user) { +void MetalDriver::setFrameScheduledCallback( + Handle sch, CallbackHandler* handler, FrameScheduledCallback&& callback) { auto* swapChain = handle_cast(sch); - swapChain->setFrameScheduledCallback(callback, user); + swapChain->setFrameScheduledCallback(handler, std::move(callback)); } void MetalDriver::setFrameCompletedCallback(Handle sch, diff --git a/filament/backend/src/metal/MetalHandles.h b/filament/backend/src/metal/MetalHandles.h index fea6f0947f9..c6c65e1f7d6 100644 --- a/filament/backend/src/metal/MetalHandles.h +++ b/filament/backend/src/metal/MetalHandles.h @@ -31,6 +31,8 @@ #include "private/backend/SamplerGroup.h" 
+#include + #include #include #include @@ -71,9 +73,9 @@ class MetalSwapChain : public HwSwapChain { void releaseDrawable(); - void setFrameScheduledCallback(FrameScheduledCallback callback, void* user); - void setFrameCompletedCallback(CallbackHandler* handler, - CallbackHandler::Callback callback, void* user); + void setFrameScheduledCallback(CallbackHandler* handler, FrameScheduledCallback&& callback); + void setFrameCompletedCallback( + CallbackHandler* handler, CallbackHandler::Callback callback, void* user); // For CAMetalLayer-backed SwapChains, presents the drawable or schedules a // FrameScheduledCallback. @@ -110,14 +112,15 @@ class MetalSwapChain : public HwSwapChain { MetalExternalImage externalImage; SwapChainType type; - // These two fields store a callback and user data to notify the client that a frame is ready - // for presentation. - // If frameScheduledCallback is nullptr, then the Metal backend automatically calls - // presentDrawable when the frame is committed. - // Otherwise, the Metal backend will not automatically present the frame. Instead, clients bear - // the responsibility of presenting the frame by calling the PresentCallable object. - FrameScheduledCallback frameScheduledCallback = nullptr; - void* frameScheduledUserData = nullptr; + // These fields store a callback to notify the client that a frame is ready for presentation. If + // !frameScheduled.callback, then the Metal backend automatically calls presentDrawable when the + // frame is committed. Otherwise, the Metal backend will not automatically present the frame. + // Instead, clients bear the responsibility of presenting the frame by calling the + // PresentCallable object. 
+ struct { + CallbackHandler* handler = nullptr; + FrameScheduledCallback callback = {}; + } frameScheduled; struct { CallbackHandler* handler = nullptr; diff --git a/filament/backend/src/metal/MetalHandles.mm b/filament/backend/src/metal/MetalHandles.mm index 0d9976211da..e8ab879729a 100644 --- a/filament/backend/src/metal/MetalHandles.mm +++ b/filament/backend/src/metal/MetalHandles.mm @@ -221,9 +221,10 @@ static inline MTLTextureUsage getMetalTextureUsage(TextureUsage usage) { depthStencilTexture = [context.device newTextureWithDescriptor:descriptor]; } -void MetalSwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void* user) { - frameScheduledCallback = callback; - frameScheduledUserData = user; +void MetalSwapChain::setFrameScheduledCallback( + CallbackHandler* handler, FrameScheduledCallback&& callback) { + frameScheduled.handler = handler; + frameScheduled.callback = std::move(callback); } void MetalSwapChain::setFrameCompletedCallback(CallbackHandler* handler, @@ -238,7 +239,7 @@ static inline MTLTextureUsage getMetalTextureUsage(TextureUsage usage) { scheduleFrameCompletedCallback(); } if (drawable) { - if (frameScheduledCallback) { + if (frameScheduled.callback) { scheduleFrameScheduledCallback(); } else { [getPendingCommandBuffer(&context) presentDrawable:drawable]; @@ -296,21 +297,38 @@ void presentDrawable(bool presentFrame, void* user) { } void MetalSwapChain::scheduleFrameScheduledCallback() { - if (!frameScheduledCallback) { + if (!frameScheduled.callback) { return; } assert_invariant(drawable); - // Destroy this by calling maybePresentAndDestroyAsync() later. - auto* presentData = PresentDrawableData::create(drawable, context.driver); + struct Callback { + Callback(FrameScheduledCallback&& callback, id drawable, + MetalDriver* driver) + : f(std::move(callback)), data(PresentDrawableData::create(drawable, driver)) {} + FrameScheduledCallback f; + // PresentDrawableData* is destroyed by maybePresentAndDestroyAsync() later. 
+ std::unique_ptr data; + static void func(void* user) { + auto* const c = reinterpret_cast(user); + PresentDrawableData* presentDrawableData = c->data.release(); + PresentCallable presentCallable(presentDrawable, presentDrawableData); + c->f(presentCallable); + delete c; + } + }; - FrameScheduledCallback userCallback = frameScheduledCallback; - void* userData = frameScheduledUserData; + // This callback pointer will be captured by the block. Even if the scheduled handler is never + // called, the unique_ptr will still ensure we don't leak memory. + __block auto callback = + std::make_unique(std::move(frameScheduled.callback), drawable, context.driver); + backend::CallbackHandler* handler = frameScheduled.handler; + MetalDriver* driver = context.driver; [getPendingCommandBuffer(&context) addScheduledHandler:^(id cb) { - PresentCallable callable(presentDrawable, static_cast(presentData)); - userCallback(callable, userData); + Callback* user = callback.release(); + driver->scheduleCallback(handler, user, &Callback::func); }]; } diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp index 911f967413d..9984bed9a68 100644 --- a/filament/backend/src/noop/NoopDriver.cpp +++ b/filament/backend/src/noop/NoopDriver.cpp @@ -54,7 +54,7 @@ void NoopDriver::beginFrame(int64_t monotonic_clock_ns, } void NoopDriver::setFrameScheduledCallback(Handle sch, - FrameScheduledCallback callback, void* user) { + CallbackHandler* handler, FrameScheduledCallback&& callback) { } diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index bad0bba635a..ddf699b75f1 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -3411,7 +3411,7 @@ void OpenGLDriver::beginFrame( } void OpenGLDriver::setFrameScheduledCallback(Handle sch, - FrameScheduledCallback callback, void* user) { + CallbackHandler* handler, FrameScheduledCallback&& callback) { 
DEBUG_MARKER() } diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index d039d3ff267..07df284726f 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -394,7 +394,7 @@ void VulkanDriver::beginFrame(int64_t monotonic_clock_ns, } void VulkanDriver::setFrameScheduledCallback(Handle sch, - FrameScheduledCallback callback, void* user) { + CallbackHandler* handler, FrameScheduledCallback&& callback) { } void VulkanDriver::setFrameCompletedCallback(Handle sch, diff --git a/filament/include/filament/SwapChain.h b/filament/include/filament/SwapChain.h index 0af01afc966..585e016eec0 100644 --- a/filament/include/filament/SwapChain.h +++ b/filament/include/filament/SwapChain.h @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -264,13 +265,22 @@ class UTILS_PUBLIC SwapChain : public FilamentAPI { * backend. * * A FrameScheduledCallback can be set on an individual SwapChain through - * SwapChain::setFrameScheduledCallback. If the callback is set, then the SwapChain will *not* - * automatically schedule itself for presentation. Instead, the application must call the - * PresentCallable passed to the FrameScheduledCallback. + * SwapChain::setFrameScheduledCallback. If the callback is set for a given frame, then the + * SwapChain will *not* automatically schedule itself for presentation. Instead, the application + * must call the PresentCallable passed to the FrameScheduledCallback. * - * There may be only one FrameScheduledCallback set per SwapChain. A call to - * SwapChain::setFrameScheduledCallback will overwrite any previous FrameScheduledCallbacks set - * on the same SwapChain. + * Each SwapChain can have only one FrameScheduledCallback set per frame. If + * setFrameScheduledCallback is called multiple times on the same SwapChain before + * Renderer::endFrame(), the most recent call effectively overwrites any previously set + * callback. 
This allows the callback to be updated as needed before the frame has finished + * encoding. + * + * The "last" callback set by setFrameScheduledCallback gets "latched" when Renderer::endFrame() + * is executed. At this point, the state of the callback is fixed and is the one used for the + * frame that was just encoded. Subsequent changes to the callback using + * setFrameScheduledCallback after endFrame() apply to the next frame. + * + * Use \c setFrameScheduledCallback() (with default arguments) to unset the callback. * * If your application delays the call to the PresentCallable by, for example, calling it on a * separate thread, you must ensure all PresentCallables have been called before shutting down @@ -278,28 +288,26 @@ class UTILS_PUBLIC SwapChain : public FilamentAPI { * Engine::shutdown. This is necessary to ensure the Filament Engine has had a chance to clean * up all memory related to frame presentation. * - * @param callback A callback, or nullptr to unset. - * @param user An optional pointer to user data passed to the callback function. + * @param handler Handler to dispatch the callback or nullptr for the default handler. + * @param callback Callback called when the frame is scheduled. * * @remark Only Filament's Metal backend supports PresentCallables and frame callbacks. Other * backends ignore the callback (which will never be called) and proceed normally. * - * @remark The SwapChain::FrameScheduledCallback is called on an arbitrary thread. - * + * @see CallbackHandler * @see PresentCallable */ - void setFrameScheduledCallback(FrameScheduledCallback UTILS_NULLABLE callback, - void* UTILS_NULLABLE user = nullptr); + void setFrameScheduledCallback(backend::CallbackHandler* UTILS_NULLABLE handler = nullptr, + FrameScheduledCallback&& callback = {}); /** - * Returns the SwapChain::FrameScheduledCallback that was previously set with - * SwapChain::setFrameScheduledCallback, or nullptr if one is not set. 
+ * Returns whether or not this SwapChain currently has a FrameScheduledCallback set. * - * @return the previously-set FrameScheduledCallback, or nullptr + * @return true, if the last call to setFrameScheduledCallback set a callback * * @see SwapChain::setFrameCompletedCallback */ - UTILS_NULLABLE FrameScheduledCallback getFrameScheduledCallback() const noexcept; + bool isFrameScheduledCallbackSet() const noexcept; /** * FrameCompletedCallback is a callback function that notifies an application when a frame's diff --git a/filament/src/SwapChain.cpp b/filament/src/SwapChain.cpp index a242ef06ccb..dd7db7bd011 100644 --- a/filament/src/SwapChain.cpp +++ b/filament/src/SwapChain.cpp @@ -28,12 +28,13 @@ void* SwapChain::getNativeWindow() const noexcept { return downcast(this)->getNativeWindow(); } -void SwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void* user) { - downcast(this)->setFrameScheduledCallback(callback, user); +void SwapChain::setFrameScheduledCallback( + backend::CallbackHandler* handler, FrameScheduledCallback&& callback) { + downcast(this)->setFrameScheduledCallback(handler, std::move(callback)); } -SwapChain::FrameScheduledCallback SwapChain::getFrameScheduledCallback() const noexcept { - return downcast(this)->getFrameScheduledCallback(); +bool SwapChain::isFrameScheduledCallbackSet() const noexcept { + return downcast(this)->isFrameScheduledCallbackSet(); } void SwapChain::setFrameCompletedCallback(backend::CallbackHandler* handler, diff --git a/filament/src/details/SwapChain.cpp b/filament/src/details/SwapChain.cpp index ef4eb4fabd4..0407d893763 100644 --- a/filament/src/details/SwapChain.cpp +++ b/filament/src/details/SwapChain.cpp @@ -69,13 +69,14 @@ void FSwapChain::terminate(FEngine& engine) noexcept { engine.getDriverApi().destroySwapChain(mHwSwapChain); } -void FSwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void* user) { - mFrameScheduledCallback = callback; - 
mEngine.getDriverApi().setFrameScheduledCallback(mHwSwapChain, callback, user); +void FSwapChain::setFrameScheduledCallback( + backend::CallbackHandler* handler, FrameScheduledCallback&& callback) { + mFrameScheduledCallbackIsSet = bool(callback); + mEngine.getDriverApi().setFrameScheduledCallback(mHwSwapChain, handler, std::move(callback)); } -SwapChain::FrameScheduledCallback FSwapChain::getFrameScheduledCallback() const noexcept { - return mFrameScheduledCallback; +bool FSwapChain::isFrameScheduledCallbackSet() const noexcept { + return mFrameScheduledCallbackIsSet; } void FSwapChain::setFrameCompletedCallback(backend::CallbackHandler* handler, diff --git a/filament/src/details/SwapChain.h b/filament/src/details/SwapChain.h index 7a97727e832..efe7483563e 100644 --- a/filament/src/details/SwapChain.h +++ b/filament/src/details/SwapChain.h @@ -78,9 +78,10 @@ class FSwapChain : public SwapChain { return mHwSwapChain; } - void setFrameScheduledCallback(FrameScheduledCallback callback, void* user); + void setFrameScheduledCallback( + backend::CallbackHandler* handler, FrameScheduledCallback&& callback); - FrameScheduledCallback getFrameScheduledCallback() const noexcept; + bool isFrameScheduledCallbackSet() const noexcept; void setFrameCompletedCallback(backend::CallbackHandler* handler, utils::Invocable&& callback) noexcept; @@ -96,7 +97,7 @@ class FSwapChain : public SwapChain { private: FEngine& mEngine; backend::Handle mHwSwapChain; - FrameScheduledCallback mFrameScheduledCallback{}; + bool mFrameScheduledCallbackIsSet = false; void* mNativeWindow{}; uint32_t mWidth{}; uint32_t mHeight{}; From d697d42c4a608ff97bc7c7e769417fbfc29a9eed Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Mon, 29 Apr 2024 11:32:04 -0700 Subject: [PATCH 23/31] vk: finer-grain barrier at renderpass end (#7808) - At the end of a renderpass we use a more fine-grained barrier for each of the attachments in the render target - Make sure that buffer update are barrier'd from previous reads - 
Remove previous Mali workaround barriers. Seems to be fine without them on pixels + Mali. --- filament/backend/src/vulkan/VulkanBuffer.cpp | 60 ++++++--- filament/backend/src/vulkan/VulkanBuffer.h | 3 +- filament/backend/src/vulkan/VulkanDriver.cpp | 115 +++++++++++------- filament/backend/src/vulkan/VulkanDriver.h | 21 ++++ .../backend/src/vulkan/VulkanFboCache.cpp | 10 +- filament/backend/src/vulkan/VulkanFboCache.h | 17 +-- filament/backend/src/vulkan/VulkanHandles.cpp | 12 +- filament/backend/src/vulkan/VulkanHandles.h | 9 +- 8 files changed, 165 insertions(+), 82 deletions(-) diff --git a/filament/backend/src/vulkan/VulkanBuffer.cpp b/filament/backend/src/vulkan/VulkanBuffer.cpp index 1b5f59eafd6..bc09c7f5213 100644 --- a/filament/backend/src/vulkan/VulkanBuffer.cpp +++ b/filament/backend/src/vulkan/VulkanBuffer.cpp @@ -27,8 +27,8 @@ VulkanBuffer::VulkanBuffer(VmaAllocator allocator, VulkanStagePool& stagePool, VkBufferUsageFlags usage, uint32_t numBytes) : mAllocator(allocator), mStagePool(stagePool), - mUsage(usage) { - + mUsage(usage), + mUpdatedBytes(0) { // for now make sure that only 1 bit is set in usage // (because loadFromCpu() assumes that somewhat) assert_invariant(usage && !(usage & (usage - 1))); @@ -49,7 +49,7 @@ VulkanBuffer::~VulkanBuffer() { } void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint32_t byteOffset, - uint32_t numBytes) const { + uint32_t numBytes) { assert_invariant(byteOffset == 0); VulkanStage const* stage = mStagePool.acquireStage(numBytes); void* mapped; @@ -58,15 +58,47 @@ void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint vmaUnmapMemory(mAllocator, stage->memory); vmaFlushAllocation(mAllocator, stage->memory, byteOffset, numBytes); + // If there was a previous update, then we need to make sure the following write is properly + // synced with the previous read. 
+ if (mUpdatedBytes > 0) { + VkAccessFlags srcAccess = 0; + VkPipelineStageFlags srcStage = 0; + if (mUsage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { + srcAccess = VK_ACCESS_SHADER_READ_BIT; + srcStage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } else if (mUsage & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) { + srcAccess = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + srcStage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + } else if (mUsage & VK_BUFFER_USAGE_INDEX_BUFFER_BIT) { + srcAccess = VK_ACCESS_INDEX_READ_BIT; + srcStage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + } + + VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = srcAccess, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = mGpuBuffer, + .size = mUpdatedBytes, + }; + vkCmdPipelineBarrier(cmdbuf, srcStage, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + } + VkBufferCopy region{ .size = numBytes }; vkCmdCopyBuffer(cmdbuf, stage->buffer, mGpuBuffer, 1, ®ion); + mUpdatedBytes = numBytes; + // Firstly, ensure that the copy finishes before the next draw call. // Secondly, in case the user decides to upload another chunk (without ever using the first one) // we need to ensure that this upload completes first (hence // dstStageMask=VK_PIPELINE_STAGE_TRANSFER_BIT). 
VkAccessFlags dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + if (mUsage & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) { dstAccessMask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; dstStageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; @@ -75,26 +107,24 @@ void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint dstStageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; } else if (mUsage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { dstAccessMask |= VK_ACCESS_UNIFORM_READ_BIT; - // NOTE: ideally dstStageMask would include VERTEX_SHADER_BIT | FRAGMENT_SHADER_BIT, but - // this seems to be insufficient on Mali devices. To work around this we are using a more - // aggressive ALL_GRAPHICS_BIT barrier. - dstStageMask |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; + dstStageMask |= + (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); } else if (mUsage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) { // TODO: implement me } VkBufferMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = dstAccessMask, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = mGpuBuffer, - .size = VK_WHOLE_SIZE, + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = dstAccessMask, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = mGpuBuffer, + .size = VK_WHOLE_SIZE, }; vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, dstStageMask, 0, 0, nullptr, 1, - &barrier, 0, nullptr); + &barrier, 0, nullptr); } } // namespace filament::backend diff --git a/filament/backend/src/vulkan/VulkanBuffer.h b/filament/backend/src/vulkan/VulkanBuffer.h index db24edf3943..e496f930dbe 100644 --- a/filament/backend/src/vulkan/VulkanBuffer.h +++ 
b/filament/backend/src/vulkan/VulkanBuffer.h @@ -30,7 +30,7 @@ class VulkanBuffer { uint32_t numBytes); ~VulkanBuffer(); void loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint32_t byteOffset, - uint32_t numBytes) const; + uint32_t numBytes); VkBuffer getGpuBuffer() const { return mGpuBuffer; } @@ -42,6 +42,7 @@ class VulkanBuffer { VmaAllocation mGpuMemory = VK_NULL_HANDLE; VkBuffer mGpuBuffer = VK_NULL_HANDLE; VkBufferUsageFlags mUsage = {}; + uint32_t mUpdatedBytes = 0; }; } // namespace filament::backend diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index 07df284726f..49e6f581c9f 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -1236,12 +1236,7 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP } } - VulkanLayout const currentDepthLayout = depth.getLayout(); - VulkanLayout const renderPassDepthLayout = VulkanLayout::DEPTH_ATTACHMENT; - // We need to keep the final layout as an attachment because the implicit transition does not - // have any barrier guarrantees, meaning that if we want to sample from the output in the next - // pass, then we'd have a race-condition/validation error. - VulkanLayout const finalDepthLayout = renderPassDepthLayout; + VulkanLayout currentDepthLayout = depth.getLayout(); TargetBufferFlags clearVal = params.flags.clear; TargetBufferFlags discardEndVal = params.flags.discardEnd; @@ -1250,16 +1245,20 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP discardEndVal &= ~TargetBufferFlags::DEPTH; clearVal &= ~TargetBufferFlags::DEPTH; } - auto const attachmentSubresourceRange = depth.getSubresourceRange(); - depth.texture->setLayout(attachmentSubresourceRange, VulkanLayout::DEPTH_ATTACHMENT); + // If the depth attachment texture was previously sampled, then we need to manually + // transition it to an attachment. 
This is necessary to also set up a barrier between the + // previous read and the potentially coming write. + if (currentDepthLayout == VulkanLayout::DEPTH_SAMPLER) { + depth.texture->transitionLayout(cmdbuffer, depth.getSubresourceRange(), + VulkanLayout::DEPTH_ATTACHMENT); + currentDepthLayout = VulkanLayout::DEPTH_ATTACHMENT; + } } // Create the VkRenderPass or fetch it from cache. VulkanFboCache::RenderPassKey rpkey = { .initialColorLayoutMask = 0, .initialDepthLayout = currentDepthLayout, - .renderPassDepthLayout = renderPassDepthLayout, - .finalDepthLayout = finalDepthLayout, .depthFormat = depth.getFormat(), .clear = clearVal, .discardStart = discardStart, @@ -1296,19 +1295,30 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP .layers = 1, .samples = rpkey.samples, }; + auto& renderPassAttachments = mRenderPassFboInfo.attachments; for (int i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) { if (!rt->getColor(i).texture) { fbkey.color[i] = VK_NULL_HANDLE; fbkey.resolve[i] = VK_NULL_HANDLE; } else if (fbkey.samples == 1) { - fbkey.color[i] = rt->getColor(i).getImageView(); + auto& colorAttachment = rt->getColor(i); + renderPassAttachments.insert(colorAttachment); + fbkey.color[i] = colorAttachment.getImageView(); fbkey.resolve[i] = VK_NULL_HANDLE; assert_invariant(fbkey.color[i]); } else { - fbkey.color[i] = rt->getMsaaColor(i).getImageView(); - VulkanTexture* texture = (VulkanTexture*) rt->getColor(i).texture; + auto& msaaColorAttachment = rt->getMsaaColor(i); + renderPassAttachments.insert(msaaColorAttachment); + + auto& colorAttachment = rt->getColor(i); + fbkey.color[i] = msaaColorAttachment.getImageView(); + + VulkanTexture* texture = colorAttachment.texture; if (texture->samples == 1) { - fbkey.resolve[i] = rt->getColor(i).getImageView(); + mRenderPassFboInfo.hasColorResolve = true; + + renderPassAttachments.insert(colorAttachment); + fbkey.resolve[i] = colorAttachment.getImageView(); assert_invariant(fbkey.resolve[i]); } 
assert_invariant(fbkey.color[i]); @@ -1317,6 +1327,10 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP if (depth.texture) { fbkey.depth = depth.getImageView(); assert_invariant(fbkey.depth); + renderPassAttachments.insert(depth); + + UTILS_UNUSED_IN_RELEASE bool const depthDiscardEnd = + any(rpkey.discardEnd & TargetBufferFlags::DEPTH); // Vulkan 1.1 does not support multisampled depth resolve, so let's check here // and assert if this is requested. (c.f. isAutoDepthResolveSupported) @@ -1325,7 +1339,7 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP // - If the RT is MS then all SS attachments are auto resolved if not discarded. assert_invariant(!(rt->getSamples() > 1 && rt->getDepth().texture->samples == 1 && - !any(rpkey.discardEnd & TargetBufferFlags::DEPTH))); + !depthDiscardEnd)); } VkFramebuffer vkfb = mFramebufferCache.getFramebuffer(fbkey); @@ -1338,16 +1352,10 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP } #endif - // The current command buffer now owns a reference to the render target and its attachments. - // Note that we must acquire parent textures, not sidecars. + // The current command buffer now has references to the render target and its attachments. commands.acquire(rt); - if (depth.texture) { - commands.acquire((VulkanTexture*) depth.texture); - } - for (int i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) { - if (rt->getColor(i).texture) { - commands.acquire(rt->getColor(i).texture); - } + for (auto const& attachment: renderPassAttachments) { + commands.acquire(attachment.texture); } // Populate the structures required for vkCmdBeginRenderPass. @@ -1433,27 +1441,51 @@ void VulkanDriver::endRenderPass(int) { // issue several of them when considering MRT. This would be very complex to set up and would // require more state tracking, so we've chosen to use a memory barrier for simplicity and // correctness. 
- - // NOTE: ideally dstStageMask would merely be VERTEX_SHADER_BIT | FRAGMENT_SHADER_BIT, but this - // seems to be insufficient on Mali devices. To work around this we are adding a more aggressive - // TOP_OF_PIPE barrier. if (!rt->isSwapChain()) { - VkMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - }; - VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - if (rt->hasDepth()) { - barrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - srcStageMask |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + for (auto const& attachment: mRenderPassFboInfo.attachments) { + bool const isDepth = attachment.isDepth(); + VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // This is a workaround around a validation issue (might not be an actual driver issue). + if (mRenderPassFboInfo.hasColorResolve && !isDepth) { + srcStageMask = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; + } + + VkPipelineStageFlags dstStageMask = + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + VkAccessFlags srcAccess = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkAccessFlags dstAccess = VK_ACCESS_SHADER_READ_BIT; + VulkanLayout layout = VulkanFboCache::FINAL_COLOR_ATTACHMENT_LAYOUT; + if (isDepth) { + srcAccess = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dstAccess = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + layout = VulkanFboCache::FINAL_DEPTH_ATTACHMENT_LAYOUT; + } + + auto const vkLayout = imgutil::getVkLayout(layout); + auto const& range = attachment.getSubresourceRange(); + VkImageMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = srcAccess, + .dstAccessMask = dstAccess, + .oldLayout = 
vkLayout, + .newLayout = vkLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = attachment.getImage(), + .subresourceRange = range, + }; + + attachment.texture->setLayout(range, layout); + vkCmdPipelineBarrier(cmdbuffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, + 1, &barrier); } - vkCmdPipelineBarrier(cmdbuffer, srcStageMask, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | // <== For Mali - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - 0, 1, &barrier, 0, nullptr, 0, nullptr); } + mRenderPassFboInfo.clear(); mDescriptorSetManager.clearState(); mCurrentRenderPass.renderTarget = nullptr; mCurrentRenderPass.renderPass = VK_NULL_HANDLE; @@ -1818,7 +1850,6 @@ void VulkanDriver::bindPipeline(PipelineState pipelineState) { } VkSampler const vksampler = mSamplerCache.getSampler(boundSampler->s); - #if FVK_ENABLED_DEBUG_SAMPLER_NAME VulkanDriver::DebugUtils::setName(VK_OBJECT_TYPE_SAMPLER, reinterpret_cast(vksampler), bindingToName[binding].c_str()); diff --git a/filament/backend/src/vulkan/VulkanDriver.h b/filament/backend/src/vulkan/VulkanDriver.h index fca5c45c5be..8de0ae4a26e 100644 --- a/filament/backend/src/vulkan/VulkanDriver.h +++ b/filament/backend/src/vulkan/VulkanDriver.h @@ -42,6 +42,25 @@ namespace filament::backend { class VulkanPlatform; struct VulkanSamplerGroup; +// The maximum number of attachments for any renderpass (color + resolve + depth) +constexpr uint8_t MAX_RENDERTARGET_ATTACHMENT_TEXTURES = + MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT * 2 + 1; + +// We need to store information about a render pass to enable better barriers at the end of a +// renderpass. 
+struct RenderPassFboBundle { + using AttachmentArray = + CappedArray; + + AttachmentArray attachments; + bool hasColorResolve = false; + + void clear() { + attachments.clear(); + hasColorResolve = false; + } +}; + class VulkanDriver final : public DriverBase { public: static Driver* create(VulkanPlatform* platform, VulkanContext const& context, @@ -141,6 +160,8 @@ class VulkanDriver final : public DriverBase { VulkanDescriptorSetManager::GetPipelineLayoutFunction mGetPipelineFunction; + RenderPassFboBundle mRenderPassFboInfo; + bool const mIsSRGBSwapChainSupported; }; diff --git a/filament/backend/src/vulkan/VulkanFboCache.cpp b/filament/backend/src/vulkan/VulkanFboCache.cpp index d8dc804fa05..c754fe57e70 100644 --- a/filament/backend/src/vulkan/VulkanFboCache.cpp +++ b/filament/backend/src/vulkan/VulkanFboCache.cpp @@ -33,8 +33,6 @@ bool VulkanFboCache::RenderPassEq::operator()(const RenderPassKey& k1, const RenderPassKey& k2) const { if (k1.initialColorLayoutMask != k2.initialColorLayoutMask) return false; if (k1.initialDepthLayout != k2.initialDepthLayout) return false; - if (k1.renderPassDepthLayout != k2.renderPassDepthLayout) return false; - if (k1.finalDepthLayout != k2.finalDepthLayout) return false; for (int i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) { if (k1.colorFormat[i] != k2.colorFormat[i]) return false; } @@ -243,7 +241,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .initialLayout = ((!discard && config.initialColorLayoutMask & (1 << i)) || clear) ? 
imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT) : imgutil::getVkLayout(VulkanLayout::UNDEFINED), - .finalLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), + .finalLayout = imgutil::getVkLayout(FINAL_COLOR_ATTACHMENT_LAYOUT), }; } @@ -281,7 +279,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .stencilLoadOp = kDontCare, .stencilStoreOp = kDisableStore, .initialLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), - .finalLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), + .finalLayout = imgutil::getVkLayout(FINAL_COLOR_ATTACHMENT_LAYOUT), }; } @@ -290,7 +288,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { const bool clear = any(config.clear & TargetBufferFlags::DEPTH); const bool discardStart = any(config.discardStart & TargetBufferFlags::DEPTH); const bool discardEnd = any(config.discardEnd & TargetBufferFlags::DEPTH); - depthAttachmentRef.layout = imgutil::getVkLayout(config.renderPassDepthLayout); + depthAttachmentRef.layout = imgutil::getVkLayout(VulkanLayout::DEPTH_ATTACHMENT); depthAttachmentRef.attachment = attachmentIndex; attachments[attachmentIndex++] = { .format = config.depthFormat, @@ -300,7 +298,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, .initialLayout = imgutil::getVkLayout(config.initialDepthLayout), - .finalLayout = imgutil::getVkLayout(config.finalDepthLayout), + .finalLayout = imgutil::getVkLayout(FINAL_DEPTH_ATTACHMENT_LAYOUT), }; } renderPassInfo.attachmentCount = attachmentIndex; diff --git a/filament/backend/src/vulkan/VulkanFboCache.h b/filament/backend/src/vulkan/VulkanFboCache.h index cefdae2995d..2f4b1a9101c 100644 --- a/filament/backend/src/vulkan/VulkanFboCache.h +++ b/filament/backend/src/vulkan/VulkanFboCache.h @@ -35,24 +35,25 @@ namespace filament::backend { // class VulkanFboCache { public: 
+ constexpr static VulkanLayout FINAL_COLOR_ATTACHMENT_LAYOUT = VulkanLayout::COLOR_ATTACHMENT; + constexpr static VulkanLayout FINAL_RESOLVE_ATTACHMENT_LAYOUT = VulkanLayout::COLOR_ATTACHMENT; + constexpr static VulkanLayout FINAL_DEPTH_ATTACHMENT_LAYOUT = VulkanLayout::DEPTH_ATTACHMENT; + // RenderPassKey is a small POD representing the immutable state that is used to construct // a VkRenderPass. It is hashed and used as a lookup key. - // TODO: This struct can be reduced in size by using a subset of formats instead of VkFormat - // and removing the "finalDepthLayout" field. struct alignas(8) RenderPassKey { // For each target, we need to know three image layouts: the layout BEFORE the pass, the // layout DURING the pass, and the layout AFTER the pass. Here are the rules: // - For depth, we explicitly specify all three layouts. // - Color targets have their initial image layout specified with a bitmask. // - For each color target, the pre-existing layout is either UNDEFINED (0) or GENERAL (1). - // - The render pass and final images layout for color buffers is always GENERAL. + // - The render pass and final images layout for color buffers is always + // VulkanLayout::COLOR_ATTACHMENT. uint8_t initialColorLayoutMask; // Note that if VulkanLayout grows beyond 16, we'd need to up this. 
- VulkanLayout initialDepthLayout : 4; - VulkanLayout renderPassDepthLayout : 4; - VulkanLayout finalDepthLayout : 4; - uint8_t padding0 : 4; + VulkanLayout initialDepthLayout : 8; + uint8_t padding0; uint8_t padding1; VkFormat colorFormat[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT]; // 32 bytes @@ -63,7 +64,7 @@ class VulkanFboCache { uint8_t samples; // 1 byte uint8_t needsResolveMask; // 1 byte uint8_t subpassMask; // 1 byte - bool padding2; // 1 byte + uint8_t padding2; // 1 byte }; struct RenderPassVal { VkRenderPass handle; diff --git a/filament/backend/src/vulkan/VulkanHandles.cpp b/filament/backend/src/vulkan/VulkanHandles.cpp index 5c951ed5b2e..0dd11ce8d1d 100644 --- a/filament/backend/src/vulkan/VulkanHandles.cpp +++ b/filament/backend/src/vulkan/VulkanHandles.cpp @@ -285,13 +285,13 @@ VulkanRenderTarget::VulkanRenderTarget(VkDevice device, VkPhysicalDevice physica // Constrain the sample count according to both kinds of sample count masks obtained from // VkPhysicalDeviceProperties. This is consistent with the VulkanTexture constructor. - const auto& limits = context.getPhysicalDeviceLimits(); + auto const& limits = context.getPhysicalDeviceLimits(); mSamples = samples = reduceSampleCount(samples, limits.framebufferDepthSampleCounts & limits.framebufferColorSampleCounts); // Create sidecar MSAA textures for color attachments if they don't already exist. 
for (int index = 0; index < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; index++) { - const VulkanAttachment& spec = color[index]; + VulkanAttachment const& spec = color[index]; VulkanTexture* texture = (VulkanTexture*) spec.texture; if (texture && texture->samples == 1) { auto msTexture = texture->getSidecar(); @@ -354,19 +354,19 @@ VkExtent2D VulkanRenderTarget::getExtent() const { return {width, height}; } -VulkanAttachment VulkanRenderTarget::getColor(int target) const { +VulkanAttachment& VulkanRenderTarget::getColor(int target) { return mColor[target]; } -VulkanAttachment VulkanRenderTarget::getMsaaColor(int target) const { +VulkanAttachment& VulkanRenderTarget::getMsaaColor(int target) { return mMsaaAttachments[target]; } -VulkanAttachment VulkanRenderTarget::getDepth() const { +VulkanAttachment& VulkanRenderTarget::getDepth() { return mDepth; } -VulkanAttachment VulkanRenderTarget::getMsaaDepth() const { +VulkanAttachment& VulkanRenderTarget::getMsaaDepth() { return mMsaaDepthAttachment; } diff --git a/filament/backend/src/vulkan/VulkanHandles.h b/filament/backend/src/vulkan/VulkanHandles.h index 111e1299a3f..bb20097a474 100644 --- a/filament/backend/src/vulkan/VulkanHandles.h +++ b/filament/backend/src/vulkan/VulkanHandles.h @@ -274,10 +274,11 @@ struct VulkanRenderTarget : private HwRenderTarget, VulkanResource { void transformClientRectToPlatform(VkRect2D* bounds) const; void transformClientRectToPlatform(VkViewport* bounds) const; VkExtent2D getExtent() const; - VulkanAttachment getColor(int target) const; - VulkanAttachment getMsaaColor(int target) const; - VulkanAttachment getDepth() const; - VulkanAttachment getMsaaDepth() const; + // We return references in the following methods to avoid a copy. 
+ VulkanAttachment& getColor(int target); + VulkanAttachment& getMsaaColor(int target); + VulkanAttachment& getDepth(); + VulkanAttachment& getMsaaDepth(); uint8_t getColorTargetCount(const VulkanRenderPass& pass) const; uint8_t getSamples() const { return mSamples; } bool hasDepth() const { return mDepth.texture; } From 883507a26f321f58c92d4fb1f5164a9844c574d6 Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Mon, 29 Apr 2024 17:47:05 -0600 Subject: [PATCH 24/31] Remove spirv-opt size optimizations for MSL (#7495) --- libs/filamat/src/GLSLPostProcessor.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/libs/filamat/src/GLSLPostProcessor.cpp b/libs/filamat/src/GLSLPostProcessor.cpp index ce8db716dfc..6765b122f51 100644 --- a/libs/filamat/src/GLSLPostProcessor.cpp +++ b/libs/filamat/src/GLSLPostProcessor.cpp @@ -604,7 +604,13 @@ std::shared_ptr GLSLPostProcessor::createOptimizer( }); if (optimization == MaterialBuilder::Optimization::SIZE) { - registerSizePasses(*optimizer, config); + // When optimizing for size, we don't run the SPIR-V through any size optimization passes + // when targeting MSL. This results in better line dictionary compression. We do, however, + // still register the passes necessary (below) to support half precision floating point + // math. 
+ if (config.targetApi != MaterialBuilder::TargetApi::METAL) { + registerSizePasses(*optimizer, config); + } } else if (optimization == MaterialBuilder::Optimization::PERFORMANCE) { registerPerformancePasses(*optimizer, config); } @@ -719,7 +725,6 @@ void GLSLPostProcessor::registerSizePasses(Optimizer& optimizer, Config const& c RegisterPass(CreateWrapOpKillPass()); RegisterPass(CreateDeadBranchElimPass()); - RegisterPass(CreateMergeReturnPass(), MaterialBuilder::TargetApi::METAL); RegisterPass(CreateInlineExhaustivePass()); RegisterPass(CreateEliminateDeadFunctionsPass()); RegisterPass(CreatePrivateToLocalPass()); @@ -728,11 +733,9 @@ void GLSLPostProcessor::registerSizePasses(Optimizer& optimizer, Config const& c RegisterPass(CreateCCPPass()); RegisterPass(CreateLoopUnrollPass(true)); RegisterPass(CreateDeadBranchElimPass()); - RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL); RegisterPass(CreateScalarReplacementPass(0)); RegisterPass(CreateLocalSingleStoreElimPass()); RegisterPass(CreateIfConversionPass()); - RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL); RegisterPass(CreateAggressiveDCEPass()); RegisterPass(CreateDeadBranchElimPass()); RegisterPass(CreateBlockMergePass()); @@ -748,7 +751,6 @@ void GLSLPostProcessor::registerSizePasses(Optimizer& optimizer, Config const& c RegisterPass(CreateBlockMergePass()); RegisterPass(CreateLocalMultiStoreElimPass()); RegisterPass(CreateRedundancyEliminationPass()); - RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL); RegisterPass(CreateAggressiveDCEPass()); RegisterPass(CreateCFGCleanupPass()); } From 56fa0783767b6f18a89e4ff2b38b7a1ca6dc8f40 Mon Sep 17 00:00:00 2001 From: Benjamin Doherty Date: Mon, 29 Apr 2024 16:49:27 -0700 Subject: [PATCH 25/31] Release Filament 1.51.6 --- NEW_RELEASE_NOTES.md | 5 ----- README.md | 4 ++-- RELEASE_NOTES.md | 7 +++++++ android/gradle.properties | 2 +- ios/CocoaPods/Filament.podspec | 4 ++-- 
web/filament-js/package.json | 2 +- 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/NEW_RELEASE_NOTES.md b/NEW_RELEASE_NOTES.md index b78d8a399e9..4a1a9c7fa7e 100644 --- a/NEW_RELEASE_NOTES.md +++ b/NEW_RELEASE_NOTES.md @@ -7,8 +7,3 @@ for next branch cut* header. appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md). ## Release notes for next branch cut - -- Add new matedit tool -- filagui: Support rendering `GL_TEXTURE_EXTERNAL_OES` textures. -- `setFrameScheduledCallback` now takes a `utils::Invocable`. -- engine: Add `isPaused()` diff --git a/README.md b/README.md index c4dc01abefd..01948e41c86 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.51.5' + implementation 'com.google.android.filament:filament-android:1.51.6' } ``` @@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ```shell -pod 'Filament', '~> 1.51.5' +pod 'Filament', '~> 1.51.6' ``` ### Snapshots diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 7c6d175e8ec..e09d514005a 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,13 @@ A new header is inserted each time a *tag* is created. Instead, if you are authoring a PR for the main branch, add your release note to [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md). +## v1.51.7 + +- Add new matedit tool +- filagui: Support rendering `GL_TEXTURE_EXTERNAL_OES` textures. +- `setFrameScheduledCallback` now takes a `utils::Invocable`. 
+- engine: Add `isPaused()` + ## v1.51.6 - Add new matedit tool diff --git a/android/gradle.properties b/android/gradle.properties index 040c9248501..87decc65c2c 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.51.5 +VERSION_NAME=1.51.6 POM_DESCRIPTION=Real-time physically based rendering engine for Android. diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index 6bc053c37a9..3c879e3037e 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.51.5" + spec.version = "1.51.6" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = "https://google.github.io/filament" spec.authors = "Google LLC." spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.51.5/filament-v1.51.5-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.51.6/filament-v1.51.6-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. 
spec.pod_target_xcconfig = { diff --git a/web/filament-js/package.json b/web/filament-js/package.json index 184fa5d996b..df1e4ffbd88 100644 --- a/web/filament-js/package.json +++ b/web/filament-js/package.json @@ -1,6 +1,6 @@ { "name": "filament", - "version": "1.51.5", + "version": "1.51.6", "description": "Real-time physically based rendering engine", "main": "filament.js", "module": "filament.js", From 880b454702d8b17aa878aae95c6bb99bae7406cd Mon Sep 17 00:00:00 2001 From: Benjamin Doherty Date: Mon, 29 Apr 2024 16:49:37 -0700 Subject: [PATCH 26/31] Bump version to 1.51.7 --- README.md | 4 ++-- android/gradle.properties | 2 +- ios/CocoaPods/Filament.podspec | 4 ++-- web/filament-js/package.json | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 01948e41c86..f571123d1aa 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.51.6' + implementation 'com.google.android.filament:filament-android:1.51.7' } ``` @@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ```shell -pod 'Filament', '~> 1.51.6' +pod 'Filament', '~> 1.51.7' ``` ### Snapshots diff --git a/android/gradle.properties b/android/gradle.properties index 87decc65c2c..fd04602ea9c 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.51.6 +VERSION_NAME=1.51.7 POM_DESCRIPTION=Real-time physically based rendering engine for Android. 
diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index 3c879e3037e..ec121f61e9e 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.51.6" + spec.version = "1.51.7" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = "https://google.github.io/filament" spec.authors = "Google LLC." spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.51.6/filament-v1.51.6-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.51.7/filament-v1.51.7-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. spec.pod_target_xcconfig = { diff --git a/web/filament-js/package.json b/web/filament-js/package.json index df1e4ffbd88..9523529679f 100644 --- a/web/filament-js/package.json +++ b/web/filament-js/package.json @@ -1,6 +1,6 @@ { "name": "filament", - "version": "1.51.6", + "version": "1.51.7", "description": "Real-time physically based rendering engine", "main": "filament.js", "module": "filament.js", From 305bfb36d83d9742ba3d12f4e3928ad338c3d8b0 Mon Sep 17 00:00:00 2001 From: Eliza Velasquez Date: Tue, 30 Apr 2024 12:49:47 -0700 Subject: [PATCH 27/31] Fix broken ImGui on web I was unfortunately naive about the way that Filament handled external textures on non-GLES platforms. This fix restricts the changes to Android (which is the only place this change is required in the first place). Long story short, the change broke WebGL. Desktop seems to be unaffected. 
--- NEW_RELEASE_NOTES.md | 2 ++ libs/filagui/include/filagui/ImGuiHelper.h | 6 ++++-- libs/filagui/src/ImGuiHelper.cpp | 9 ++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/NEW_RELEASE_NOTES.md b/NEW_RELEASE_NOTES.md index 4a1a9c7fa7e..188a51f3662 100644 --- a/NEW_RELEASE_NOTES.md +++ b/NEW_RELEASE_NOTES.md @@ -7,3 +7,5 @@ for next branch cut* header. appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md). ## Release notes for next branch cut + +- filagui: Fix regression which broke WebGL diff --git a/libs/filagui/include/filagui/ImGuiHelper.h b/libs/filagui/include/filagui/ImGuiHelper.h index 9ad4f0765e1..3f55ac77bbd 100644 --- a/libs/filagui/include/filagui/ImGuiHelper.h +++ b/libs/filagui/include/filagui/ImGuiHelper.h @@ -84,12 +84,14 @@ class UTILS_PUBLIC ImGuiHelper { filament::View* mView; // The view is owned by the client. filament::Scene* mScene; filament::Material* mMaterial2d = nullptr; + std::vector mMaterial2dInstances; +#ifdef __ANDROID__ filament::Material* mMaterialExternal = nullptr; + std::vector mMaterialExternalInstances; +#endif filament::Camera* mCamera = nullptr; std::vector mVertexBuffers; std::vector mIndexBuffers; - std::vector mMaterial2dInstances; - std::vector mMaterialExternalInstances; utils::Entity mRenderable; utils::Entity mCameraEntity; filament::Texture* mTexture = nullptr; diff --git a/libs/filagui/src/ImGuiHelper.cpp b/libs/filagui/src/ImGuiHelper.cpp index 935ae138a4d..1a88013125f 100644 --- a/libs/filagui/src/ImGuiHelper.cpp +++ b/libs/filagui/src/ImGuiHelper.cpp @@ -64,10 +64,12 @@ ImGuiHelper::ImGuiHelper(Engine* engine, filament::View* view, const Path& fontP .package(FILAGUI_RESOURCES_UIBLIT_DATA, FILAGUI_RESOURCES_UIBLIT_SIZE) .constant("external", false) .build(*engine); +#ifdef __ANDROID__ mMaterialExternal = Material::Builder() .package(FILAGUI_RESOURCES_UIBLIT_DATA, FILAGUI_RESOURCES_UIBLIT_SIZE) .constant("external", true) .build(*engine); +#endif // If the given font path is invalid, 
ImGui will silently fall back to proggy, which is a // tiny "pixel art" texture that is compiled into the library. @@ -134,10 +136,12 @@ ImGuiHelper::~ImGuiHelper() { mEngine->destroy(mi); } mEngine->destroy(mMaterial2d); +#ifdef __ANDROID__ for (auto& mi : mMaterialExternalInstances) { mEngine->destroy(mi); } mEngine->destroy(mMaterialExternal); +#endif mEngine->destroy(mTexture); for (auto& vb : mVertexBuffers) { mEngine->destroy(vb); @@ -229,13 +233,16 @@ void ImGuiHelper::processImGuiCommands(ImDrawData* commands, const ImGuiIO& io) auto texture = (Texture const*)pcmd.TextureId; const char* uniformName; MaterialInstance* materialInstance; +#ifdef __ANDROID__ if (texture && texture->getTarget() == Texture::Sampler::SAMPLER_EXTERNAL) { if (materialExternalIndex == mMaterialExternalInstances.size()) { mMaterialExternalInstances.push_back(mMaterialExternal->createInstance()); } uniformName = "albedoExternal"; materialInstance = mMaterialExternalInstances[materialExternalIndex++]; - } else { + } else +#endif + { if (material2dIndex == mMaterial2dInstances.size()) { mMaterial2dInstances.push_back(mMaterial2d->createInstance()); } From b7eb12bd0cbf2212f7176d47838fd5f795d890d5 Mon Sep 17 00:00:00 2001 From: Eliza Velasquez Date: Tue, 30 Apr 2024 13:37:19 -0700 Subject: [PATCH 28/31] filagui: Split uiBlit material into two Unfortunately, the external uniform was not optimized out as expected. The only option is to split it into its own independent material. 
--- libs/filagui/CMakeLists.txt | 5 ++++ libs/filagui/src/ImGuiHelper.cpp | 15 ++++------ libs/filagui/src/materials/uiBlit.mat | 16 ++-------- libs/filagui/src/materials/uiBlitExternal.mat | 29 +++++++++++++++++++ 4 files changed, 41 insertions(+), 24 deletions(-) create mode 100644 libs/filagui/src/materials/uiBlitExternal.mat diff --git a/libs/filagui/CMakeLists.txt b/libs/filagui/CMakeLists.txt index 66058c4103c..55f3d867ff5 100644 --- a/libs/filagui/CMakeLists.txt +++ b/libs/filagui/CMakeLists.txt @@ -33,6 +33,11 @@ endif() set(MATERIAL_SRCS src/materials/uiBlit.mat) +if (ANDROID) + list(APPEND MATERIAL_SRCS + src/materials/uiBlitExternal.mat) +endif() + file(MAKE_DIRECTORY ${MATERIAL_DIR}) foreach (mat_src ${MATERIAL_SRCS}) diff --git a/libs/filagui/src/ImGuiHelper.cpp b/libs/filagui/src/ImGuiHelper.cpp index 1a88013125f..7d22547fdb9 100644 --- a/libs/filagui/src/ImGuiHelper.cpp +++ b/libs/filagui/src/ImGuiHelper.cpp @@ -62,12 +62,10 @@ ImGuiHelper::ImGuiHelper(Engine* engine, filament::View* view, const Path& fontP // Create a simple alpha-blended 2D blitting material. mMaterial2d = Material::Builder() .package(FILAGUI_RESOURCES_UIBLIT_DATA, FILAGUI_RESOURCES_UIBLIT_SIZE) - .constant("external", false) .build(*engine); #ifdef __ANDROID__ mMaterialExternal = Material::Builder() - .package(FILAGUI_RESOURCES_UIBLIT_DATA, FILAGUI_RESOURCES_UIBLIT_SIZE) - .constant("external", true) + .package(FILAGUI_RESOURCES_UIBLITEXTERNAL_DATA, FILAGUI_RESOURCES_UIBLITEXTERNAL_SIZE) .build(*engine); #endif @@ -81,7 +79,7 @@ ImGuiHelper::ImGuiHelper(Engine* engine, filament::View* view, const Path& fontP // For proggy, switch to NEAREST for pixel-perfect text. 
if (!fontPath.isFile() && !imGuiContext) { mSampler = TextureSampler(MinFilter::NEAREST, MagFilter::NEAREST); - mMaterial2d->setDefaultParameter("albedo2d", mTexture, mSampler); + mMaterial2d->setDefaultParameter("albedo", mTexture, mSampler); } utils::EntityManager& em = utils::EntityManager::get(); @@ -124,7 +122,7 @@ void ImGuiHelper::createAtlasTexture(Engine* engine) { mTexture->setImage(*engine, 0, std::move(pb)); mSampler = TextureSampler(MinFilter::LINEAR, MagFilter::LINEAR); - mMaterial2d->setDefaultParameter("albedo2d", mTexture, mSampler); + mMaterial2d->setDefaultParameter("albedo", mTexture, mSampler); } ImGuiHelper::~ImGuiHelper() { @@ -231,14 +229,12 @@ void ImGuiHelper::processImGuiCommands(ImDrawData* commands, const ImGuiIO& io) pcmd.UserCallback(cmds, &pcmd); } else { auto texture = (Texture const*)pcmd.TextureId; - const char* uniformName; MaterialInstance* materialInstance; #ifdef __ANDROID__ if (texture && texture->getTarget() == Texture::Sampler::SAMPLER_EXTERNAL) { if (materialExternalIndex == mMaterialExternalInstances.size()) { mMaterialExternalInstances.push_back(mMaterialExternal->createInstance()); } - uniformName = "albedoExternal"; materialInstance = mMaterialExternalInstances[materialExternalIndex++]; } else #endif @@ -246,7 +242,6 @@ void ImGuiHelper::processImGuiCommands(ImDrawData* commands, const ImGuiIO& io) if (material2dIndex == mMaterial2dInstances.size()) { mMaterial2dInstances.push_back(mMaterial2d->createInstance()); } - uniformName = "albedo2d"; materialInstance = mMaterial2dInstances[material2dIndex++]; } materialInstance->setScissor( @@ -256,9 +251,9 @@ void ImGuiHelper::processImGuiCommands(ImDrawData* commands, const ImGuiIO& io) (uint16_t) (pcmd.ClipRect.w - pcmd.ClipRect.y)); if (texture) { TextureSampler sampler(MinFilter::LINEAR, MagFilter::LINEAR); - materialInstance->setParameter(uniformName, texture, sampler); + materialInstance->setParameter("albedo", texture, sampler); } else { - 
materialInstance->setParameter(uniformName, mTexture, mSampler); + materialInstance->setParameter("albedo", mTexture, mSampler); } rbuilder .geometry(primIndex, RenderableManager::PrimitiveType::TRIANGLES, diff --git a/libs/filagui/src/materials/uiBlit.mat b/libs/filagui/src/materials/uiBlit.mat index 4f9e85c5438..774113fbd33 100644 --- a/libs/filagui/src/materials/uiBlit.mat +++ b/libs/filagui/src/materials/uiBlit.mat @@ -1,19 +1,9 @@ material { name : uiBlit, - constants : [ - { - name : external, - type : bool - } - ], parameters : [ - { - type : samplerExternal, - name : albedoExternal - }, { type : sampler2d, - name : albedo2d + name : albedo } ], requires : [ @@ -32,9 +22,7 @@ fragment { prepareMaterial(material); vec2 uv = getUV0(); uv.y = 1.0 - uv.y; - vec4 albedo = materialConstants_external - ? texture2D(materialParams_albedoExternal, uv) - : texture2D(materialParams_albedo2d, uv); + vec4 albedo = texture2D(materialParams_albedo, uv); material.baseColor = getColor() * albedo; material.baseColor.rgb *= material.baseColor.a; } diff --git a/libs/filagui/src/materials/uiBlitExternal.mat b/libs/filagui/src/materials/uiBlitExternal.mat new file mode 100644 index 00000000000..f07187837de --- /dev/null +++ b/libs/filagui/src/materials/uiBlitExternal.mat @@ -0,0 +1,29 @@ +material { + name : uiBlitExternal, + parameters : [ + { + type : samplerExternal, + name : albedo + } + ], + requires : [ + uv0, + color + ], + shadingModel : unlit, + culling : none, + depthCulling: false, + blending : transparent, + featureLevel : 0 +} + +fragment { + void material(inout MaterialInputs material) { + prepareMaterial(material); + vec2 uv = getUV0(); + uv.y = 1.0 - uv.y; + vec4 albedo = texture2D(materialParams_albedo, uv); + material.baseColor = getColor() * albedo; + material.baseColor.rgb *= material.baseColor.a; + } +} From 7ae2773222f87989c5408d7c188ca2c714dc1b32 Mon Sep 17 00:00:00 2001 From: Benjamin Doherty Date: Mon, 6 May 2024 10:20:48 -0700 Subject: [PATCH 29/31] Log 
excess buffer allocations for Metal --- filament/backend/src/metal/MetalBuffer.h | 14 ++++++++++++-- filament/backend/src/metal/MetalBuffer.mm | 3 ++- filament/backend/src/metal/MetalDriver.mm | 3 +++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/filament/backend/src/metal/MetalBuffer.h b/filament/backend/src/metal/MetalBuffer.h index 579975d0d6c..d676f2072a5 100644 --- a/filament/backend/src/metal/MetalBuffer.h +++ b/filament/backend/src/metal/MetalBuffer.h @@ -18,6 +18,7 @@ #define TNT_FILAMENT_DRIVER_METALBUFFER_H #include "MetalContext.h" +#include "MetalPlatform.h" #include @@ -33,10 +34,17 @@ namespace filament::backend { class TrackedMetalBuffer { public: + static constexpr size_t EXCESS_BUFFER_COUNT = 30000; + TrackedMetalBuffer() noexcept : mBuffer(nil) {} TrackedMetalBuffer(id buffer) noexcept : mBuffer(buffer) { if (buffer) { - aliveBuffers++; + if (aliveBuffers++ >= EXCESS_BUFFER_COUNT) { + if (platform && platform->hasDebugUpdateStatFunc()) { + platform->debugUpdateStat("filament.metal.excess_buffers_allocated", + TrackedMetalBuffer::getAliveBuffers()); + } + } } } ~TrackedMetalBuffer() { @@ -58,6 +66,7 @@ class TrackedMetalBuffer { operator bool() const noexcept { return bool(mBuffer); } static uint64_t getAliveBuffers() { return aliveBuffers; } + static void setPlatform(MetalPlatform* p) { platform = p; } private: void swap(TrackedMetalBuffer& other) noexcept { @@ -68,7 +77,8 @@ class TrackedMetalBuffer { id mBuffer; - static std::atomic aliveBuffers; + static MetalPlatform* platform; + static uint64_t aliveBuffers; }; class MetalBuffer { diff --git a/filament/backend/src/metal/MetalBuffer.mm b/filament/backend/src/metal/MetalBuffer.mm index af46027e20d..e0d542268c1 100644 --- a/filament/backend/src/metal/MetalBuffer.mm +++ b/filament/backend/src/metal/MetalBuffer.mm @@ -22,7 +22,8 @@ namespace filament { namespace backend { -std::atomic TrackedMetalBuffer::aliveBuffers = 0; +uint64_t TrackedMetalBuffer::aliveBuffers = 0; 
+MetalPlatform* TrackedMetalBuffer::platform = nullptr; MetalBuffer::MetalBuffer(MetalContext& context, BufferObjectBinding bindingType, BufferUsage usage, size_t size, bool forceGpuBuffer) : mBufferSize(size), mContext(context) { diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index 83c858bad20..288b034d9c1 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -105,6 +105,8 @@ driverConfig.disableHandleUseAfterFreeCheck) { mContext->driver = this; + TrackedMetalBuffer::setPlatform(platform); + mContext->device = mPlatform.createDevice(); assert_invariant(mContext->device); @@ -198,6 +200,7 @@ } MetalDriver::~MetalDriver() noexcept { + TrackedMetalBuffer::setPlatform(nullptr); mContext->device = nil; mContext->emptyTexture = nil; CFRelease(mContext->textureCache); From 3ba082da13fcac5ff5a6632a1c306662722185d7 Mon Sep 17 00:00:00 2001 From: Benjamin Doherty Date: Mon, 6 May 2024 10:26:30 -0700 Subject: [PATCH 30/31] Metal: track types of buffers --- filament/backend/src/metal/MetalBuffer.h | 63 +++++++++++++++---- filament/backend/src/metal/MetalBuffer.mm | 5 +- filament/backend/src/metal/MetalBufferPool.mm | 2 +- filament/backend/src/metal/MetalDriver.mm | 6 ++ 4 files changed, 62 insertions(+), 14 deletions(-) diff --git a/filament/backend/src/metal/MetalBuffer.h b/filament/backend/src/metal/MetalBuffer.h index d676f2072a5..8586eeb845a 100644 --- a/filament/backend/src/metal/MetalBuffer.h +++ b/filament/backend/src/metal/MetalBuffer.h @@ -34,12 +34,38 @@ namespace filament::backend { class TrackedMetalBuffer { public: + static constexpr size_t EXCESS_BUFFER_COUNT = 30000; + enum class Type { + NONE = 0, + GENERIC = 1, + RING = 2, + STAGING = 3, + }; + static constexpr size_t TypeCount = 3; + + static constexpr auto toIndex(Type t) { + assert_invariant(t != Type::NONE); + switch (t) { + case Type::NONE: + case Type::GENERIC: + return 0; + case Type::RING: + return 
1; + case Type::STAGING: + return 2; + } + } + TrackedMetalBuffer() noexcept : mBuffer(nil) {} - TrackedMetalBuffer(id buffer) noexcept : mBuffer(buffer) { + TrackedMetalBuffer(nullptr_t) noexcept : mBuffer(nil) {} + TrackedMetalBuffer(id buffer, Type type) : mBuffer(buffer), mType(type) { + assert_invariant(type != Type::NONE); if (buffer) { - if (aliveBuffers++ >= EXCESS_BUFFER_COUNT) { + aliveBuffers[toIndex(type)]++; + mType = type; + if (getAliveBuffers() >= EXCESS_BUFFER_COUNT) { if (platform && platform->hasDebugUpdateStatFunc()) { platform->debugUpdateStat("filament.metal.excess_buffers_allocated", TrackedMetalBuffer::getAliveBuffers()); @@ -47,9 +73,11 @@ class TrackedMetalBuffer { } } } + ~TrackedMetalBuffer() { if (mBuffer) { - aliveBuffers--; + assert_invariant(mType != Type::NONE); + aliveBuffers[toIndex(mType)]--; } } @@ -65,20 +93,31 @@ class TrackedMetalBuffer { id get() const noexcept { return mBuffer; } operator bool() const noexcept { return bool(mBuffer); } - static uint64_t getAliveBuffers() { return aliveBuffers; } + static uint64_t getAliveBuffers() { + uint64_t sum = 0; + for (const auto& v : aliveBuffers) { + sum += v; + } + return sum; + } + + static uint64_t getAliveBuffers(Type type) { + assert_invariant(type != Type::NONE); + return aliveBuffers[toIndex(type)]; + } static void setPlatform(MetalPlatform* p) { platform = p; } private: void swap(TrackedMetalBuffer& other) noexcept { - id temp = mBuffer; - mBuffer = other.mBuffer; - other.mBuffer = temp; + std::swap(mBuffer, other.mBuffer); + std::swap(mType, other.mType); } id mBuffer; + Type mType = Type::NONE; static MetalPlatform* platform; - static uint64_t aliveBuffers; + static std::array aliveBuffers; }; class MetalBuffer { @@ -181,7 +220,8 @@ class MetalRingBuffer { mBufferOptions(options), mSlotSizeBytes(computeSlotSize(layout)), mSlotCount(slotCount) { - mBuffer = [device newBufferWithLength:mSlotSizeBytes * mSlotCount options:mBufferOptions]; + mBuffer = { [device 
newBufferWithLength:mSlotSizeBytes * mSlotCount options:mBufferOptions], + TrackedMetalBuffer::Type::RING }; assert_invariant(mBuffer); } @@ -199,9 +239,10 @@ class MetalRingBuffer { // If we already have an aux buffer, it will get freed here, unless it has been retained // by a MTLCommandBuffer. In that case, it will be freed when the command buffer // finishes executing. - mAuxBuffer = [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions]; + mAuxBuffer = { [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions], + TrackedMetalBuffer::Type::RING }; assert_invariant(mAuxBuffer); - return {mAuxBuffer.get(), 0}; + return { mAuxBuffer.get(), 0 }; } mCurrentSlot = (mCurrentSlot + 1) % mSlotCount; mOccupiedSlots->fetch_add(1, std::memory_order_relaxed); diff --git a/filament/backend/src/metal/MetalBuffer.mm b/filament/backend/src/metal/MetalBuffer.mm index e0d542268c1..ec8a8878e2e 100644 --- a/filament/backend/src/metal/MetalBuffer.mm +++ b/filament/backend/src/metal/MetalBuffer.mm @@ -22,7 +22,7 @@ namespace filament { namespace backend { -uint64_t TrackedMetalBuffer::aliveBuffers = 0; +std::array TrackedMetalBuffer::aliveBuffers = { 0 }; MetalPlatform* TrackedMetalBuffer::platform = nullptr; MetalBuffer::MetalBuffer(MetalContext& context, BufferObjectBinding bindingType, BufferUsage usage, @@ -38,7 +38,8 @@ } // Otherwise, we allocate a private GPU buffer. 
- mBuffer = [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate]; + mBuffer = { [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate], + TrackedMetalBuffer::Type::GENERIC }; ASSERT_POSTCONDITION(mBuffer, "Could not allocate Metal buffer of size %zu.", size); } diff --git a/filament/backend/src/metal/MetalBufferPool.mm b/filament/backend/src/metal/MetalBufferPool.mm index 3b75c8e85d4..911bf84e4ac 100644 --- a/filament/backend/src/metal/MetalBufferPool.mm +++ b/filament/backend/src/metal/MetalBufferPool.mm @@ -46,7 +46,7 @@ options:MTLResourceStorageModeShared]; ASSERT_POSTCONDITION(buffer, "Could not allocate Metal staging buffer of size %zu.", numBytes); MetalBufferPoolEntry* stage = new MetalBufferPoolEntry { - .buffer = buffer, + .buffer = { buffer, TrackedMetalBuffer::Type::STAGING }, .capacity = numBytes, .lastAccessed = mCurrentFrame, .referenceCount = 1 diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index 288b034d9c1..77ee6912dc0 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -222,6 +222,12 @@ #endif if (mPlatform.hasDebugUpdateStatFunc()) { mPlatform.debugUpdateStat("filament.metal.alive_buffers", TrackedMetalBuffer::getAliveBuffers()); + mPlatform.debugUpdateStat("filament.metal.alive_buffers.generic", + TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::GENERIC)); + mPlatform.debugUpdateStat("filament.metal.alive_buffers.ring", + TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::RING)); + mPlatform.debugUpdateStat("filament.metal.alive_buffers.staging", + TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::STAGING)); } } From 2d157e8fe1109aa27307ae15e27fc6bec4c959ce Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Mon, 6 May 2024 12:12:38 -0700 Subject: [PATCH 31/31] Add preferredShaderLanguage option to Engine::Config (#7816) --- NEW_RELEASE_NOTES.md | 1 + 
filament/include/filament/Engine.h | 22 ++++++++++++++++++++++ filament/src/details/Engine.h | 4 ++++ 3 files changed, 27 insertions(+) diff --git a/NEW_RELEASE_NOTES.md b/NEW_RELEASE_NOTES.md index 188a51f3662..e4728cb677a 100644 --- a/NEW_RELEASE_NOTES.md +++ b/NEW_RELEASE_NOTES.md @@ -9,3 +9,4 @@ appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md). ## Release notes for next branch cut - filagui: Fix regression which broke WebGL +- Add a new Engine::Config setting to control preferred shader language diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h index e5b142c3147..a737469b768 100644 --- a/filament/include/filament/Engine.h +++ b/filament/include/filament/Engine.h @@ -340,6 +340,28 @@ class UTILS_PUBLIC Engine { * Disable backend handles use-after-free checks. */ bool disableHandleUseAfterFreeCheck = false; + + /* + * Sets a preferred shader language for Filament to use. + * + * The Metal backend supports two shader languages: MSL (Metal Shading Language) and + * METAL_LIBRARY (precompiled .metallib). This option controls which shader language is + * used when materials contain both. + * + * By default, when preferredShaderLanguage is unset, Filament will prefer METAL_LIBRARY + * shaders if present within a material, falling back to MSL. Setting + * preferredShaderLanguage to ShaderLanguage::MSL will instead instruct Filament to check + * for the presence of MSL in a material first, falling back to METAL_LIBRARY if MSL is not + * present. + * + * When using a non-Metal backend, setting this has no effect. 
+ */ + enum class ShaderLanguage { + DEFAULT = 0, + MSL = 1, + METAL_LIBRARY = 2, + }; + ShaderLanguage preferredShaderLanguage = ShaderLanguage::DEFAULT; }; diff --git a/filament/src/details/Engine.h b/filament/src/details/Engine.h index 88434071a5e..5d72ac9c2ee 100644 --- a/filament/src/details/Engine.h +++ b/filament/src/details/Engine.h @@ -246,6 +246,10 @@ class FEngine : public Engine { case Backend::VULKAN: return { backend::ShaderLanguage::SPIRV }; case Backend::METAL: + const auto& lang = mConfig.preferredShaderLanguage; + if (lang == Config::ShaderLanguage::MSL) { + return { backend::ShaderLanguage::MSL, backend::ShaderLanguage::METAL_LIBRARY }; + } return { backend::ShaderLanguage::METAL_LIBRARY, backend::ShaderLanguage::MSL }; } }