From 1c7293db8da4352cecbc900153187c6ce6a60dbf Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 15 Aug 2023 11:55:28 -0700 Subject: [PATCH 01/23] fix fuzzyEqual - the return value was inverted - fuzzyEqual could generate alignment faults - move it out of mat4 and mat2 because it was only used in one place. --- filament/src/Froxelizer.cpp | 20 +++++++++++++++++--- libs/math/include/math/mat2.h | 17 ----------------- libs/math/include/math/mat4.h | 18 ------------------ 3 files changed, 17 insertions(+), 38 deletions(-) diff --git a/filament/src/Froxelizer.cpp b/filament/src/Froxelizer.cpp index 995fc76af24..52e0d71a845 100644 --- a/filament/src/Froxelizer.cpp +++ b/filament/src/Froxelizer.cpp @@ -81,6 +81,21 @@ struct Froxelizer::FroxelThreadData : public std::array { }; + +// Returns false if the two matrices are different. May return false if they're the +// same, with some elements only differing by +0 or -0. Behaviour is undefined with NaNs. +static bool fuzzyEqual(mat4f const& UTILS_RESTRICT l, mat4f const& UTILS_RESTRICT r) noexcept { + auto const li = reinterpret_cast( reinterpret_cast(&l) ); + auto const ri = reinterpret_cast( reinterpret_cast(&r) ); + uint32_t result = 0; + for (size_t i = 0; i < sizeof(mat4f) / sizeof(uint32_t); i++) { + // clang fully vectorizes this + result |= li[i] ^ ri[i]; + } + return result == 0; +} + + Froxelizer::Froxelizer(FEngine& engine) : mArena("froxel", PER_FROXELDATA_ARENA_SIZE), mZLightNear(FROXEL_FIRST_SLICE_DEPTH), @@ -144,9 +159,8 @@ void Froxelizer::setViewport(filament::Viewport const& viewport) noexcept { } void Froxelizer::setProjection(const mat4f& projection, - float near, - UTILS_UNUSED float far) noexcept { - if (UTILS_UNLIKELY(mat4f::fuzzyEqual(mProjection, projection))) { + float near, UTILS_UNUSED float far) noexcept { + if (UTILS_UNLIKELY(!fuzzyEqual(mProjection, projection))) { mProjection = projection; mNear = near; mDirtyFlags |= PROJECTION_CHANGED; diff --git a/libs/math/include/math/mat2.h 
b/libs/math/include/math/mat2.h index 551fe4451ed..dba9ca47230 100644 --- a/libs/math/include/math/mat2.h +++ b/libs/math/include/math/mat2.h @@ -235,23 +235,6 @@ class MATH_EMPTY_BASES TMat22 : return r; } - // returns false if the two matrices are different. May return false if they're the - // same, with some elements only differing by +0 or -0. Behaviour is undefined with NaNs. - static constexpr bool fuzzyEqual(TMat22 l, TMat22 r) noexcept { - uint64_t const* const li = reinterpret_cast(&l); - uint64_t const* const ri = reinterpret_cast(&r); - uint64_t result = 0; - // For some reason clang is not able to vectoize this loop when the number of iteration - // is known and constant (!?!?!). Still this is better than operator==. -#if defined(__clang__) -#pragma clang loop vectorize_width(2) -#endif - for (size_t i = 0; i < sizeof(TMat22) / sizeof(uint64_t); i++) { - result |= li[i] ^ ri[i]; - } - return result != 0; - } - template static constexpr TMat22 translation(const TVec2& t) noexcept { TMat22 r; diff --git a/libs/math/include/math/mat4.h b/libs/math/include/math/mat4.h index fa5301adfaa..d44081b2648 100644 --- a/libs/math/include/math/mat4.h +++ b/libs/math/include/math/mat4.h @@ -272,24 +272,6 @@ class MATH_EMPTY_BASES TMat44 : template constexpr TMat44(const TMat33& matrix, const TVec4& column3) noexcept; - /* - * helpers - */ - - // returns false if the two matrices are different. May return false if they're the - // same, with some elements only differing by +0 or -0. Behaviour is undefined with NaNs. - static constexpr bool fuzzyEqual(TMat44 const& l, TMat44 const& r) noexcept { - uint64_t const* const li = reinterpret_cast(&l); - uint64_t const* const ri = reinterpret_cast(&r); - uint64_t result = 0; - // For some reason clang is not able to vectorize this loop when the number of iteration - // is known and constant (!?!?!). Still this is better than operator==. 
- for (size_t i = 0; i < sizeof(TMat44) / sizeof(uint64_t); i++) { - result |= li[i] ^ ri[i]; - } - return result != 0; - } - static constexpr TMat44 ortho(T left, T right, T bottom, T top, T near, T far) noexcept; static constexpr TMat44 frustum(T left, T right, T bottom, T top, T near, T far) noexcept; From 288b59a34821504557a75962606cad0bf254e292 Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Wed, 16 Aug 2023 12:18:15 -0700 Subject: [PATCH 02/23] Fix missing createFence (#7076) Continuing from #7072 --- filament/src/details/Engine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filament/src/details/Engine.cpp b/filament/src/details/Engine.cpp index 59b5377b219..3e50fc71352 100644 --- a/filament/src/details/Engine.cpp +++ b/filament/src/details/Engine.cpp @@ -536,7 +536,7 @@ void FEngine::flushAndWait() { // then create a fence that will trigger when we're past the finish() above size_t tryCount = 8; - FFence* fence = FEngine::createFence(FFence::Type::SOFT); + FFence* fence = FEngine::createFence(); UTILS_NOUNROLL do { FenceStatus status = fence->wait(FFence::Mode::FLUSH,250000000u); From 46e4e966b9f374098e7be5a55d800f0ac287e7d5 Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Wed, 16 Aug 2023 14:23:24 -0700 Subject: [PATCH 03/23] Fix assert with matdbg enabled (#7079) --- filament/src/details/Material.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filament/src/details/Material.h b/filament/src/details/Material.h index a5372249063..e4b6f975c93 100644 --- a/filament/src/details/Material.h +++ b/filament/src/details/Material.h @@ -101,7 +101,7 @@ class FMaterial : public Material { // Must be called after prepareProgram(). 
[[nodiscard]] backend::Handle getProgram(Variant variant) const noexcept { #if FILAMENT_ENABLE_MATDBG - assert_invariant(variant.key < VARIANT_COUNT); + assert_invariant((size_t)variant.key < VARIANT_COUNT); std::unique_lock lock(mActiveProgramsLock); mActivePrograms.set(variant.key); lock.unlock(); From c0db909c13299d4959b1543595d495d62a52990d Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Wed, 16 Aug 2023 15:59:07 -0700 Subject: [PATCH 04/23] don't use eglMakeCurrent with EGL_NO_SURFACE unless we're allowed EGL_KHR_surfaceless_context is needed to be able to use eglMakeCurrent without an EGLSurface. --- .../android/filament/ibl/MainActivity.kt | 7 ++++--- .../include/backend/platforms/PlatformEGL.h | 1 + .../src/opengl/platforms/PlatformEGL.cpp | 21 ++++++++++++------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/android/samples/sample-image-based-lighting/src/main/java/com/google/android/filament/ibl/MainActivity.kt b/android/samples/sample-image-based-lighting/src/main/java/com/google/android/filament/ibl/MainActivity.kt index 1554bbdbbe0..723a642307c 100644 --- a/android/samples/sample-image-based-lighting/src/main/java/com/google/android/filament/ibl/MainActivity.kt +++ b/android/samples/sample-image-based-lighting/src/main/java/com/google/android/filament/ibl/MainActivity.kt @@ -118,9 +118,10 @@ class MainActivity : Activity() { } private fun setupView() { - val ssaoOptions = view.ambientOcclusionOptions - ssaoOptions.enabled = true - view.ambientOcclusionOptions = ssaoOptions + // ambient occlusion is the cheapest effect that adds a lot of quality + view.ambientOcclusionOptions = view.ambientOcclusionOptions.apply { + enabled = true + } // NOTE: Try to disable post-processing (tone-mapping, etc.) 
to see the difference // view.isPostProcessingEnabled = false diff --git a/filament/backend/include/backend/platforms/PlatformEGL.h b/filament/backend/include/backend/platforms/PlatformEGL.h index 8902f14f767..08124909a2d 100644 --- a/filament/backend/include/backend/platforms/PlatformEGL.h +++ b/filament/backend/include/backend/platforms/PlatformEGL.h @@ -139,6 +139,7 @@ class PlatformEGL : public OpenGLPlatform { bool KHR_create_context = false; bool KHR_gl_colorspace = false; bool KHR_no_config_context = false; + bool KHR_surfaceless_context = false; } egl; } ext; diff --git a/filament/backend/src/opengl/platforms/PlatformEGL.cpp b/filament/backend/src/opengl/platforms/PlatformEGL.cpp index 08b20e0b42c..df13526d3c3 100644 --- a/filament/backend/src/opengl/platforms/PlatformEGL.cpp +++ b/filament/backend/src/opengl/platforms/PlatformEGL.cpp @@ -118,6 +118,7 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon ext.egl.KHR_create_context = extensions.has("EGL_KHR_create_context"); ext.egl.KHR_gl_colorspace = extensions.has("EGL_KHR_gl_colorspace"); ext.egl.KHR_no_config_context = extensions.has("EGL_KHR_no_config_context"); + ext.egl.KHR_surfaceless_context = extensions.has("EGL_KHR_surfaceless_context"); eglCreateSyncKHR = (PFNEGLCREATESYNCKHRPROC) eglGetProcAddress("eglCreateSyncKHR"); eglDestroySyncKHR = (PFNEGLDESTROYSYNCKHRPROC) eglGetProcAddress("eglDestroySyncKHR"); @@ -181,11 +182,13 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon eglConfig = mEGLConfig; } - // create the dummy surface, just for being able to make the context current. - mEGLDummySurface = eglCreatePbufferSurface(mEGLDisplay, mEGLConfig, pbufferAttribs); - if (UTILS_UNLIKELY(mEGLDummySurface == EGL_NO_SURFACE)) { - logEglError("eglCreatePbufferSurface"); - goto error; + if (UTILS_UNLIKELY(!ext.egl.KHR_surfaceless_context)) { + // create the dummy surface, just for being able to make the context current. 
+ mEGLDummySurface = eglCreatePbufferSurface(mEGLDisplay, mEGLConfig, pbufferAttribs); + if (UTILS_UNLIKELY(mEGLDummySurface == EGL_NO_SURFACE)) { + logEglError("eglCreatePbufferSurface"); + goto error; + } } for (size_t tries = 0; tries < 3; tries++) { @@ -255,7 +258,7 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon } bool PlatformEGL::isExtraContextSupported() const noexcept { - return true; + return ext.egl.KHR_surfaceless_context; } void PlatformEGL::createContext(bool shared) { @@ -286,8 +289,10 @@ EGLBoolean PlatformEGL::makeCurrent(EGLSurface drawSurface, EGLSurface readSurfa } void PlatformEGL::terminate() noexcept { - eglMakeCurrent(mEGLDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); - eglDestroySurface(mEGLDisplay, mEGLDummySurface); + eglMakeCurrent(mEGLDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); /* EGL_NO_CONTEXT must be paired with EGL_NO_SURFACE, otherwise EGL_BAD_MATCH and the context stays current */ + if (mEGLDummySurface) { + eglDestroySurface(mEGLDisplay, mEGLDummySurface); + } eglDestroyContext(mEGLDisplay, mEGLContext); for (auto context : mAdditionalContexts) { eglDestroyContext(mEGLDisplay, context); From c0389ac54c3b89cebaf3e6d9c221395128d3dbdf Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 15 Aug 2023 16:24:13 -0700 Subject: [PATCH 05/23] rework ShaderCompilerService to improve performance - remove support for non-shared contextes parallel compilation. this wasn't used. we can always revive it later if we need to. - rework how callbacks work so that we don't have to use a work list executed at each tick() in the shared context case (common case). this improves performance significantly on low-end devices, by not having to go through the list to check if all programs are compiled, multiple times per frame. The new CallbackManager handles scheduling the callbacks after all previous programs are compiled. 
--- filament/backend/CMakeLists.txt | 2 + filament/backend/src/CompilerThreadPool.cpp | 9 +- filament/backend/src/DriverBase.h | 14 +- .../backend/src/opengl/CallbackManager.cpp | 69 ++++ filament/backend/src/opengl/CallbackManager.h | 98 ++++++ filament/backend/src/opengl/OpenGLDriver.cpp | 2 +- .../src/opengl/ShaderCompilerService.cpp | 327 +++++++----------- .../src/opengl/ShaderCompilerService.h | 12 +- 8 files changed, 305 insertions(+), 228 deletions(-) create mode 100644 filament/backend/src/opengl/CallbackManager.cpp create mode 100644 filament/backend/src/opengl/CallbackManager.h diff --git a/filament/backend/CMakeLists.txt b/filament/backend/CMakeLists.txt index 550f569f02d..75416a3d4d1 100644 --- a/filament/backend/CMakeLists.txt +++ b/filament/backend/CMakeLists.txt @@ -68,6 +68,8 @@ set(PRIVATE_HDRS if (FILAMENT_SUPPORTS_OPENGL AND NOT FILAMENT_USE_EXTERNAL_GLES3 AND NOT FILAMENT_USE_SWIFTSHADER) list(APPEND SRCS include/backend/platforms/OpenGLPlatform.h + src/opengl/CallbackManager.h + src/opengl/CallbackManager.cpp src/opengl/gl_headers.cpp src/opengl/gl_headers.h src/opengl/GLUtils.cpp diff --git a/filament/backend/src/CompilerThreadPool.cpp b/filament/backend/src/CompilerThreadPool.cpp index 008b353aaa1..eeb116db3b2 100644 --- a/filament/backend/src/CompilerThreadPool.cpp +++ b/filament/backend/src/CompilerThreadPool.cpp @@ -16,6 +16,8 @@ #include "CompilerThreadPool.h" +#include + #include namespace filament::backend { @@ -38,6 +40,7 @@ void CompilerThreadPool::init(uint32_t threadCount, JobSystem::Priority priority for (size_t i = 0; i < threadCount; i++) { mCompilerThreads.emplace_back([this, priority, setup]() { + SYSTRACE_CONTEXT(); // give the thread a name JobSystem::setThreadName("CompilerThreadPool"); // run at a slightly lower priority than other filament threads @@ -53,7 +56,11 @@ void CompilerThreadPool::init(uint32_t threadCount, JobSystem::Priority priority (!std::all_of( std::begin(mQueues), std::end(mQueues), [](auto&& q) { return 
q.empty(); })); }); - if (!mExitRequested) { + + SYSTRACE_VALUE32("CompilerThreadPool Jobs", + mQueues[0].size() + mQueues[1].size()); + + if (UTILS_LIKELY(!mExitRequested)) { Job job; // use the first queue that's not empty auto& queue = [this]() -> auto& { diff --git a/filament/backend/src/DriverBase.h b/filament/backend/src/DriverBase.h index 3e7f2647d2f..abf68901164 100644 --- a/filament/backend/src/DriverBase.h +++ b/filament/backend/src/DriverBase.h @@ -165,13 +165,6 @@ class DriverBase : public Driver { void purge() noexcept final; - // -------------------------------------------------------------------------------------------- - // Privates - // -------------------------------------------------------------------------------------------- - -protected: - class CallbackDataDetails; - // Helpers... struct CallbackData { CallbackData(CallbackData const &) = delete; @@ -202,6 +195,13 @@ class DriverBase : public Driver { void scheduleCallback(CallbackHandler* handler, void* user, CallbackHandler::Callback callback); + // -------------------------------------------------------------------------------------------- + // Privates + // -------------------------------------------------------------------------------------------- + +protected: + class CallbackDataDetails; + inline void scheduleDestroy(BufferDescriptor&& buffer) noexcept { if (buffer.hasCallback()) { scheduleDestroySlow(std::move(buffer)); diff --git a/filament/backend/src/opengl/CallbackManager.cpp b/filament/backend/src/opengl/CallbackManager.cpp new file mode 100644 index 00000000000..8d85a9f4886 --- /dev/null +++ b/filament/backend/src/opengl/CallbackManager.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "CallbackManager.h" + +#include "DriverBase.h" + +namespace filament::backend { + +CallbackManager::CallbackManager(DriverBase& driver) noexcept + : mDriver(driver), mCallbacks(1) { +} + +CallbackManager::~CallbackManager() noexcept = default; + +void CallbackManager::terminate() noexcept { + for (auto&& item: mCallbacks) { + if (item.func) { + mDriver.scheduleCallback( + item.handler, item.user, item.func); + } + } +} + +CallbackManager::Handle CallbackManager::get() const noexcept { + Container::const_iterator const curr = getCurrent(); + curr->count.fetch_add(1); + return curr; +} + +void CallbackManager::put(Handle& curr) noexcept { + if (curr->count.fetch_sub(1) == 1) { + if (curr->func) { + mDriver.scheduleCallback( + curr->handler, curr->user, curr->func); + destroySlot(curr); + } + } + curr = {}; +} + +void CallbackManager::setCallback( + CallbackHandler* handler, CallbackHandler::Callback func, void* user) { + assert_invariant(func); + Container::iterator const curr = allocateNewSlot(); + curr->handler = handler; + curr->func = func; + curr->user = user; + if (curr->count == 0) { + mDriver.scheduleCallback( + curr->handler, curr->user, curr->func); + destroySlot(curr); + } +} + +} // namespace filament::backend diff --git a/filament/backend/src/opengl/CallbackManager.h b/filament/backend/src/opengl/CallbackManager.h new file mode 100644 index 00000000000..5349f201265 --- /dev/null +++ b/filament/backend/src/opengl/CallbackManager.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TNT_FILAMENT_BACKEND_OPENGL_CALLBACKMANAGER_H +#define TNT_FILAMENT_BACKEND_OPENGL_CALLBACKMANAGER_H + +#include + +#include + +#include +#include +#include + +namespace filament::backend { + +class DriverBase; +class CallbackHandler; + +/* + * CallbackManager schedules user callbacks once all previous conditions are met. + * A "Condition" is created by calling "get" and is met by calling "put". These + * are typically called from different threads. + * The callback is specified with "setCallback", which atomically creates a new set of + * conditions to be met. + */ +class CallbackManager { + struct Callback { + mutable std::atomic_int count{}; + CallbackHandler* handler = nullptr; + CallbackHandler::Callback func = {}; + void* user = nullptr; + }; + + using Container = std::list; + +public: + using Handle = Container::const_iterator; + + explicit CallbackManager(DriverBase& driver) noexcept; + + ~CallbackManager() noexcept; + + // Calls all the pending callbacks regardless of remaining conditions to be met. This is to + // avoid leaking resources for instance. It also doesn't matter if the conditions are met + // because we're shutting down. + void terminate() noexcept; + + // creates a condition and get a handle for it + Handle get() const noexcept; + + // Announces the specified condition is met. 
If a callback was specified and all conditions + // prior to setting the callback are met, the callback is scheduled. + void put(Handle& curr) noexcept; + + // Sets a callback to be called when all previously created (get) conditions are met (put). + // If there were no conditions created, or they're all already met, the callback is scheduled + // immediately. + void setCallback(CallbackHandler* handler, CallbackHandler::Callback func, void* user); + +private: + Container::const_iterator getCurrent() const noexcept { + std::lock_guard const lock(mLock); + return --mCallbacks.end(); + } + + Container::iterator allocateNewSlot() noexcept { + std::lock_guard const lock(mLock); + auto curr = --mCallbacks.end(); + mCallbacks.emplace_back(); + return curr; + } + void destroySlot(Container::const_iterator curr) noexcept { + std::lock_guard const lock(mLock); + mCallbacks.erase(curr); + } + + DriverBase& mDriver; + mutable utils::Mutex mLock; + Container mCallbacks; +}; + +} // namespace filament::backend + +#endif // TNT_FILAMENT_BACKEND_OPENGL_CALLBACKMANAGER_H diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index 47cc50edfc2..3d4eef3076d 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -2593,7 +2593,7 @@ bool OpenGLDriver::getTimerQueryValue(Handle tqh, uint64_t* elapse void OpenGLDriver::compilePrograms(CompilerPriorityQueue priority, CallbackHandler* handler, CallbackHandler::Callback callback, void* user) { if (callback) { - getShaderCompilerService().notifyWhenAllProgramsAreReady(priority, handler, callback, user); + getShaderCompilerService().notifyWhenAllProgramsAreReady(handler, callback, user); } } diff --git a/filament/backend/src/opengl/ShaderCompilerService.cpp b/filament/backend/src/opengl/ShaderCompilerService.cpp index 3dfdea5f478..dcbfb0f9f94 100644 --- a/filament/backend/src/opengl/ShaderCompilerService.cpp +++ 
b/filament/backend/src/opengl/ShaderCompilerService.cpp @@ -64,17 +64,17 @@ static inline std::string to_string(float f) noexcept { // ------------------------------------------------------------------------------------------------ struct ShaderCompilerService::OpenGLProgramToken : ProgramToken { - struct ProgramBinary { - GLenum format{}; + struct ProgramData { GLuint program{}; std::array shaders{}; - std::vector blob; }; - ~OpenGLProgramToken(); + ~OpenGLProgramToken() override; + OpenGLProgramToken(ShaderCompilerService& compiler, utils::CString const& name) noexcept : compiler(compiler), name(name) { } + ShaderCompilerService& compiler; utils::CString const& name; utils::FixedCapacityVector> attributes; @@ -86,22 +86,21 @@ struct ShaderCompilerService::OpenGLProgramToken : ProgramToken { } gl; // 12 bytes - // Sets the programBinary, typically from the compiler thread, and signal the main thread. + // Sets the programData, typically from the compiler thread, and signal the main thread. // This is similar to std::promise::set_value. - void set(ProgramBinary programBinary) noexcept { - using std::swap; + void set(ProgramData const& data) noexcept { std::unique_lock const l(lock); - swap(binary, programBinary); + programData = data; signaled = true; cond.notify_one(); } // Get the programBinary, wait if necessary. // This is similar to std::future::get - ProgramBinary const& get() const noexcept { + ProgramData const& get() const noexcept { std::unique_lock l(lock); cond.wait(l, [this](){ return signaled; }); - return binary; + return programData; } // Checks if the programBinary is ready. 
@@ -112,10 +111,11 @@ struct ShaderCompilerService::OpenGLProgramToken : ProgramToken { return cond.wait_for(l, 0s, [this](){ return signaled; }); } + CallbackManager::Handle handle{}; BlobCacheKey key; mutable utils::Mutex lock; mutable utils::Condition cond; - ProgramBinary binary; + ProgramData programData; bool signaled = false; bool canceled = false; // not part of the signaling @@ -135,6 +135,7 @@ void* ShaderCompilerService::getUserData(const program_token_t& token) noexcept ShaderCompilerService::ShaderCompilerService(OpenGLDriver& driver) : mDriver(driver), + mCallbackManager(driver), KHR_parallel_shader_compile(driver.getContext().ext.KHR_parallel_shader_compile) { } @@ -171,31 +172,25 @@ void ShaderCompilerService::init() noexcept { mShaderCompilerThreadCount = poolSize; mCompilerThreadPool.init(mShaderCompilerThreadCount, priority, - [platform = &mDriver.mPlatform, sharedContext = mUseSharedContext]() { + [platform = &mDriver.mPlatform]() { // create a gl context current to this thread - platform->createContext(sharedContext); + platform->createContext(true); }); } } } void ShaderCompilerService::terminate() noexcept { - // We could have some pending callbacks here, we need to execute them. - // This is equivalent to calling cancelTickOp() on all active tokens. - for (auto&& op: mRunAtNextTickOps) { - auto const& [priority, token, job] = op; - if (!token && job.callback) { - // This is a little fragile here. We know by construction that jobs that have a - // null token are the ones that dispatch the user callbacks. - mDriver.scheduleCallback(job.handler, job.user, job.callback); - } - } - mRunAtNextTickOps.clear(); - // Finally stop the thread pool immediately. Pending jobs will be discarded. We guarantee by // construction that nobody is waiting on a token (because waiting is only done on the main // backend thread, and if we're here, we're on the backend main thread). 
mCompilerThreadPool.terminate(); + + mRunAtNextTickOps.clear(); + + // We could have some pending callbacks here, we need to execute them. + // This is equivalent to calling cancelTickOp() on all active tokens. + mCallbackManager.terminate(); } ShaderCompilerService::program_token_t ShaderCompilerService::createProgram( @@ -203,132 +198,104 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram( auto& gl = mDriver.getContext(); auto token = std::make_shared(*this, name); - if (UTILS_UNLIKELY(gl.isES2())) { token->attributes = std::move(program.getAttributes()); } token->gl.program = OpenGLBlobCache::retrieve(&token->key, mDriver.mPlatform, program); - if (!token->gl.program) { - CompilerPriorityQueue const priorityQueue = program.getPriorityQueue(); - if (mShaderCompilerThreadCount) { - // queue a compile job - mCompilerThreadPool.queue(priorityQueue, token, - [this, &gl, program = std::move(program), token]() mutable { - - // compile the shaders - std::array shaders{}; - std::array shaderSourceCode; - compileShaders(gl, - std::move(program.getShadersSource()), - program.getSpecializationConstants(), - shaders, - shaderSourceCode); - - // link the program - GLuint const glProgram = linkProgram(gl, shaders, token->attributes); - - OpenGLProgramToken::ProgramBinary binary; - binary.shaders = shaders; - - if (UTILS_LIKELY(mUseSharedContext)) { - // We need to query the link status here to guarantee that the - // program is compiled and linked now (we don't want this to be - // deferred to later). We don't care about the result at this point. - GLint status; - glGetProgramiv(glProgram, GL_LINK_STATUS, &status); - binary.program = glProgram; - if (token->key) { - // Attempt to cache. This calls glGetProgramBinary. 
- OpenGLBlobCache::insert(mDriver.mPlatform, token->key, glProgram); - } - } -#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 - else { - // retrieve the program binary - GLsizei programBinarySize = 0; - glGetProgramiv(glProgram, GL_PROGRAM_BINARY_LENGTH, &programBinarySize); - assert_invariant(programBinarySize); - if (programBinarySize) { - binary.blob.resize(programBinarySize); - glGetProgramBinary(glProgram, programBinarySize, - &programBinarySize, &binary.format, binary.blob.data()); - } - // and we can destroy the program - glDeleteProgram(glProgram); - if (token->key) { - // attempt to cache - OpenGLBlobCache::insert(mDriver.mPlatform, token->key, - binary.format, - binary.blob.data(), GLsizei(binary.blob.size())); - } - } -#endif - // we don't need to check for success here, it'll be done on the - // main thread side. - token->set(std::move(binary)); - }); - } else - { - // this cannot fail because we check compilation status after linking the program - // shaders[] is filled with id of shader stages present. 
- compileShaders(gl, - std::move(program.getShadersSource()), - program.getSpecializationConstants(), - token->gl.shaders, - token->shaderSourceCode); + if (token->gl.program) { + return token; + } - } + token->handle = mCallbackManager.get(); + + CompilerPriorityQueue const priorityQueue = program.getPriorityQueue(); + if (mShaderCompilerThreadCount) { + // queue a compile job + mCompilerThreadPool.queue(priorityQueue, token, + [this, &gl, program = std::move(program), token]() mutable { + // compile the shaders + std::array shaders{}; + std::array shaderSourceCode; + compileShaders(gl, + std::move(program.getShadersSource()), + program.getSpecializationConstants(), + shaders, + shaderSourceCode); + + // link the program + GLuint const glProgram = linkProgram(gl, shaders, token->attributes); + + OpenGLProgramToken::ProgramData programData; + programData.shaders = shaders; + + // We need to query the link status here to guarantee that the + // program is compiled and linked now (we don't want this to be + // deferred to later). We don't care about the result at this point. + GLint status; + glGetProgramiv(glProgram, GL_LINK_STATUS, &status); + programData.program = glProgram; + + token->gl.program = programData.program; + + // we don't need to check for success here, it'll be done on the + // main thread side. + token->set(programData); + + mCallbackManager.put(token->handle); + + // caching must be the last thing we do + if (token->key) { + // Attempt to cache. This calls glGetProgramBinary. + OpenGLBlobCache::insert(mDriver.mPlatform, token->key, glProgram); + } + }); + + } else { + // this cannot fail because we check compilation status after linking the program + // shaders[] is filled with id of shader stages present. 
+ compileShaders(gl, + std::move(program.getShadersSource()), + program.getSpecializationConstants(), + token->gl.shaders, + token->shaderSourceCode); runAtNextTick(priorityQueue, token, [this, token](Job const&) { - if (mShaderCompilerThreadCount) { - if (!token->gl.program) { - // TODO: see if we could completely eliminate this callback here - // and instead just rely on token->gl.program being atomically - // set by the compiler thread. - // we're using the compiler thread, check if the program is ready, no-op if not. - if (!token->isReady()) { + if (KHR_parallel_shader_compile) { + // don't attempt to link this program if all shaders are not done compiling + GLint status; + if (token->gl.program) { + glGetProgramiv(token->gl.program, GL_COMPLETION_STATUS, &status); + if (status == GL_FALSE) { return false; } - // program binary is ready, retrieve it without blocking - ShaderCompilerService::getProgramFromCompilerPool( - const_cast(token)); - } - } else { - if (KHR_parallel_shader_compile) { - // don't attempt to link this program if all shaders are not done compiling - GLint status; - if (token->gl.program) { - glGetProgramiv(token->gl.program, GL_COMPLETION_STATUS, &status); - if (status == GL_FALSE) { - return false; - } - } else { - for (auto shader: token->gl.shaders) { - if (shader) { - glGetShaderiv(shader, GL_COMPLETION_STATUS, &status); - if (status == GL_FALSE) { - return false; - } + } else { + for (auto shader: token->gl.shaders) { + if (shader) { + glGetShaderiv(shader, GL_COMPLETION_STATUS, &status); + if (status == GL_FALSE) { + return false; } } } } + } - if (!token->gl.program) { - // link the program, this also cannot fail because status is checked later. - token->gl.program = linkProgram(mDriver.getContext(), - token->gl.shaders, token->attributes); - if (KHR_parallel_shader_compile) { - // wait until the link finishes... 
- return false; - } + if (!token->gl.program) { + // link the program, this also cannot fail because status is checked later. + token->gl.program = linkProgram(mDriver.getContext(), + token->gl.shaders, token->attributes); + if (KHR_parallel_shader_compile) { + // wait until the link finishes... + return false; } } assert_invariant(token->gl.program); - if (token->key && !mShaderCompilerThreadCount) { + mCallbackManager.put(token->handle); + + if (token->key) { // TODO: technically we don't have to cache right now. Is it advantageous to // do this later, maybe depending on CPU usage? // attempt to cache if we don't have a thread pool (otherwise it's done @@ -343,27 +310,6 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram( return token; } -bool ShaderCompilerService::isProgramReady( - const ShaderCompilerService::program_token_t& token) const noexcept { - - assert_invariant(token); - - if (!token->gl.program) { - return false; - } - - if (KHR_parallel_shader_compile) { - GLint status = GL_FALSE; - glGetProgramiv(token->gl.program, GL_COMPLETION_STATUS, &status); - return (bool)status; - } - - // If gl.program is set, this means the program was linked. Some drivers may defer the link - // in which case we might block in getProgram() when we check the program status. - // Unfortunately, this is nothing we can do about that. 
- return bool(token->gl.program); -} - GLuint ShaderCompilerService::getProgram(ShaderCompilerService::program_token_t& token) { GLuint const program = initialize(token); assert_invariant(token == nullptr); @@ -395,74 +341,26 @@ GLuint ShaderCompilerService::getProgram(ShaderCompilerService::program_token_t& } void ShaderCompilerService::tick() { - executeTickOps(); + // we don't need to run executeTickOps() if we're using the thread-pool + if (UTILS_UNLIKELY(!mShaderCompilerThreadCount)) { + executeTickOps(); + } } -void ShaderCompilerService::notifyWhenAllProgramsAreReady(CompilerPriorityQueue priority, +void ShaderCompilerService::notifyWhenAllProgramsAreReady( CallbackHandler* handler, CallbackHandler::Callback callback, void* user) { - - if (KHR_parallel_shader_compile || mShaderCompilerThreadCount) { - // list all programs up to this point, both low and high priority - - using TokenVector = utils::FixedCapacityVector< - program_token_t, std::allocator, false>; - TokenVector tokens{ TokenVector::with_capacity(mRunAtNextTickOps.size()) }; - - for (auto& [itemPriority, token, job] : mRunAtNextTickOps) { - if (token && job.fn && itemPriority == priority) { - tokens.push_back(token); - } - } - - runAtNextTick(priority, nullptr, { - [this, tokens = std::move(tokens)](Job const& job) { - for (auto const& token : tokens) { - assert_invariant(token); - if (!isProgramReady(token)) { - // one of the program is not ready, try next time - return false; - } - } - if (job.callback) { - // all programs are ready, we can call the callbacks - mDriver.scheduleCallback(job.handler, job.user, job.callback); - } - // and we're done - return true; - }, handler, user, callback }); - - return; + if (callback) { + mCallbackManager.setCallback(handler, callback, user); } - - // we don't have KHR_parallel_shader_compile - - runAtNextTick(priority, nullptr, {[this](Job const& job) { - mDriver.scheduleCallback(job.handler, job.user, job.callback); - return true; - }, handler, user, 
callback }); - - // TODO: we could spread the compiles over several frames, the tick() below then is not - // needed here. We keep it for now as to not change the current behavior too much. - // this will block until all programs are linked - tick(); } // ------------------------------------------------------------------------------------------------ void ShaderCompilerService::getProgramFromCompilerPool(program_token_t& token) noexcept { - OpenGLProgramToken::ProgramBinary const& binary{ token->get() }; + OpenGLProgramToken::ProgramData const& programData{ token->get() }; if (!token->canceled) { - token->gl.shaders = binary.shaders; - if (UTILS_LIKELY(mUseSharedContext)) { - token->gl.program = binary.program; - } -#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 - else { - token->gl.program = glCreateProgram(); - glProgramBinary(token->gl.program, binary.format, - binary.blob.data(), GLsizei(binary.blob.size())); - } -#endif + token->gl.shaders = programData.shaders; + token->gl.program = programData.program; } } @@ -489,8 +387,17 @@ GLuint ShaderCompilerService::initialize(program_token_t& token) noexcept { // we force the program link -- which might stall, either here or below in // checkProgramStatus(), but we don't have a choice, we need to use the program now. token->compiler.cancelTickOp(token); + token->gl.program = linkProgram(mDriver.getContext(), token->gl.shaders, token->attributes); + + assert_invariant(token->gl.program); + + mCallbackManager.put(token->handle); + + if (token->key) { + OpenGLBlobCache::insert(mDriver.mPlatform, token->key, token->gl.program); + } } else { // if we don't have a program yet, block until we get it. 
tick(); diff --git a/filament/backend/src/opengl/ShaderCompilerService.h b/filament/backend/src/opengl/ShaderCompilerService.h index 668d4e31f24..03043ca5ecd 100644 --- a/filament/backend/src/opengl/ShaderCompilerService.h +++ b/filament/backend/src/opengl/ShaderCompilerService.h @@ -19,6 +19,7 @@ #include "gl_headers.h" +#include "CallbackManager.h" #include "CompilerThreadPool.h" #include @@ -71,10 +72,6 @@ class ShaderCompilerService { // creates a program (compile + link) asynchronously if supported program_token_t createProgram(utils::CString const& name, Program&& program); - // Returns true if the program is linked (successfully or not). Guarantees that - // getProgram() won't block. Does not block. - bool isProgramReady(const program_token_t& token) const noexcept; - // Return the GL program, blocks if necessary. The Token is destroyed and becomes invalid. GLuint getProgram(program_token_t& token); @@ -91,20 +88,17 @@ class ShaderCompilerService { static void* getUserData(const program_token_t& token) noexcept; // call the callback when all active programs are ready - void notifyWhenAllProgramsAreReady(CompilerPriorityQueue priority, + void notifyWhenAllProgramsAreReady( CallbackHandler* handler, CallbackHandler::Callback callback, void* user); private: OpenGLDriver& mDriver; + CallbackManager mCallbackManager; CompilerThreadPool mCompilerThreadPool; const bool KHR_parallel_shader_compile; uint32_t mShaderCompilerThreadCount = 0u; - // For now, we assume shared contexts are supported everywhere. If they are not, - // we don't use the shader compiler pool. However, the code supports it. 
- static constexpr bool mUseSharedContext = true; - GLuint initialize(ShaderCompilerService::program_token_t& token) noexcept; static void getProgramFromCompilerPool(program_token_t& token) noexcept; From 69f78dbcbe0499355b39d160cdde8b0eff3548fc Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Wed, 16 Aug 2023 22:41:31 -0700 Subject: [PATCH 06/23] better fix for calls to eglMakeCurrent turns out that KHR_surfaceless_context is implied for ES3.0 when KHR_create_context is present. However, Adreno 306 fails even if it advertises it. So, we now reset the value of KHR_surfaceless_context based on actually calling eglMakeCurrent(EGL_NO_SURFACE). --- .../src/opengl/platforms/PlatformEGL.cpp | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/filament/backend/src/opengl/platforms/PlatformEGL.cpp b/filament/backend/src/opengl/platforms/PlatformEGL.cpp index df13526d3c3..59f5a89054f 100644 --- a/filament/backend/src/opengl/platforms/PlatformEGL.cpp +++ b/filament/backend/src/opengl/platforms/PlatformEGL.cpp @@ -115,10 +115,14 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon auto extensions = GLUtils::split(eglQueryString(mEGLDisplay, EGL_EXTENSIONS)); ext.egl.ANDROID_recordable = extensions.has("EGL_ANDROID_recordable"); - ext.egl.KHR_create_context = extensions.has("EGL_KHR_create_context"); ext.egl.KHR_gl_colorspace = extensions.has("EGL_KHR_gl_colorspace"); + ext.egl.KHR_create_context = extensions.has("EGL_KHR_create_context"); ext.egl.KHR_no_config_context = extensions.has("EGL_KHR_no_config_context"); ext.egl.KHR_surfaceless_context = extensions.has("KHR_surfaceless_context"); + if (ext.egl.KHR_create_context) { + // KHR_create_context implies KHR_surfaceless_context for ES3.x contexts + ext.egl.KHR_surfaceless_context = true; + } eglCreateSyncKHR = (PFNEGLCREATESYNCKHRPROC) eglGetProcAddress("eglCreateSyncKHR"); eglDestroySyncKHR = (PFNEGLDESTROYSYNCKHRPROC) 
eglGetProcAddress("eglDestroySyncKHR"); @@ -182,15 +186,6 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon eglConfig = mEGLConfig; } - if (UTILS_UNLIKELY(!ext.egl.KHR_surfaceless_context)) { - // create the dummy surface, just for being able to make the context current. - mEGLDummySurface = eglCreatePbufferSurface(mEGLDisplay, mEGLConfig, pbufferAttribs); - if (UTILS_UNLIKELY(mEGLDummySurface == EGL_NO_SURFACE)) { - logEglError("eglCreatePbufferSurface"); - goto error; - } - } - for (size_t tries = 0; tries < 3; tries++) { mEGLContext = eglCreateContext(mEGLDisplay, eglConfig, (EGLContext)sharedContext, contextAttribs.data()); @@ -223,6 +218,26 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon goto error; } + if (ext.egl.KHR_surfaceless_context) { + // Adreno 306 driver advertises KHR_create_context but doesn't support passing + // EGL_NO_SURFACE to eglMakeCurrent with a 3.0 context. + if (UTILS_UNLIKELY(!eglMakeCurrent(mEGLDisplay, + EGL_NO_SURFACE, EGL_NO_SURFACE, mEGLContext))) { + if (eglGetError() == EGL_BAD_MATCH) { + ext.egl.KHR_surfaceless_context = false; + } + } + } + + if (UTILS_UNLIKELY(!ext.egl.KHR_surfaceless_context)) { + // create the dummy surface, just for being able to make the context current. + mEGLDummySurface = eglCreatePbufferSurface(mEGLDisplay, mEGLConfig, pbufferAttribs); + if (UTILS_UNLIKELY(mEGLDummySurface == EGL_NO_SURFACE)) { + logEglError("eglCreatePbufferSurface"); + goto error; + } + } + if (UTILS_UNLIKELY(!makeCurrent(mEGLDummySurface, mEGLDummySurface))) { // eglMakeCurrent failed logEglError("eglMakeCurrent"); From 6c0db3791943d752e2bb74bda4b6858de651b4b8 Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Thu, 17 Aug 2023 15:19:43 -0700 Subject: [PATCH 07/23] vulkan: fix readPixels selectMemory (#7084) readPixels requests staging memory to be host-visible/coherent/cached. But "cached" is not supported on Mali (Pixel 6pro). 
We make it a preferable but optional bit. --- filament/backend/src/vulkan/VulkanContext.h | 3 +-- .../backend/src/vulkan/VulkanReadPixels.cpp | 22 ++++++++++++++++--- filament/backend/src/vulkan/VulkanTexture.cpp | 10 +++++++-- .../platform/VulkanPlatformSwapChainImpl.cpp | 10 +++++++-- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/filament/backend/src/vulkan/VulkanContext.h b/filament/backend/src/vulkan/VulkanContext.h index a22951deb61..2dba9d3bdeb 100644 --- a/filament/backend/src/vulkan/VulkanContext.h +++ b/filament/backend/src/vulkan/VulkanContext.h @@ -97,8 +97,7 @@ struct VulkanContext { } flags >>= 1; } - ASSERT_POSTCONDITION(false, "Unable to find a memory type that meets requirements."); - return (uint32_t) ~0ul; + return (uint32_t) VK_MAX_MEMORY_TYPES; } inline VkFormat getDepthFormat() const { diff --git a/filament/backend/src/vulkan/VulkanReadPixels.cpp b/filament/backend/src/vulkan/VulkanReadPixels.cpp index e299d206597..fd51344b830 100644 --- a/filament/backend/src/vulkan/VulkanReadPixels.cpp +++ b/filament/backend/src/vulkan/VulkanReadPixels.cpp @@ -176,12 +176,28 @@ void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x VkMemoryRequirements memReqs; VkDeviceMemory stagingMemory; vkGetImageMemoryRequirements(device, stagingImage, &memReqs); + + uint32_t memoryTypeIndex = selectMemoryFunc(memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + + // If VK_MEMORY_PROPERTY_HOST_CACHED_BIT is not supported, we try only + // HOST_VISIBLE+HOST_COHERENT. HOST_CACHED helps a lot with readpixels performance. 
+ if (memoryTypeIndex >= VK_MAX_MEMORY_TYPES) { + memoryTypeIndex = selectMemoryFunc(memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + utils::slog.w + << "readPixels is slow because VK_MEMORY_PROPERTY_HOST_CACHED_BIT is not available" + << utils::io::endl; + } + + ASSERT_POSTCONDITION(memoryTypeIndex < VK_MAX_MEMORY_TYPES, + "VulkanReadPixels: unable to find a memory type that meets requirements."); + VkMemoryAllocateInfo const allocInfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = memReqs.size, - .memoryTypeIndex = selectMemoryFunc(memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT - | VK_MEMORY_PROPERTY_HOST_CACHED_BIT), + .memoryTypeIndex = memoryTypeIndex, }; vkAllocateMemory(device, &allocInfo, VKALLOC, &stagingMemory); diff --git a/filament/backend/src/vulkan/VulkanTexture.cpp b/filament/backend/src/vulkan/VulkanTexture.cpp index 6b86def4e8a..8df63626a3d 100644 --- a/filament/backend/src/vulkan/VulkanTexture.cpp +++ b/filament/backend/src/vulkan/VulkanTexture.cpp @@ -167,11 +167,17 @@ VulkanTexture::VulkanTexture(VkDevice device, VkPhysicalDevice physicalDevice, // Allocate memory for the VkImage and bind it. 
VkMemoryRequirements memReqs = {}; vkGetImageMemoryRequirements(mDevice, mTextureImage, &memReqs); + + uint32_t memoryTypeIndex + = context.selectMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + ASSERT_POSTCONDITION(memoryTypeIndex < VK_MAX_MEMORY_TYPES, + "VulkanTexture: unable to find a memory type that meets requirements."); + VkMemoryAllocateInfo allocInfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = memReqs.size, - .memoryTypeIndex = context.selectMemoryType(memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + .memoryTypeIndex = memoryTypeIndex, }; error = vkAllocateMemory(mDevice, &allocInfo, nullptr, &mTextureImageMemory); ASSERT_POSTCONDITION(!error, "Unable to allocate image memory."); diff --git a/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp b/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp index 41a4f891115..f83e8be93b9 100644 --- a/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp +++ b/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp @@ -57,11 +57,17 @@ std::tuple createImageAndMemory(VulkanContext const& co VkDeviceMemory imageMemory; VkMemoryRequirements memReqs; vkGetImageMemoryRequirements(device, image, &memReqs); + + uint32_t memoryTypeIndex + = context.selectMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + ASSERT_POSTCONDITION(memoryTypeIndex < VK_MAX_MEMORY_TYPES, + "VulkanPlatformSwapChainImpl: unable to find a memory type that meets requirements."); + VkMemoryAllocateInfo allocInfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = memReqs.size, - .memoryTypeIndex - = context.selectMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), + .memoryTypeIndex = memoryTypeIndex, }; result = vkAllocateMemory(device, &allocInfo, nullptr, &imageMemory); ASSERT_POSTCONDITION(result == VK_SUCCESS, "Unable to allocate image memory."); From 
fc7b6447b76bcee535c22af7d06372c714ba2e0e Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Thu, 17 Aug 2023 16:52:09 -0700 Subject: [PATCH 08/23] make sure to not assert when matdbg is enabled --- filament/backend/src/opengl/ShaderCompilerService.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/filament/backend/src/opengl/ShaderCompilerService.cpp b/filament/backend/src/opengl/ShaderCompilerService.cpp index dcbfb0f9f94..7be3781d35e 100644 --- a/filament/backend/src/opengl/ShaderCompilerService.cpp +++ b/filament/backend/src/opengl/ShaderCompilerService.cpp @@ -313,7 +313,9 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram( GLuint ShaderCompilerService::getProgram(ShaderCompilerService::program_token_t& token) { GLuint const program = initialize(token); assert_invariant(token == nullptr); +#ifndef FILAMENT_ENABLE_MATDBG assert_invariant(program); +#endif return program; } From 26952631a38e92b7d9ec53d8dfef3aeb9a114062 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Thu, 17 Aug 2023 16:02:07 -0700 Subject: [PATCH 09/23] only attempt to compile shaders in parallel if supported It can be extremely counter productive to attempt to do this if not supported. 
--- .../backend/include/private/backend/DriverAPI.inc | 1 + filament/backend/src/metal/MetalDriver.mm | 4 ++++ filament/backend/src/noop/NoopDriver.cpp | 4 ++++ filament/backend/src/opengl/OpenGLDriver.cpp | 4 ++++ .../backend/src/opengl/ShaderCompilerService.cpp | 4 ++++ .../backend/src/opengl/ShaderCompilerService.h | 2 ++ filament/backend/src/vulkan/VulkanDriver.cpp | 4 ++++ filament/src/details/Material.cpp | 14 ++++++++------ 8 files changed, 31 insertions(+), 6 deletions(-) diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc index 2cb16a60fed..98b54663ba6 100644 --- a/filament/backend/include/private/backend/DriverAPI.inc +++ b/filament/backend/include/private/backend/DriverAPI.inc @@ -297,6 +297,7 @@ DECL_DRIVER_API_SYNCHRONOUS_0(bool, isFrameBufferFetchMultiSampleSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isFrameTimeSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isAutoDepthResolveSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isSRGBSwapChainSupported) +DECL_DRIVER_API_SYNCHRONOUS_0(bool, isParallelShaderCompileSupported) DECL_DRIVER_API_SYNCHRONOUS_0(uint8_t, getMaxDrawBuffers) DECL_DRIVER_API_SYNCHRONOUS_0(size_t, getMaxUniformBufferSize) DECL_DRIVER_API_SYNCHRONOUS_0(math::float2, getClipSpaceParams) diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index 4476da77371..231aef4d71c 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -696,6 +696,10 @@ return false; } +bool MetalDriver::isParallelShaderCompileSupported() { + return false; +} + bool MetalDriver::isWorkaroundNeeded(Workaround workaround) { switch (workaround) { case Workaround::SPLIT_EASU: diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp index 19b1cb5380f..72265483640 100644 --- a/filament/backend/src/noop/NoopDriver.cpp +++ b/filament/backend/src/noop/NoopDriver.cpp @@ 
-174,6 +174,10 @@ bool NoopDriver::isSRGBSwapChainSupported() { return false; } +bool NoopDriver::isParallelShaderCompileSupported() { + return false; +} + bool NoopDriver::isWorkaroundNeeded(Workaround) { return false; } diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index 3d4eef3076d..84b3705e14e 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -1872,6 +1872,10 @@ bool OpenGLDriver::isSRGBSwapChainSupported() { return mPlatform.isSRGBSwapChainSupported(); } +bool OpenGLDriver::isParallelShaderCompileSupported() { + return mShaderCompilerService.isParallelShaderCompileSupported(); +} + bool OpenGLDriver::isWorkaroundNeeded(Workaround workaround) { switch (workaround) { case Workaround::SPLIT_EASU: diff --git a/filament/backend/src/opengl/ShaderCompilerService.cpp b/filament/backend/src/opengl/ShaderCompilerService.cpp index 7be3781d35e..b7b23022341 100644 --- a/filament/backend/src/opengl/ShaderCompilerService.cpp +++ b/filament/backend/src/opengl/ShaderCompilerService.cpp @@ -141,6 +141,10 @@ ShaderCompilerService::ShaderCompilerService(OpenGLDriver& driver) ShaderCompilerService::~ShaderCompilerService() noexcept = default; +bool ShaderCompilerService::isParallelShaderCompileSupported() const noexcept { + return KHR_parallel_shader_compile || mShaderCompilerThreadCount; +} + void ShaderCompilerService::init() noexcept { // If we have KHR_parallel_shader_compile, we always use it, it should be more resource // friendly. 
diff --git a/filament/backend/src/opengl/ShaderCompilerService.h b/filament/backend/src/opengl/ShaderCompilerService.h index 03043ca5ecd..0d8cb191929 100644 --- a/filament/backend/src/opengl/ShaderCompilerService.h +++ b/filament/backend/src/opengl/ShaderCompilerService.h @@ -66,6 +66,8 @@ class ShaderCompilerService { ~ShaderCompilerService() noexcept; + bool isParallelShaderCompileSupported() const noexcept; + void init() noexcept; void terminate() noexcept; diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index 374f6ffe876..7187a6a3277 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -755,6 +755,10 @@ bool VulkanDriver::isSRGBSwapChainSupported() { return mPlatform->isSRGBSwapChainSupported(); } +bool VulkanDriver::isParallelShaderCompileSupported() { + return false; +} + bool VulkanDriver::isWorkaroundNeeded(Workaround workaround) { switch (workaround) { case Workaround::SPLIT_EASU: { diff --git a/filament/src/details/Material.cpp b/filament/src/details/Material.cpp index 9b3f8341572..6d655b3777f 100644 --- a/filament/src/details/Material.cpp +++ b/filament/src/details/Material.cpp @@ -481,12 +481,14 @@ void FMaterial::compile(CompilerPriorityQueue priority, UserVariantFilterMask const variantFilter = ~variantSpec & UserVariantFilterMask(UserVariantFilterBit::ALL); - auto const& variants = isVariantLit() ? - VariantUtils::getLitVariants() : VariantUtils::getUnlitVariants(); - for (auto const variant : variants) { - if (!variantFilter || variant == Variant::filterUserVariant(variant, variantFilter)) { - if (hasVariant(variant)) { - prepareProgram(variant, priority); + if (UTILS_LIKELY(mEngine.getDriverApi().isParallelShaderCompileSupported())) { + auto const& variants = isVariantLit() ? 
+ VariantUtils::getLitVariants() : VariantUtils::getUnlitVariants(); + for (auto const variant: variants) { + if (!variantFilter || variant == Variant::filterUserVariant(variant, variantFilter)) { + if (hasVariant(variant)) { + prepareProgram(variant, priority); + } } } } From 17caf6cae9fc891d371b9e2ef2523f30f0a86955 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Thu, 17 Aug 2023 15:14:05 -0700 Subject: [PATCH 10/23] improvements to CompilerThreadPool and OpenGLPlatform CompilerThreadPool: - it now supports a thread cleanup function - some initialization is moved to the setup function OpenGLPlatform: - now cleans-up the thread pool threads upon exit --- .../backend/platforms/OpenGLPlatform.h | 6 ++++++ .../include/backend/platforms/PlatformEGL.h | 1 + filament/backend/src/CompilerThreadPool.cpp | 13 ++++++------- filament/backend/src/CompilerThreadPool.h | 16 +++++++++------- .../backend/src/opengl/OpenGLPlatform.cpp | 3 +++ .../src/opengl/ShaderCompilerService.cpp | 14 +++++++++++--- .../src/opengl/platforms/PlatformEGL.cpp | 19 ++++++++++++++++++- 7 files changed, 54 insertions(+), 18 deletions(-) diff --git a/filament/backend/include/backend/platforms/OpenGLPlatform.h b/filament/backend/include/backend/platforms/OpenGLPlatform.h index c41dce43602..3f4488c5f53 100644 --- a/filament/backend/include/backend/platforms/OpenGLPlatform.h +++ b/filament/backend/include/backend/platforms/OpenGLPlatform.h @@ -288,6 +288,12 @@ class OpenGLPlatform : public Platform { * @see terminate() */ virtual void createContext(bool shared); + + /** + * Detach and destroy the current context if any and releases all resources associated to + * this thread. 
+ */ + virtual void releaseContext() noexcept; }; } // namespace filament diff --git a/filament/backend/include/backend/platforms/PlatformEGL.h b/filament/backend/include/backend/platforms/PlatformEGL.h index 08124909a2d..79400540063 100644 --- a/filament/backend/include/backend/platforms/PlatformEGL.h +++ b/filament/backend/include/backend/platforms/PlatformEGL.h @@ -40,6 +40,7 @@ class PlatformEGL : public OpenGLPlatform { PlatformEGL() noexcept; bool isExtraContextSupported() const noexcept override; void createContext(bool shared) override; + void releaseContext() noexcept override; protected: diff --git a/filament/backend/src/CompilerThreadPool.cpp b/filament/backend/src/CompilerThreadPool.cpp index eeb116db3b2..591ae063c31 100644 --- a/filament/backend/src/CompilerThreadPool.cpp +++ b/filament/backend/src/CompilerThreadPool.cpp @@ -34,17 +34,14 @@ CompilerThreadPool::~CompilerThreadPool() noexcept { assert_invariant(mQueues[1].empty()); } -void CompilerThreadPool::init(uint32_t threadCount, JobSystem::Priority priority, - ThreadSetup&& threadSetup) noexcept { +void CompilerThreadPool::init(uint32_t threadCount, + ThreadSetup&& threadSetup, ThreadCleanup&& threadCleanup) noexcept { auto setup = std::make_shared(std::move(threadSetup)); + auto cleanup = std::make_shared(std::move(threadCleanup)); for (size_t i = 0; i < threadCount; i++) { - mCompilerThreads.emplace_back([this, priority, setup]() { + mCompilerThreads.emplace_back([this, setup, cleanup]() { SYSTRACE_CONTEXT(); - // give the thread a name - JobSystem::setThreadName("CompilerThreadPool"); - // run at a slightly lower priority than other filament threads - JobSystem::setThreadPriority(priority); (*setup)(); @@ -80,6 +77,8 @@ void CompilerThreadPool::init(uint32_t threadCount, JobSystem::Priority priority job(); } } + + (*cleanup)(); }); } diff --git a/filament/backend/src/CompilerThreadPool.h b/filament/backend/src/CompilerThreadPool.h index 78ce4c70796..fbdff68f695 100644 --- 
a/filament/backend/src/CompilerThreadPool.h +++ b/filament/backend/src/CompilerThreadPool.h @@ -20,12 +20,13 @@ #include #include -#include +#include +#include #include -#include #include #include +#include #include #include @@ -45,8 +46,9 @@ class CompilerThreadPool { ~CompilerThreadPool() noexcept; using Job = utils::Invocable; using ThreadSetup = utils::Invocable; - void init(uint32_t threadCount, utils::JobSystem::Priority priority, - ThreadSetup&& threadSetup) noexcept; + using ThreadCleanup = utils::Invocable; + void init(uint32_t threadCount, + ThreadSetup&& threadSetup, ThreadCleanup&& threadCleanup) noexcept; void terminate() noexcept; void queue(CompilerPriorityQueue priorityQueue, program_token_t const& token, Job&& job); Job dequeue(program_token_t const& token); @@ -54,9 +56,9 @@ class CompilerThreadPool { private: using Queue = std::deque>; std::vector mCompilerThreads; - std::atomic_bool mExitRequested{false}; - std::mutex mQueueLock; - std::condition_variable mQueueCondition; + bool mExitRequested{ false }; + utils::Mutex mQueueLock; + utils::Condition mQueueCondition; std::array mQueues; // lock must be held for methods below std::pair find(program_token_t const& token); diff --git a/filament/backend/src/opengl/OpenGLPlatform.cpp b/filament/backend/src/opengl/OpenGLPlatform.cpp index 4297479d97f..837faecdc4c 100644 --- a/filament/backend/src/opengl/OpenGLPlatform.cpp +++ b/filament/backend/src/opengl/OpenGLPlatform.cpp @@ -116,4 +116,7 @@ bool OpenGLPlatform::isExtraContextSupported() const noexcept { void OpenGLPlatform::createContext(bool) { } +void OpenGLPlatform::releaseContext() noexcept { +} + } // namespace filament::backend diff --git a/filament/backend/src/opengl/ShaderCompilerService.cpp b/filament/backend/src/opengl/ShaderCompilerService.cpp index b7b23022341..d3d4cdd0658 100644 --- a/filament/backend/src/opengl/ShaderCompilerService.cpp +++ b/filament/backend/src/opengl/ShaderCompilerService.cpp @@ -175,10 +175,18 @@ void 
ShaderCompilerService::init() noexcept { } mShaderCompilerThreadCount = poolSize; - mCompilerThreadPool.init(mShaderCompilerThreadCount, priority, - [platform = &mDriver.mPlatform]() { + mCompilerThreadPool.init(mShaderCompilerThreadCount, + [&platform = mDriver.mPlatform, priority]() { + // give the thread a name + JobSystem::setThreadName("CompilerThreadPool"); + // run at a slightly lower priority than other filament threads + JobSystem::setThreadPriority(priority); // create a gl context current to this thread - platform->createContext(true); + platform.createContext(true); + }, + [&platform = mDriver.mPlatform]() { + // release context and thread state + platform.releaseContext(); }); } } diff --git a/filament/backend/src/opengl/platforms/PlatformEGL.cpp b/filament/backend/src/opengl/platforms/PlatformEGL.cpp index 59f5a89054f..60652b54156 100644 --- a/filament/backend/src/opengl/platforms/PlatformEGL.cpp +++ b/filament/backend/src/opengl/platforms/PlatformEGL.cpp @@ -294,6 +294,22 @@ void PlatformEGL::createContext(bool shared) { mAdditionalContexts.push_back(context); } +void PlatformEGL::releaseContext() noexcept { + EGLContext context = eglGetCurrentContext(); + eglMakeCurrent(mEGLDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + if (context != EGL_NO_CONTEXT) { + eglDestroyContext(mEGLDisplay, context); + } + + mAdditionalContexts.erase( + std::remove_if(mAdditionalContexts.begin(), mAdditionalContexts.end(), + [context](EGLContext c) { + return c == context; + }), mAdditionalContexts.end()); + + eglReleaseThread(); +} + EGLBoolean PlatformEGL::makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) noexcept { if (UTILS_UNLIKELY((drawSurface != mCurrentDrawSurface || readSurface != mCurrentReadSurface))) { mCurrentDrawSurface = drawSurface; @@ -304,7 +320,8 @@ EGLBoolean PlatformEGL::makeCurrent(EGLSurface drawSurface, EGLSurface readSurfa } void PlatformEGL::terminate() noexcept { - eglMakeCurrent(mEGLDisplay, mEGLDummySurface, 
mEGLDummySurface, EGL_NO_CONTEXT); + // it's always allowed to use EGL_NO_SURFACE, EGL_NO_CONTEXT + eglMakeCurrent(mEGLDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); if (mEGLDummySurface) { eglDestroySurface(mEGLDisplay, mEGLDummySurface); } From 098be2e11508f5988bacc5e3b4d80040704a7a75 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Fri, 18 Aug 2023 10:25:10 -0700 Subject: [PATCH 11/23] rework how we initialize the gl context (#7085) * rework how we initialize the gl context - early initialization is now implemented with static methods so that it's very clear which state they need. - the version number is no longer used outside of initialization, instead we use the feature level. - ES3.0 Adreno devices are downgraded to feature level 0 * Update filament/backend/src/opengl/OpenGLContext.cpp Co-authored-by: Powei Feng --------- Co-authored-by: Powei Feng --- filament/backend/src/opengl/OpenGLContext.cpp | 726 ++++++++++-------- filament/backend/src/opengl/OpenGLContext.h | 50 +- 2 files changed, 446 insertions(+), 330 deletions(-) diff --git a/filament/backend/src/opengl/OpenGLContext.cpp b/filament/backend/src/opengl/OpenGLContext.cpp index e7a88f6b96d..072096718b3 100644 --- a/filament/backend/src/opengl/OpenGLContext.cpp +++ b/filament/backend/src/opengl/OpenGLContext.cpp @@ -49,6 +49,7 @@ bool OpenGLContext::queryOpenGLVersion(GLint* major, GLint* minor) noexcept { } OpenGLContext::OpenGLContext() noexcept { + state.vao.p = &mDefaultVAO; // These queries work with all GL/GLES versions! 
@@ -61,265 +62,74 @@ OpenGLContext::OpenGLContext() noexcept { "[" << state.version << "], [" << state.shader << "]" << io::endl; /* - * Figure out GL / GLES version and available features + * Figure out GL / GLES version, extensions and capabilities we need to + * determine the feature level */ queryOpenGLVersion(&state.major, &state.minor); - glGetIntegerv(GL_MAX_RENDERBUFFER_SIZE, &gets.max_renderbuffer_size); - glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &gets.max_texture_image_units); - glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &gets.max_combined_texture_image_units); + OpenGLContext::initExtensions(&ext, state.major, state.minor); - if (state.major > 2) { // this check works for both GL and GLES, but is intended for GLES -#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 - glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &gets.max_uniform_block_size); - glGetIntegerv(GL_MAX_UNIFORM_BUFFER_BINDINGS, &gets.max_uniform_buffer_bindings); - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &gets.uniform_buffer_offset_alignment); - glGetIntegerv(GL_MAX_SAMPLES, &gets.max_samples); - glGetIntegerv(GL_MAX_DRAW_BUFFERS, &gets.max_draw_buffers); - glGetIntegerv(GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS, - &gets.max_transform_feedback_separate_attribs); -#endif - } else { - gets.max_uniform_block_size = 0; - gets.max_uniform_buffer_bindings = 0; - gets.uniform_buffer_offset_alignment = 0; - gets.max_samples = 1; - gets.max_draw_buffers = 1; - gets.max_transform_feedback_separate_attribs = 0; - } + OpenGLContext::initProcs(&procs, ext, state.major, state.minor); - constexpr auto const caps3 = FEATURE_LEVEL_CAPS[+FeatureLevel::FEATURE_LEVEL_3]; - constexpr GLint MAX_VERTEX_SAMPLER_COUNT = caps3.MAX_VERTEX_SAMPLER_COUNT; - constexpr GLint MAX_FRAGMENT_SAMPLER_COUNT = caps3.MAX_FRAGMENT_SAMPLER_COUNT; + OpenGLContext::initBugs(&bugs, ext, state.major, state.minor, + state.vendor, state.renderer, state.version, state.shader); - // default procs that can be overridden based on 
runtime version -#ifdef BACKEND_OPENGL_LEVEL_GLES30 - procs.genVertexArrays = glGenVertexArrays; - procs.bindVertexArray = glBindVertexArray; - procs.deleteVertexArrays = glDeleteVertexArrays; + glGetIntegerv(GL_MAX_RENDERBUFFER_SIZE, &gets.max_renderbuffer_size); + glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &gets.max_texture_image_units); + glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &gets.max_combined_texture_image_units); - // these are core in GL and GLES 3.x - procs.genQueries = glGenQueries; - procs.deleteQueries = glDeleteQueries; - procs.beginQuery = glBeginQuery; - procs.endQuery = glEndQuery; - procs.getQueryObjectuiv = glGetQueryObjectuiv; -# ifdef BACKEND_OPENGL_VERSION_GL - procs.getQueryObjectui64v = glGetQueryObjectui64v; // only core in GL -# elif defined(GL_EXT_disjoint_timer_query) - procs.getQueryObjectui64v = glGetQueryObjectui64vEXT; -# endif // BACKEND_OPENGL_VERSION_GL - - // core in ES 3.0 and GL 4.3 - procs.invalidateFramebuffer = glInvalidateFramebuffer; -#endif // BACKEND_OPENGL_LEVEL_GLES30 - - // no-op if not supported - procs.maxShaderCompilerThreadsKHR = +[](GLuint) {}; + mFeatureLevel = OpenGLContext::resolveFeatureLevel(state.major, state.minor, ext, gets, bugs); #ifdef BACKEND_OPENGL_VERSION_GLES - initExtensionsGLES(); - if (state.major == 3) { - // Runtime OpenGL version is ES 3.x - assert_invariant(gets.max_texture_image_units >= 16); - assert_invariant(gets.max_combined_texture_image_units >= 32); - if (state.minor >= 1) { - features.multisample_texture = true; - // figure out our feature level - if (ext.EXT_texture_cube_map_array) { - mFeatureLevel = FeatureLevel::FEATURE_LEVEL_2; - if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT && - gets.max_combined_texture_image_units >= - (MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) { - mFeatureLevel = FeatureLevel::FEATURE_LEVEL_3; - } - } - } - } -#ifndef IOS // IOS is guaranteed to have ES3.x - else if (UTILS_UNLIKELY(state.major == 2)) { - // Runtime 
OpenGL version is ES 2.x - -#if defined(BACKEND_OPENGL_LEVEL_GLES30) - // mandatory extensions (all supported by Mali-400 and Adreno 304) - assert_invariant(ext.OES_depth_texture); - assert_invariant(ext.OES_depth24); - assert_invariant(ext.OES_packed_depth_stencil); - assert_invariant(ext.OES_rgb8_rgba8); - assert_invariant(ext.OES_standard_derivatives); - assert_invariant(ext.OES_texture_npot); + mShaderModel = ShaderModel::MOBILE; +#else + mShaderModel = ShaderModel::DESKTOP; #endif - if (UTILS_LIKELY(ext.OES_vertex_array_object)) { - procs.genVertexArrays = glGenVertexArraysOES; - procs.bindVertexArray = glBindVertexArrayOES; - procs.deleteVertexArrays = glDeleteVertexArraysOES; - } else { - // if we don't have OES_vertex_array_object, just don't do anything with real VAOs, - // we'll just rebind everything each time. Most Mali-400 support this extension, but - // a few don't. - procs.genVertexArrays = +[](GLsizei, GLuint*) {}; - procs.bindVertexArray = +[](GLuint) {}; - procs.deleteVertexArrays = +[](GLsizei, GLuint const*) {}; - // we activate this workaround path, which does the reset of array buffer - bugs.vao_doesnt_store_element_array_buffer_binding = true; - } - - // EXT_disjoint_timer_query is optional -- pointers will be null if not available - procs.genQueries = glGenQueriesEXT; - procs.deleteQueries = glDeleteQueriesEXT; - procs.beginQuery = glBeginQueryEXT; - procs.endQuery = glEndQueryEXT; - procs.getQueryObjectuiv = glGetQueryObjectuivEXT; - procs.getQueryObjectui64v = glGetQueryObjectui64vEXT; - - procs.invalidateFramebuffer = glDiscardFramebufferEXT; - - procs.maxShaderCompilerThreadsKHR = glMaxShaderCompilerThreadsKHR; - - mFeatureLevel = FeatureLevel::FEATURE_LEVEL_0; +#ifdef BACKEND_OPENGL_VERSION_GLES + if (mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_2) { + features.multisample_texture = true; } -#endif // IOS #else - initExtensionsGL(); - if (state.major == 4) { - assert_invariant(state.minor >= 1); - mShaderModel = ShaderModel::DESKTOP; - 
if (state.minor >= 3) { - // cubemap arrays are available as of OpenGL 4.0 - mFeatureLevel = FeatureLevel::FEATURE_LEVEL_2; - // figure out our feature level - if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT && - gets.max_combined_texture_image_units >= - (MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) { - mFeatureLevel = FeatureLevel::FEATURE_LEVEL_3; - } - } + if (mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1) { features.multisample_texture = true; } - // feedback loops are allowed on GL desktop as long as writes are disabled - bugs.allow_read_only_ancillary_feedback_loop = true; - assert_invariant(gets.max_texture_image_units >= 16); - assert_invariant(gets.max_combined_texture_image_units >= 32); - - procs.maxShaderCompilerThreadsKHR = glMaxShaderCompilerThreadsARB; #endif + if (mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1) { +#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 + glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, + &gets.max_uniform_block_size); + glGetIntegerv(GL_MAX_UNIFORM_BUFFER_BINDINGS, + &gets.max_uniform_buffer_bindings); + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, + &gets.uniform_buffer_offset_alignment); + glGetIntegerv(GL_MAX_SAMPLES, + &gets.max_samples); + glGetIntegerv(GL_MAX_DRAW_BUFFERS, + &gets.max_draw_buffers); + glGetIntegerv(GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS, + &gets.max_transform_feedback_separate_attribs); #ifdef GL_EXT_texture_filter_anisotropic - if (ext.EXT_texture_filter_anisotropic) { - glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &gets.max_anisotropy); + if (ext.EXT_texture_filter_anisotropic) { + glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &gets.max_anisotropy); + } +#endif +#endif + } +#ifdef BACKEND_OPENGL_VERSION_GLES + else { + gets.max_uniform_block_size = 0; + gets.max_uniform_buffer_bindings = 0; + gets.uniform_buffer_offset_alignment = 0; + gets.max_samples = 1; + gets.max_draw_buffers = 1; + gets.max_transform_feedback_separate_attribs = 0; + gets.max_anisotropy = 1; } 
#endif - /* - * Figure out which driver bugs we need to workaround - */ - - const bool isAngle = strstr(state.renderer, "ANGLE"); - if (!isAngle) { - if (strstr(state.renderer, "Adreno")) { - // Qualcomm GPU - bugs.invalidate_end_only_if_invalidate_start = true; - - // On Adreno (As of 3/20) timer query seem to return the CPU time, not the GPU time. - bugs.dont_use_timer_query = true; - - // Blits to texture arrays are failing - // This bug continues to reproduce, though at times we've seen it appear to "go away". - // The standalone sample app that was written to show this problem still reproduces. - // The working hypothesis is that some other state affects this behavior. - bugs.disable_blit_into_texture_array = true; - - // early exit condition is flattened in EASU code - bugs.split_easu = true; - - // initialize the non-used uniform array for Adreno drivers. - bugs.enable_initialize_non_used_uniform_array = true; - - int maj, min, driverMajor, driverMinor; - int const c = sscanf(state.version, "OpenGL ES %d.%d V@%d.%d", // NOLINT(cert-err34-c) - &maj, &min, &driverMajor, &driverMinor); - if (c == 4) { - // Workarounds based on version here. 
- // notes: - // bugs.invalidate_end_only_if_invalidate_start - // - appeared at least in - // "OpenGL ES 3.2 V@0490.0 (GIT@85da404, I46ff5fc46f, 1606794520) (Date:11/30/20)" - // - wasn't present in - // "OpenGL ES 3.2 V@0490.0 (GIT@0905e9f, Ia11ce2d146, 1599072951) (Date:09/02/20)" - // - has been confirmed fixed in V@570.1 by Qualcomm - if (driverMajor < 490 || driverMajor > 570 || - (driverMajor == 570 && driverMinor >= 1)) { - bugs.invalidate_end_only_if_invalidate_start = false; - } - } - - // qualcomm seems to have no problem with this (which is good for us) - bugs.allow_read_only_ancillary_feedback_loop = true; - } else if (strstr(state.renderer, "Mali")) { - // ARM GPU - bugs.vao_doesnt_store_element_array_buffer_binding = true; - if (strstr(state.renderer, "Mali-T")) { - bugs.disable_glFlush = true; - bugs.disable_shared_context_draws = true; - bugs.texture_external_needs_rebind = true; - // We have not verified that timer queries work on Mali-T, so we disable to be safe. - bugs.dont_use_timer_query = true; - } - if (strstr(state.renderer, "Mali-G")) { - // We have run into several problems with timer queries on Mali-Gxx: - // - timer queries seem to cause memory corruptions in some cases on some devices - // (see b/233754398) - // - appeared at least in: "OpenGL ES 3.2 v1.r26p0-01eac0" - // - wasn't present in: "OpenGL ES 3.2 v1.r32p1-00pxl1" - // - timer queries sometime crash with an NPE (see b/273759031) - bugs.dont_use_timer_query = true; - } - // Mali seems to have no problem with this (which is good for us) - bugs.allow_read_only_ancillary_feedback_loop = true; - } else if (strstr(state.renderer, "Intel")) { - // Intel GPU - bugs.vao_doesnt_store_element_array_buffer_binding = true; - } else if (strstr(state.renderer, "PowerVR")) { - // PowerVR GPU - // On PowerVR (Rogue GE8320) glFlush doesn't seem to do anything, in particular, - // it doesn't kick the GPU earlier, so don't issue these calls as they seem to slow - // things down. 
- bugs.disable_glFlush = true; - // On PowerVR (Rogue GE8320) using gl_InstanceID too early in the shader doesn't work. - bugs.powervr_shader_workarounds = true; - // On PowerVR (Rogue GE8320) destroying a fbo after glBlitFramebuffer is effectively - // equivalent to glFinish. - bugs.delay_fbo_destruction = true; - // PowerVR seems to have no problem with this (which is good for us) - bugs.allow_read_only_ancillary_feedback_loop = true; - // PowerVR has a shader compiler thread pinned on the last core - bugs.disable_thread_affinity = true; - } else if (strstr(state.renderer, "Apple")) { - // Apple GPU - } else if (strstr(state.renderer, "Tegra") || - strstr(state.renderer, "GeForce") || - strstr(state.renderer, "NV")) { - // NVIDIA GPU - } else if (strstr(state.renderer, "Vivante")) { - // Vivante GPU - } else if (strstr(state.renderer, "AMD") || - strstr(state.renderer, "ATI")) { - // AMD/ATI GPU - } else if (strstr(state.renderer, "Mozilla")) { - bugs.disable_invalidate_framebuffer = true; - } - } else { - // When running under ANGLE, it's a different set of workaround that we need. 
- if (strstr(state.renderer, "Adreno")) { - // Qualcomm GPU - // early exit condition is flattened in EASU code - // (that should be regardless of ANGLE, but we should double-check) - bugs.split_easu = true; - } - // TODO: see if we could use `bugs.allow_read_only_ancillary_feedback_loop = true` - } slog.v << "Feature level: " << +mFeatureLevel << '\n'; slog.v << "Active workarounds: " << '\n'; @@ -345,14 +155,14 @@ OpenGLContext::OpenGLContext() noexcept { #endif #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 - assert_invariant(state.major <= 2 || gets.max_draw_buffers >= 4); // minspec + assert_invariant(mFeatureLevel == FeatureLevel::FEATURE_LEVEL_0 || gets.max_draw_buffers >= 4); // minspec #endif setDefaultState(); #ifdef GL_EXT_texture_filter_anisotropic #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 - if (state.major > 2 && ext.EXT_texture_filter_anisotropic) { + if (mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1 && ext.EXT_texture_filter_anisotropic) { // make sure we don't have any error flag while (glGetError() != GL_NO_ERROR) { } @@ -458,9 +268,287 @@ void OpenGLContext::setDefaultState() noexcept { } } + +void OpenGLContext::initProcs(Procs* procs, + Extensions const& ext, GLint major, GLint) noexcept { + (void)ext; + (void)major; + + // default procs that can be overridden based on runtime version +#ifdef BACKEND_OPENGL_LEVEL_GLES30 + procs->genVertexArrays = glGenVertexArrays; + procs->bindVertexArray = glBindVertexArray; + procs->deleteVertexArrays = glDeleteVertexArrays; + + // these are core in GL and GLES 3.x + procs->genQueries = glGenQueries; + procs->deleteQueries = glDeleteQueries; + procs->beginQuery = glBeginQuery; + procs->endQuery = glEndQuery; + procs->getQueryObjectuiv = glGetQueryObjectuiv; +# ifdef BACKEND_OPENGL_VERSION_GL + procs->getQueryObjectui64v = glGetQueryObjectui64v; // only core in GL +# elif defined(GL_EXT_disjoint_timer_query) + procs->getQueryObjectui64v = glGetQueryObjectui64vEXT; +# endif // BACKEND_OPENGL_VERSION_GL + + 
// core in ES 3.0 and GL 4.3 + procs->invalidateFramebuffer = glInvalidateFramebuffer; +#endif // BACKEND_OPENGL_LEVEL_GLES30 + + // no-op if not supported + procs->maxShaderCompilerThreadsKHR = +[](GLuint) {}; + +#ifdef BACKEND_OPENGL_VERSION_GLES +# ifndef IOS // IOS is guaranteed to have ES3.x + if (UTILS_UNLIKELY(major == 2)) { + // Runtime OpenGL version is ES 2.x + if (UTILS_LIKELY(ext.OES_vertex_array_object)) { + procs->genVertexArrays = glGenVertexArraysOES; + procs->bindVertexArray = glBindVertexArrayOES; + procs->deleteVertexArrays = glDeleteVertexArraysOES; + } else { + // if we don't have OES_vertex_array_object, just don't do anything with real VAOs, + // we'll just rebind everything each time. Most Mali-400 support this extension, but + // a few don't. + procs->genVertexArrays = +[](GLsizei, GLuint*) {}; + procs->bindVertexArray = +[](GLuint) {}; + procs->deleteVertexArrays = +[](GLsizei, GLuint const*) {}; + } + + // EXT_disjoint_timer_query is optional -- pointers will be null if not available + procs->genQueries = glGenQueriesEXT; + procs->deleteQueries = glDeleteQueriesEXT; + procs->beginQuery = glBeginQueryEXT; + procs->endQuery = glEndQueryEXT; + procs->getQueryObjectuiv = glGetQueryObjectuivEXT; + procs->getQueryObjectui64v = glGetQueryObjectui64vEXT; + + procs->invalidateFramebuffer = glDiscardFramebufferEXT; + + procs->maxShaderCompilerThreadsKHR = glMaxShaderCompilerThreadsKHR; + } +# endif // IOS +#else + procs->maxShaderCompilerThreadsKHR = glMaxShaderCompilerThreadsARB; +#endif +} + +void OpenGLContext::initBugs(Bugs* bugs, Extensions const& exts, + GLint major, GLint minor, + char const* vendor, + char const* renderer, + char const* version, + char const* shader) { + + (void)major; + (void)minor; + (void)vendor; + (void)renderer; + (void)version; + (void)shader; + + const bool isAngle = strstr(renderer, "ANGLE"); + if (!isAngle) { + if (strstr(renderer, "Adreno")) { + // Qualcomm GPU + bugs->invalidate_end_only_if_invalidate_start = 
true; + + // On Adreno (As of 3/20) timer query seem to return the CPU time, not the GPU time. + bugs->dont_use_timer_query = true; + + // Blits to texture arrays are failing + // This bug continues to reproduce, though at times we've seen it appear to "go away". + // The standalone sample app that was written to show this problem still reproduces. + // The working hypothesis is that some other state affects this behavior. + bugs->disable_blit_into_texture_array = true; + + // early exit condition is flattened in EASU code + bugs->split_easu = true; + + // initialize the non-used uniform array for Adreno drivers. + bugs->enable_initialize_non_used_uniform_array = true; + + int maj, min, driverMajor, driverMinor; + int const c = sscanf(version, "OpenGL ES %d.%d V@%d.%d", // NOLINT(cert-err34-c) + &maj, &min, &driverMajor, &driverMinor); + if (c == 4) { + // Workarounds based on version here. + // Notes: + // bugs.invalidate_end_only_if_invalidate_start + // - appeared at least in + // "OpenGL ES 3.2 V@0490.0 (GIT@85da404, I46ff5fc46f, 1606794520) (Date:11/30/20)" + // - wasn't present in + // "OpenGL ES 3.2 V@0490.0 (GIT@0905e9f, Ia11ce2d146, 1599072951) (Date:09/02/20)" + // - has been confirmed fixed in V@570.1 by Qualcomm + if (driverMajor < 490 || driverMajor > 570 || + (driverMajor == 570 && driverMinor >= 1)) { + bugs->invalidate_end_only_if_invalidate_start = false; + } + } + + // qualcomm seems to have no problem with this (which is good for us) + bugs->allow_read_only_ancillary_feedback_loop = true; + + // Older Adreno devices that support ES3.0 only tend to be extremely buggy, so we + // fall back to ES2.0. 
+ if (major == 3 && minor == 0) { + bugs->force_feature_level0 = true; + } + } else if (strstr(renderer, "Mali")) { + // ARM GPU + bugs->vao_doesnt_store_element_array_buffer_binding = true; + if (strstr(renderer, "Mali-T")) { + bugs->disable_glFlush = true; + bugs->disable_shared_context_draws = true; + bugs->texture_external_needs_rebind = true; + // We have not verified that timer queries work on Mali-T, so we disable to be safe. + bugs->dont_use_timer_query = true; + } + if (strstr(renderer, "Mali-G")) { + // We have run into several problems with timer queries on Mali-Gxx: + // - timer queries seem to cause memory corruptions in some cases on some devices + // (see b/233754398) + // - appeared at least in: "OpenGL ES 3.2 v1.r26p0-01eac0" + // - wasn't present in: "OpenGL ES 3.2 v1.r32p1-00pxl1" + // - timer queries sometime crash with an NPE (see b/273759031) + bugs->dont_use_timer_query = true; + } + // Mali seems to have no problem with this (which is good for us) + bugs->allow_read_only_ancillary_feedback_loop = true; + } else if (strstr(renderer, "Intel")) { + // Intel GPU + bugs->vao_doesnt_store_element_array_buffer_binding = true; + } else if (strstr(renderer, "PowerVR")) { + // PowerVR GPU + // On PowerVR (Rogue GE8320) glFlush doesn't seem to do anything, in particular, + // it doesn't kick the GPU earlier, so don't issue these calls as they seem to slow + // things down. + bugs->disable_glFlush = true; + // On PowerVR (Rogue GE8320) using gl_InstanceID too early in the shader doesn't work. + bugs->powervr_shader_workarounds = true; + // On PowerVR (Rogue GE8320) destroying a fbo after glBlitFramebuffer is effectively + // equivalent to glFinish. 
+ bugs->delay_fbo_destruction = true; + // PowerVR seems to have no problem with this (which is good for us) + bugs->allow_read_only_ancillary_feedback_loop = true; + // PowerVR has a shader compiler thread pinned on the last core + bugs->disable_thread_affinity = true; + } else if (strstr(renderer, "Apple")) { + // Apple GPU + } else if (strstr(renderer, "Tegra") || + strstr(renderer, "GeForce") || + strstr(renderer, "NV")) { + // NVIDIA GPU + } else if (strstr(renderer, "Vivante")) { + // Vivante GPU + } else if (strstr(renderer, "AMD") || + strstr(renderer, "ATI")) { + // AMD/ATI GPU + } else if (strstr(renderer, "Mozilla")) { + bugs->disable_invalidate_framebuffer = true; + } + } else { + // When running under ANGLE, it's a different set of workaround that we need. + if (strstr(renderer, "Adreno")) { + // Qualcomm GPU + // early exit condition is flattened in EASU code + // (that should be regardless of ANGLE, but we should double-check) + bugs->split_easu = true; + } + // TODO: see if we could use `bugs.allow_read_only_ancillary_feedback_loop = true` + } + +#ifdef BACKEND_OPENGL_VERSION_GLES +# ifndef IOS // IOS is guaranteed to have ES3.x + if (UTILS_UNLIKELY(major == 2)) { + if (UTILS_UNLIKELY(!exts.OES_vertex_array_object)) { + // we activate this workaround path, which does the reset of array buffer + bugs->vao_doesnt_store_element_array_buffer_binding = true; + } + } +# endif // IOS +#else + // feedback loops are allowed on GL desktop as long as writes are disabled + bugs->allow_read_only_ancillary_feedback_loop = true; +#endif +} + +FeatureLevel OpenGLContext::resolveFeatureLevel(GLint major, GLint minor, + Extensions const& exts, + Gets const& gets, + Bugs const& bugs) noexcept { + + constexpr auto const caps3 = FEATURE_LEVEL_CAPS[+FeatureLevel::FEATURE_LEVEL_3]; + constexpr GLint MAX_VERTEX_SAMPLER_COUNT = caps3.MAX_VERTEX_SAMPLER_COUNT; + constexpr GLint MAX_FRAGMENT_SAMPLER_COUNT = caps3.MAX_FRAGMENT_SAMPLER_COUNT; + + (void)exts; + (void)gets; + 
(void)bugs; + + FeatureLevel featureLevel = FeatureLevel::FEATURE_LEVEL_1; + +#ifdef BACKEND_OPENGL_VERSION_GLES + if (major == 3) { + // Runtime OpenGL version is ES 3.x + assert_invariant(gets.max_texture_image_units >= 16); + assert_invariant(gets.max_combined_texture_image_units >= 32); + if (minor >= 1) { + // figure out our feature level + if (exts.EXT_texture_cube_map_array) { + featureLevel = FeatureLevel::FEATURE_LEVEL_2; + if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT && + gets.max_combined_texture_image_units >= + (MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) { + featureLevel = FeatureLevel::FEATURE_LEVEL_3; + } + } + } + } +# ifndef IOS // IOS is guaranteed to have ES3.x + else if (UTILS_UNLIKELY(major == 2)) { + // Runtime OpenGL version is ES 2.x +# if defined(BACKEND_OPENGL_LEVEL_GLES30) + // mandatory extensions (all supported by Mali-400 and Adreno 304) + assert_invariant(exts.OES_depth_texture); + assert_invariant(exts.OES_depth24); + assert_invariant(exts.OES_packed_depth_stencil); + assert_invariant(exts.OES_rgb8_rgba8); + assert_invariant(exts.OES_standard_derivatives); + assert_invariant(exts.OES_texture_npot); +# endif + featureLevel = FeatureLevel::FEATURE_LEVEL_0; + } +# endif // IOS +#else + assert_invariant(gets.max_texture_image_units >= 16); + assert_invariant(gets.max_combined_texture_image_units >= 32); + if (major == 4) { + assert_invariant(minor >= 1); + if (minor >= 3) { + // cubemap arrays are available as of OpenGL 4.0 + featureLevel = FeatureLevel::FEATURE_LEVEL_2; + // figure out our feature level + if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT && + gets.max_combined_texture_image_units >= + (MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) { + featureLevel = FeatureLevel::FEATURE_LEVEL_3; + } + } + } +#endif + + if (bugs.force_feature_level0) { + featureLevel = FeatureLevel::FEATURE_LEVEL_0; + } + + return featureLevel; +} + #ifdef BACKEND_OPENGL_VERSION_GLES -void 
OpenGLContext::initExtensionsGLES() noexcept { +void OpenGLContext::initExtensionsGLES(Extensions* ext, GLint major, GLint minor) noexcept { const char * const extensions = (const char*)glGetString(GL_EXTENSIONS); GLUtils::unordered_string_set const exts = GLUtils::split(extensions); if constexpr (DEBUG_PRINT_EXTENSIONS) { @@ -472,51 +560,50 @@ void OpenGLContext::initExtensionsGLES() noexcept { // figure out and initialize the extensions we need using namespace std::literals; - ext.APPLE_color_buffer_packed_float = exts.has("GL_APPLE_color_buffer_packed_float"sv); - ext.EXT_clip_control = exts.has("GL_EXT_clip_control"sv); - ext.EXT_clip_cull_distance = exts.has("GL_EXT_clip_cull_distance"sv); - ext.EXT_color_buffer_float = exts.has("GL_EXT_color_buffer_float"sv); - ext.EXT_color_buffer_half_float = exts.has("GL_EXT_color_buffer_half_float"sv); - ext.EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv); - ext.EXT_discard_framebuffer = exts.has("GL_EXT_discard_framebuffer"sv); - ext.EXT_disjoint_timer_query = exts.has("GL_EXT_disjoint_timer_query"sv); - ext.EXT_multisampled_render_to_texture = exts.has("GL_EXT_multisampled_render_to_texture"sv); - ext.EXT_multisampled_render_to_texture2 = exts.has("GL_EXT_multisampled_render_to_texture2"sv); - ext.EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv); + ext->APPLE_color_buffer_packed_float = exts.has("GL_APPLE_color_buffer_packed_float"sv); + ext->EXT_clip_control = exts.has("GL_EXT_clip_control"sv); + ext->EXT_clip_cull_distance = exts.has("GL_EXT_clip_cull_distance"sv); + ext->EXT_color_buffer_float = exts.has("GL_EXT_color_buffer_float"sv); + ext->EXT_color_buffer_half_float = exts.has("GL_EXT_color_buffer_half_float"sv); + ext->EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv); + ext->EXT_discard_framebuffer = exts.has("GL_EXT_discard_framebuffer"sv); + ext->EXT_disjoint_timer_query = exts.has("GL_EXT_disjoint_timer_query"sv); + ext->EXT_multisampled_render_to_texture = 
exts.has("GL_EXT_multisampled_render_to_texture"sv); + ext->EXT_multisampled_render_to_texture2 = exts.has("GL_EXT_multisampled_render_to_texture2"sv); + ext->EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv); #if !defined(__EMSCRIPTEN__) - ext.EXT_texture_compression_etc2 = true; + ext->EXT_texture_compression_etc2 = true; #endif - ext.EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv); - ext.EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv); - ext.EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv); - ext.EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv); - ext.EXT_texture_cube_map_array = exts.has("GL_EXT_texture_cube_map_array"sv) || exts.has("GL_OES_texture_cube_map_array"sv); - ext.GOOGLE_cpp_style_line_directive = exts.has("GL_GOOGLE_cpp_style_line_directive"sv); - ext.KHR_debug = exts.has("GL_KHR_debug"sv); - ext.KHR_parallel_shader_compile = exts.has("GL_KHR_parallel_shader_compile"sv); - ext.KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv); - ext.KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv); - ext.OES_depth_texture = exts.has("GL_OES_depth_texture"sv); - ext.OES_depth24 = exts.has("GL_OES_depth24"sv); - ext.OES_packed_depth_stencil = exts.has("GL_OES_packed_depth_stencil"sv); - ext.OES_EGL_image_external_essl3 = exts.has("GL_OES_EGL_image_external_essl3"sv); - ext.OES_rgb8_rgba8 = exts.has("GL_OES_rgb8_rgba8"sv); - ext.OES_standard_derivatives = exts.has("GL_OES_standard_derivatives"sv); - ext.OES_texture_npot = exts.has("GL_OES_texture_npot"sv); - ext.OES_vertex_array_object = exts.has("GL_OES_vertex_array_object"sv); - ext.WEBGL_compressed_texture_etc = exts.has("WEBGL_compressed_texture_etc"sv); - ext.WEBGL_compressed_texture_s3tc = exts.has("WEBGL_compressed_texture_s3tc"sv); - ext.WEBGL_compressed_texture_s3tc_srgb = 
exts.has("WEBGL_compressed_texture_s3tc_srgb"sv); + ext->EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv); + ext->EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv); + ext->EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv); + ext->EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv); + ext->EXT_texture_cube_map_array = exts.has("GL_EXT_texture_cube_map_array"sv) || exts.has("GL_OES_texture_cube_map_array"sv); + ext->GOOGLE_cpp_style_line_directive = exts.has("GL_GOOGLE_cpp_style_line_directive"sv); + ext->KHR_debug = exts.has("GL_KHR_debug"sv); + ext->KHR_parallel_shader_compile = exts.has("GL_KHR_parallel_shader_compile"sv); + ext->KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv); + ext->KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv); + ext->OES_depth_texture = exts.has("GL_OES_depth_texture"sv); + ext->OES_depth24 = exts.has("GL_OES_depth24"sv); + ext->OES_packed_depth_stencil = exts.has("GL_OES_packed_depth_stencil"sv); + ext->OES_EGL_image_external_essl3 = exts.has("GL_OES_EGL_image_external_essl3"sv); + ext->OES_rgb8_rgba8 = exts.has("GL_OES_rgb8_rgba8"sv); + ext->OES_standard_derivatives = exts.has("GL_OES_standard_derivatives"sv); + ext->OES_texture_npot = exts.has("GL_OES_texture_npot"sv); + ext->OES_vertex_array_object = exts.has("GL_OES_vertex_array_object"sv); + ext->WEBGL_compressed_texture_etc = exts.has("WEBGL_compressed_texture_etc"sv); + ext->WEBGL_compressed_texture_s3tc = exts.has("WEBGL_compressed_texture_s3tc"sv); + ext->WEBGL_compressed_texture_s3tc_srgb = exts.has("WEBGL_compressed_texture_s3tc_srgb"sv); // ES 3.2 implies EXT_color_buffer_float - if (state.major > 3 || (state.major == 3 && state.minor >= 2)) { - ext.EXT_color_buffer_float = true; + if (major > 3 || (major == 3 && minor >= 2)) { + ext->EXT_color_buffer_float = true; } - // ES 3.x implies 
EXT_discard_framebuffer and OES_vertex_array_object - if (state.major >= 3) { - ext.EXT_discard_framebuffer = true; - ext.OES_vertex_array_object = true; + if (major >= 3) { + ext->EXT_discard_framebuffer = true; + ext->OES_vertex_array_object = true; } } @@ -524,7 +611,7 @@ void OpenGLContext::initExtensionsGLES() noexcept { #ifdef BACKEND_OPENGL_VERSION_GL -void OpenGLContext::initExtensionsGL() noexcept { +void OpenGLContext::initExtensionsGL(Extensions* ext, GLint major, GLint minor) noexcept { GLUtils::unordered_string_set exts; GLint n = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &n); @@ -539,55 +626,52 @@ void OpenGLContext::initExtensionsGL() noexcept { } using namespace std::literals; - ext.APPLE_color_buffer_packed_float = true; // Assumes core profile. - ext.ARB_shading_language_packing = exts.has("GL_ARB_shading_language_packing"sv); - ext.EXT_color_buffer_float = true; // Assumes core profile. - ext.EXT_color_buffer_half_float = true; // Assumes core profile. - ext.EXT_clip_cull_distance = true; - ext.EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv); - ext.EXT_discard_framebuffer = false; - ext.EXT_disjoint_timer_query = true; - ext.EXT_multisampled_render_to_texture = false; - ext.EXT_multisampled_render_to_texture2 = false; - ext.EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv); - ext.EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv); - ext.EXT_texture_compression_etc2 = exts.has("GL_ARB_ES3_compatibility"sv); - ext.EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv); - ext.EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv); - ext.EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv); - ext.EXT_texture_cube_map_array = true; - ext.EXT_texture_filter_anisotropic = exts.has("GL_EXT_texture_filter_anisotropic"sv); - ext.EXT_texture_sRGB = exts.has("GL_EXT_texture_sRGB"sv); - ext.GOOGLE_cpp_style_line_directive = 
exts.has("GL_GOOGLE_cpp_style_line_directive"sv); - ext.KHR_parallel_shader_compile = exts.has("GL_KHR_parallel_shader_compile"sv); - ext.KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv); - ext.KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv); - ext.OES_depth_texture = true; - ext.OES_depth24 = true; - ext.OES_EGL_image_external_essl3 = false; - ext.OES_rgb8_rgba8 = true; - ext.OES_standard_derivatives = true; - ext.OES_texture_npot = true; - ext.OES_vertex_array_object = true; - ext.WEBGL_compressed_texture_etc = false; - ext.WEBGL_compressed_texture_s3tc = false; - ext.WEBGL_compressed_texture_s3tc_srgb = false; - - auto const major = state.major; - auto const minor = state.minor; + ext->APPLE_color_buffer_packed_float = true; // Assumes core profile. + ext->ARB_shading_language_packing = exts.has("GL_ARB_shading_language_packing"sv); + ext->EXT_color_buffer_float = true; // Assumes core profile. + ext->EXT_color_buffer_half_float = true; // Assumes core profile. 
+ ext->EXT_clip_cull_distance = true; + ext->EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv); + ext->EXT_discard_framebuffer = false; + ext->EXT_disjoint_timer_query = true; + ext->EXT_multisampled_render_to_texture = false; + ext->EXT_multisampled_render_to_texture2 = false; + ext->EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv); + ext->EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv); + ext->EXT_texture_compression_etc2 = exts.has("GL_ARB_ES3_compatibility"sv); + ext->EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv); + ext->EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv); + ext->EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv); + ext->EXT_texture_cube_map_array = true; + ext->EXT_texture_filter_anisotropic = exts.has("GL_EXT_texture_filter_anisotropic"sv); + ext->EXT_texture_sRGB = exts.has("GL_EXT_texture_sRGB"sv); + ext->GOOGLE_cpp_style_line_directive = exts.has("GL_GOOGLE_cpp_style_line_directive"sv); + ext->KHR_parallel_shader_compile = exts.has("GL_KHR_parallel_shader_compile"sv); + ext->KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv); + ext->KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv); + ext->OES_depth_texture = true; + ext->OES_depth24 = true; + ext->OES_EGL_image_external_essl3 = false; + ext->OES_rgb8_rgba8 = true; + ext->OES_standard_derivatives = true; + ext->OES_texture_npot = true; + ext->OES_vertex_array_object = true; + ext->WEBGL_compressed_texture_etc = false; + ext->WEBGL_compressed_texture_s3tc = false; + ext->WEBGL_compressed_texture_s3tc_srgb = false; // OpenGL 4.2 implies ARB_shading_language_packing if (major > 4 || (major == 4 && minor >= 2)) { - ext.ARB_shading_language_packing = true; + ext->ARB_shading_language_packing = true; } // OpenGL 4.3 implies EXT_discard_framebuffer if (major > 4 || (major == 4 && 
minor >= 3)) { - ext.EXT_discard_framebuffer = true; - ext.KHR_debug = true; + ext->EXT_discard_framebuffer = true; + ext->KHR_debug = true; } // OpenGL 4.5 implies EXT_clip_control if (major > 4 || (major == 4 && minor >= 5)) { - ext.EXT_clip_control = true; + ext->EXT_clip_control = true; } } @@ -683,7 +767,7 @@ void OpenGLContext::deleteBuffers(GLsizei n, const GLuint* buffers, GLenum targe } #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 - assert_invariant(state.major > 2 || + assert_invariant(mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1 || (target != GL_UNIFORM_BUFFER && target != GL_TRANSFORM_FEEDBACK_BUFFER)); if (target == GL_UNIFORM_BUFFER || target == GL_TRANSFORM_FEEDBACK_BUFFER) { diff --git a/filament/backend/src/opengl/OpenGLContext.h b/filament/backend/src/opengl/OpenGLContext.h index 569d7749928..ff8b29cbd54 100644 --- a/filament/backend/src/opengl/OpenGLContext.h +++ b/filament/backend/src/opengl/OpenGLContext.h @@ -92,7 +92,7 @@ class OpenGLContext { # ifndef BACKEND_OPENGL_LEVEL_GLES30 return true; # else - return state.major == 2; + return mFeatureLevel == FeatureLevel::FEATURE_LEVEL_0; # endif #else return false; @@ -151,7 +151,7 @@ class OpenGLContext { void deleteVertexArrays(GLsizei n, const GLuint* arrays) noexcept; // glGet*() values - struct { + struct Gets { GLfloat max_anisotropy; GLint max_draw_buffers; GLint max_renderbuffer_size; @@ -170,7 +170,7 @@ class OpenGLContext { } features = {}; // supported extensions detected at runtime - struct { + struct Extensions { bool APPLE_color_buffer_packed_float; bool ARB_shading_language_packing; bool EXT_clip_control; @@ -209,7 +209,7 @@ class OpenGLContext { bool WEBGL_compressed_texture_s3tc_srgb; } ext = {}; - struct { + struct Bugs { // Some drivers have issues with UBOs in the fragment shader when // glFlush() is called between draw calls. bool disable_glFlush; @@ -275,6 +275,10 @@ class OpenGLContext { // performance more if we end-up pinned on the same one. 
bool disable_thread_affinity; + // Force feature level 0. Typically used for low end ES3 devices with significant driver + // bugs or performance issues. + bool force_feature_level0; + } bugs = {}; // state getters -- as needed. @@ -397,7 +401,7 @@ class OpenGLContext { } window; } state; - struct { + struct Procs { void (* bindVertexArray)(GLuint array); void (* deleteVertexArrays)(GLsizei n, const GLuint* arrays); void (* genVertexArrays)(GLsizei n, GLuint* arrays); @@ -467,18 +471,46 @@ class OpenGLContext { { bugs.disable_thread_affinity, "disable_thread_affinity", ""}, + { bugs.force_feature_level0, + "force_feature_level0", + ""}, }}; RenderPrimitive mDefaultVAO; // this is chosen to minimize code size #if defined(BACKEND_OPENGL_VERSION_GLES) - void initExtensionsGLES() noexcept; + static void initExtensionsGLES(Extensions* ext, GLint major, GLint minor) noexcept; #endif #if defined(BACKEND_OPENGL_VERSION_GL) - void initExtensionsGL() noexcept; + static void initExtensionsGL(Extensions* ext, GLint major, GLint minor) noexcept; #endif + static void initExtensions(Extensions* ext, GLint major, GLint minor) noexcept { +#if defined(BACKEND_OPENGL_VERSION_GLES) + initExtensionsGLES(ext, major, minor); +#endif +#if defined(BACKEND_OPENGL_VERSION_GL) + initExtensionsGL(ext, major, minor); +#endif + } + + static void initBugs(Bugs* bugs, Extensions const& exts, + GLint major, GLint minor, + char const* vendor, + char const* renderer, + char const* version, + char const* shader + ); + + static void initProcs(Procs* procs, + Extensions const& exts, GLint major, GLint minor) noexcept; + + static FeatureLevel resolveFeatureLevel(GLint major, GLint minor, + Extensions const& exts, + Gets const& gets, + Bugs const& bugs) noexcept; + template static inline void update_state(T& state, T const& expected, F functor, bool force = false) noexcept { if (UTILS_UNLIKELY(force || state != expected)) { @@ -571,7 +603,7 @@ void OpenGLContext::activeTexture(GLuint unit) noexcept { void 
OpenGLContext::bindSampler(GLuint unit, GLuint sampler) noexcept { assert_invariant(unit < MAX_TEXTURE_UNIT_COUNT); - assert_invariant(state.major > 2); + assert_invariant(mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1); #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 update_state(state.textures.units[unit].sampler, sampler, [&]() { glBindSampler(unit, sampler); @@ -617,7 +649,7 @@ void OpenGLContext::bindVertexArray(RenderPrimitive const* p) noexcept { void OpenGLContext::bindBufferRange(GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size) noexcept { - assert_invariant(state.major > 2); + assert_invariant(mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1); #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2 # ifdef BACKEND_OPENGL_LEVEL_GLES31 From aa6e94a128035f3398102fad2bcda5354568a72f Mon Sep 17 00:00:00 2001 From: Jacob Su Date: Fri, 18 Aug 2023 18:42:25 +0800 Subject: [PATCH 12/23] Fix Mat cofactor UT error on Mac M2 chip machine. --- libs/math/tests/test_mat.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/libs/math/tests/test_mat.cpp b/libs/math/tests/test_mat.cpp index 6afe9c81fc3..d53e73a9168 100644 --- a/libs/math/tests/test_mat.cpp +++ b/libs/math/tests/test_mat.cpp @@ -573,6 +573,18 @@ do { \ } \ } while(0) +//------------------------------------------------------------------------------ +// A macro to help with vector comparisons within a range. +#define EXPECT_VEC_NEAR(VEC1, VEC2, eps) \ +do { \ + const decltype(VEC1) v1 = VEC1; \ + const decltype(VEC2) v2 = VEC2; \ + for (int i = 0; i < v1.size(); ++i) { \ + EXPECT_NEAR(v1[i], v2[i], eps); \ + } \ +} while(0) + + //------------------------------------------------------------------------------ // A macro to help with type comparisons within floating point range. 
#define ASSERT_TYPE_EQ(T1, T2) \ @@ -834,9 +846,10 @@ TYPED_TEST(MatTestT, cofactor) { M33T r = M33T::eulerZYX(rand_gen(), rand_gen(), rand_gen()); M33T c0 = details::matrix::cofactor(r); M33T c1 = details::matrix::fastCofactor3(r); - EXPECT_VEC_EQ(c0[0], c1[0]); - EXPECT_VEC_EQ(c0[1], c1[1]); - EXPECT_VEC_EQ(c0[2], c1[2]); + + EXPECT_VEC_NEAR(c0[0], c1[0], value_eps); + EXPECT_VEC_NEAR(c0[1], c1[1], value_eps); + EXPECT_VEC_NEAR(c0[2], c1[2], value_eps); } } From 66081e6cc1905be8d7741477172d595f850fe479 Mon Sep 17 00:00:00 2001 From: Romain Guy Date: Mon, 21 Aug 2023 10:39:34 -0700 Subject: [PATCH 13/23] Add fields used by JNI to proguard rules (#7096) --- .../com/google/android/filament/View.java | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/android/filament-android/src/main/java/com/google/android/filament/View.java b/android/filament-android/src/main/java/com/google/android/filament/View.java index ad2abc138bc..1b1d70dac4f 100644 --- a/android/filament-android/src/main/java/com/google/android/filament/View.java +++ b/android/filament-android/src/main/java/com/google/android/filament/View.java @@ -27,6 +27,8 @@ import static com.google.android.filament.Asserts.assertFloat4In; import static com.google.android.filament.Colors.LinearColor; +import com.google.android.filament.proguard.UsedByNative; + /** * Encompasses all the state needed for rendering a {@link Scene}. 
* @@ -1095,10 +1097,29 @@ public void pick(int x, int y, nPick(getNativeObject(), x, y, handler, internalCallback); } + @UsedByNative("View.cpp") private static class InternalOnPickCallback implements Runnable { + private final OnPickCallback mUserCallback; + private final PickingQueryResult mPickingQueryResult = new PickingQueryResult(); + + @UsedByNative("View.cpp") + @Entity + int mRenderable; + + @UsedByNative("View.cpp") + float mDepth; + + @UsedByNative("View.cpp") + float mFragCoordsX; + @UsedByNative("View.cpp") + float mFragCoordsY; + @UsedByNative("View.cpp") + float mFragCoordsZ; + public InternalOnPickCallback(OnPickCallback mUserCallback) { this.mUserCallback = mUserCallback; } + @Override public void run() { mPickingQueryResult.renderable = mRenderable; @@ -1108,13 +1129,6 @@ public void run() { mPickingQueryResult.fragCoords[2] = mFragCoordsZ; mUserCallback.onPick(mPickingQueryResult); } - private final OnPickCallback mUserCallback; - private final PickingQueryResult mPickingQueryResult = new PickingQueryResult(); - @Entity int mRenderable; - float mDepth; - float mFragCoordsX; - float mFragCoordsY; - float mFragCoordsZ; } /** From ecd5b681d0df7e44c0b886ccab41e7e8d0bd0aa2 Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Mon, 21 Aug 2023 10:49:44 -0700 Subject: [PATCH 14/23] Update MaterialEnums.h (#7098) --- libs/filabridge/include/filament/MaterialEnums.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/filabridge/include/filament/MaterialEnums.h b/libs/filabridge/include/filament/MaterialEnums.h index d3becae86da..f697212c111 100644 --- a/libs/filabridge/include/filament/MaterialEnums.h +++ b/libs/filabridge/include/filament/MaterialEnums.h @@ -28,7 +28,7 @@ namespace filament { // update this when a new version of filament wouldn't work with older materials -static constexpr size_t MATERIAL_VERSION = 41; +static constexpr size_t MATERIAL_VERSION = 42; /** * Supported shading models From c3c0dde82f440193ccf7f83f76b6ddde08986431 
Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Tue, 22 Aug 2023 12:36:45 -0700 Subject: [PATCH 15/23] vulkan: fix crashing Pixel 4xl adreno (#7087) Adreno doesn't seem to like defining the size of arrays using a `const int`. --- libs/filamat/src/shaders/CodeGenerator.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libs/filamat/src/shaders/CodeGenerator.cpp b/libs/filamat/src/shaders/CodeGenerator.cpp index 90a20f321a3..b854ff3fe32 100644 --- a/libs/filamat/src/shaders/CodeGenerator.cpp +++ b/libs/filamat/src/shaders/CodeGenerator.cpp @@ -239,7 +239,10 @@ utils::io::sstream& CodeGenerator::generateProlog(utils::io::sstream& out, Shade generateSpecializationConstant(out, "CONFIG_POWER_VR_SHADER_WORKAROUNDS", +ReservedSpecializationConstants::CONFIG_POWER_VR_SHADER_WORKAROUNDS, false); - out << "const int CONFIG_STEREOSCOPIC_EYES = " << (int)CONFIG_STEREOSCOPIC_EYES << ";\n"; + // CONFIG_STEREOSCOPIC_EYES is used to size arrays and on Adreno GPUs + vulkan, this has to + // be explicitly, statically defined (as in #define). Otherwise (using const int for + // example), we'd run into a GPU crash. 
+ out << "#define CONFIG_STEREOSCOPIC_EYES " << (int) CONFIG_STEREOSCOPIC_EYES << "\n"; if (material.featureLevel == 0) { // On ES2 since we don't have post-processing, we need to emulate EGL_GL_COLORSPACE_KHR, From 04669f6ab9d3b4f670949f867ed163b9a04301d2 Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Tue, 22 Aug 2023 12:41:56 -0700 Subject: [PATCH 16/23] Add Engine query for stereoscopic support (#7086) --- filament/backend/include/private/backend/DriverAPI.inc | 1 + filament/backend/src/metal/MetalDriver.mm | 4 ++++ filament/backend/src/noop/NoopDriver.cpp | 4 ++++ filament/backend/src/opengl/OpenGLDriver.cpp | 8 ++++++++ filament/backend/src/vulkan/VulkanDriver.cpp | 4 ++++ filament/include/filament/Engine.h | 8 ++++++++ filament/include/filament/View.h | 5 ++++- filament/src/Engine.cpp | 4 ++++ filament/src/View.cpp | 2 +- filament/src/details/Engine.h | 2 ++ filament/src/details/Material.cpp | 5 +++++ filament/src/details/View.cpp | 5 ++++- filament/src/details/View.h | 3 ++- 13 files changed, 51 insertions(+), 4 deletions(-) diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc index 98b54663ba6..053b3425910 100644 --- a/filament/backend/include/private/backend/DriverAPI.inc +++ b/filament/backend/include/private/backend/DriverAPI.inc @@ -297,6 +297,7 @@ DECL_DRIVER_API_SYNCHRONOUS_0(bool, isFrameBufferFetchMultiSampleSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isFrameTimeSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isAutoDepthResolveSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isSRGBSwapChainSupported) +DECL_DRIVER_API_SYNCHRONOUS_0(bool, isStereoSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isParallelShaderCompileSupported) DECL_DRIVER_API_SYNCHRONOUS_0(uint8_t, getMaxDrawBuffers) DECL_DRIVER_API_SYNCHRONOUS_0(size_t, getMaxUniformBufferSize) diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index 231aef4d71c..32ae1aa08c6 100644 
--- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -696,6 +696,10 @@ return false; } +bool MetalDriver::isStereoSupported() { + return true; +} + bool MetalDriver::isParallelShaderCompileSupported() { return false; } diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp index 72265483640..a2c6ef8d989 100644 --- a/filament/backend/src/noop/NoopDriver.cpp +++ b/filament/backend/src/noop/NoopDriver.cpp @@ -174,6 +174,10 @@ bool NoopDriver::isSRGBSwapChainSupported() { return false; } +bool NoopDriver::isStereoSupported() { + return false; +} + bool NoopDriver::isParallelShaderCompileSupported() { return false; } diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index 84b3705e14e..d15079bb482 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -1872,6 +1872,14 @@ bool OpenGLDriver::isSRGBSwapChainSupported() { return mPlatform.isSRGBSwapChainSupported(); } +bool OpenGLDriver::isStereoSupported() { + // Stereo requires instancing and EXT_clip_cull_distance. 
+ if (UTILS_UNLIKELY(mContext.isES2())) { + return false; + } + return mContext.ext.EXT_clip_cull_distance; +} + bool OpenGLDriver::isParallelShaderCompileSupported() { return mShaderCompilerService.isParallelShaderCompileSupported(); } diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index 7187a6a3277..d7792608023 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -755,6 +755,10 @@ bool VulkanDriver::isSRGBSwapChainSupported() { return mPlatform->isSRGBSwapChainSupported(); } +bool VulkanDriver::isStereoSupported() { + return true; +} + bool VulkanDriver::isParallelShaderCompileSupported() { return false; } diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h index e4d601b3cde..f4173f26144 100644 --- a/filament/include/filament/Engine.h +++ b/filament/include/filament/Engine.h @@ -513,6 +513,14 @@ class UTILS_PUBLIC Engine { */ size_t getMaxAutomaticInstances() const noexcept; + /** + * Queries the device and platform for instanced stereo rendering support. + * + * @return true if stereo rendering is supported, false otherwise + * @see View::setStereoscopicOptions + */ + bool isStereoSupported() const noexcept; + /** * @return EntityManager used by filament */ diff --git a/filament/include/filament/View.h b/filament/include/filament/View.h index b19ea3bfe07..7c0f1683e42 100644 --- a/filament/include/filament/View.h +++ b/filament/include/filament/View.h @@ -689,9 +689,12 @@ class UTILS_PUBLIC View : public FilamentAPI { * - shadowing * - punctual lights * + * Stereo rendering depends on device and platform support. To check if stereo rendering is + * supported, use Engine::isStereoSupported(). 
+ * * @param options The stereoscopic options to use on this view */ - void setStereoscopicOptions(StereoscopicOptions const& options) noexcept; + void setStereoscopicOptions(StereoscopicOptions const& options); /** * Returns the stereoscopic options associated with this View. diff --git a/filament/src/Engine.cpp b/filament/src/Engine.cpp index ecedc590efb..01b956484c4 100644 --- a/filament/src/Engine.cpp +++ b/filament/src/Engine.cpp @@ -327,6 +327,10 @@ size_t Engine::getMaxAutomaticInstances() const noexcept { return downcast(this)->getMaxAutomaticInstances(); } +bool Engine::isStereoSupported() const noexcept { + return downcast(this)->isStereoSupported(); +} + #if defined(__EMSCRIPTEN__) void Engine::resetBackendState() noexcept { downcast(this)->resetBackendState(); diff --git a/filament/src/View.cpp b/filament/src/View.cpp index bc5da818290..dd8e9380a75 100644 --- a/filament/src/View.cpp +++ b/filament/src/View.cpp @@ -283,7 +283,7 @@ bool View::isStencilBufferEnabled() const noexcept { return downcast(this)->isStencilBufferEnabled(); } -void View::setStereoscopicOptions(const StereoscopicOptions& options) noexcept { +void View::setStereoscopicOptions(const StereoscopicOptions& options) { return downcast(this)->setStereoscopicOptions(options); } diff --git a/filament/src/details/Engine.h b/filament/src/details/Engine.h index 134cbe5067c..d9a7caeb917 100644 --- a/filament/src/details/Engine.h +++ b/filament/src/details/Engine.h @@ -182,6 +182,8 @@ class FEngine : public Engine { return CONFIG_MAX_INSTANCES; } + bool isStereoSupported() const noexcept { return getDriver().isStereoSupported(); } + PostProcessManager const& getPostProcessManager() const noexcept { return mPostProcessManager; } diff --git a/filament/src/details/Material.cpp b/filament/src/details/Material.cpp index 6d655b3777f..f9cf7405cdd 100644 --- a/filament/src/details/Material.cpp +++ b/filament/src/details/Material.cpp @@ -478,6 +478,11 @@ void FMaterial::compile(CompilerPriorityQueue 
priority, backend::CallbackHandler* handler, utils::Invocable&& callback) noexcept { + // Turn off the STE variant if stereo is not supported. + if (!mEngine.getDriverApi().isStereoSupported()) { + variantSpec &= ~UserVariantFilterMask(UserVariantFilterBit::STE); + } + UserVariantFilterMask const variantFilter = ~variantSpec & UserVariantFilterMask(UserVariantFilterBit::ALL); diff --git a/filament/src/details/View.cpp b/filament/src/details/View.cpp index bb85c059bf8..5b932a42b7f 100644 --- a/filament/src/details/View.cpp +++ b/filament/src/details/View.cpp @@ -58,6 +58,7 @@ static constexpr float PID_CONTROLLER_Kd = 0.0f; FView::FView(FEngine& engine) : mFroxelizer(engine), mFogEntity(engine.getEntityManager().create()), + mIsStereoSupported(engine.getDriverApi().isStereoSupported()), mPerViewUniforms(engine), mShadowMapManager(engine) { DriverApi& driver = engine.getDriverApi(); @@ -1117,7 +1118,9 @@ View::PickingQuery& FView::pick(uint32_t x, uint32_t y, backend::CallbackHandler return *pQuery; } -void FView::setStereoscopicOptions(const StereoscopicOptions& options) noexcept { +void FView::setStereoscopicOptions(const StereoscopicOptions& options) { + ASSERT_PRECONDITION(!options.enabled || mIsStereoSupported, + "Stereo rendering is not supported."); mStereoscopicOptions = options; } diff --git a/filament/src/details/View.h b/filament/src/details/View.h index 60122aec4fd..f452ce22f18 100644 --- a/filament/src/details/View.h +++ b/filament/src/details/View.h @@ -193,7 +193,7 @@ class FView : public View { bool isStencilBufferEnabled() const noexcept { return mStencilBufferEnabled; } - void setStereoscopicOptions(StereoscopicOptions const& options) noexcept; + void setStereoscopicOptions(StereoscopicOptions const& options); FCamera const* getDirectionalLightCamera() const noexcept { return &mShadowMapManager.getShadowMap(0)->getDebugCamera(); @@ -524,6 +524,7 @@ class FView : public View { const FColorGrading* mColorGrading = nullptr; const FColorGrading* 
mDefaultColorGrading = nullptr; utils::Entity mFogEntity{}; + bool mIsStereoSupported : 1; PIDController mPidController; DynamicResolutionOptions mDynamicResolution; From ad45cc90926814d936151ab2df5f190813970850 Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Tue, 22 Aug 2023 13:35:15 -0700 Subject: [PATCH 17/23] Release Filament 1.42.0 --- README.md | 4 ++-- RELEASE_NOTES.md | 2 ++ android/gradle.properties | 2 +- ios/CocoaPods/Filament.podspec | 4 ++-- web/filament-js/package.json | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c0be01cf2fd..0ed084ef84b 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.41.0' + implementation 'com.google.android.filament:filament-android:1.42.0' } ``` @@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ``` -pod 'Filament', '~> 1.41.0' +pod 'Filament', '~> 1.42.0' ``` ### Snapshots diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 0058fb82e43..4beb4affcca 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,8 @@ A new header is inserted each time a *tag* is created. Instead, if you are authoring a PR for the main branch, add your release note to [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md). +## v1.42.1 + ## v1.42.0 - engine: add preliminary support for instanced stereoscopic rendering [⚠️ **Recompile materials**] diff --git a/android/gradle.properties b/android/gradle.properties index 90ffbf3a907..6962d3e13ee 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.41.0 +VERSION_NAME=1.42.0 POM_DESCRIPTION=Real-time physically based rendering engine for Android. 
diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index 0f762db725e..33db11f276e 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.41.0" + spec.version = "1.42.0" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = "https://google.github.io/filament" spec.authors = "Google LLC." spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.41.0/filament-v1.41.0-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.42.0/filament-v1.42.0-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. spec.pod_target_xcconfig = { diff --git a/web/filament-js/package.json b/web/filament-js/package.json index 8f6ce074e75..9dfa743cf1c 100644 --- a/web/filament-js/package.json +++ b/web/filament-js/package.json @@ -1,6 +1,6 @@ { "name": "filament", - "version": "1.41.0", + "version": "1.42.0", "description": "Real-time physically based rendering engine", "main": "filament.js", "module": "filament.js", From e6384e0e9212a29a8a0fc900d9bace5eea00f97e Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Tue, 22 Aug 2023 13:41:42 -0700 Subject: [PATCH 18/23] Bump version to 1.42.1 --- README.md | 4 ++-- android/gradle.properties | 2 +- ios/CocoaPods/Filament.podspec | 4 ++-- web/filament-js/package.json | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 0ed084ef84b..63f3a490ac7 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.42.0' + implementation 'com.google.android.filament:filament-android:1.42.1' } ``` @@ 
-51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ``` -pod 'Filament', '~> 1.42.0' +pod 'Filament', '~> 1.42.1' ``` ### Snapshots diff --git a/android/gradle.properties b/android/gradle.properties index 6962d3e13ee..528d6fba23d 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.42.0 +VERSION_NAME=1.42.1 POM_DESCRIPTION=Real-time physically based rendering engine for Android. diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index 33db11f276e..8c09b6521b7 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.42.0" + spec.version = "1.42.1" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = "https://google.github.io/filament" spec.authors = "Google LLC." spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.42.0/filament-v1.42.0-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.42.1/filament-v1.42.1-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. 
spec.pod_target_xcconfig = { diff --git a/web/filament-js/package.json b/web/filament-js/package.json index 9dfa743cf1c..3bb5eaa82c1 100644 --- a/web/filament-js/package.json +++ b/web/filament-js/package.json @@ -1,6 +1,6 @@ { "name": "filament", - "version": "1.42.0", + "version": "1.42.1", "description": "Real-time physically based rendering engine", "main": "filament.js", "module": "filament.js", From 6a967ad0075f0a13da3b1a9995898756ad22e50c Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Wed, 30 Aug 2023 16:22:37 -0700 Subject: [PATCH 19/23] Make destroyFence asynchronous (#7127) --- RELEASE_NOTES.md | 2 ++ filament/backend/include/private/backend/DriverAPI.inc | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 4beb4affcca..fcab53ff80c 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -9,6 +9,8 @@ Instead, if you are authoring a PR for the main branch, add your release note to ## v1.42.1 +- Fix potential `EXC_BAD_ACCESS` with Metal backend: b/297059776 + ## v1.42.0 - engine: add preliminary support for instanced stereoscopic rendering [⚠️ **Recompile materials**] diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc index 053b3425910..fa0b3e323bd 100644 --- a/filament/backend/include/private/backend/DriverAPI.inc +++ b/filament/backend/include/private/backend/DriverAPI.inc @@ -273,6 +273,7 @@ DECL_DRIVER_API_N(destroyRenderTarget, backend::RenderTargetHandle, rth) DECL_DRIVER_API_N(destroySwapChain, backend::SwapChainHandle, sch) DECL_DRIVER_API_N(destroyStream, backend::StreamHandle, sh) DECL_DRIVER_API_N(destroyTimerQuery, backend::TimerQueryHandle, sh) +DECL_DRIVER_API_N(destroyFence, backend::FenceHandle, fh) /* * Synchronous APIs @@ -286,7 +287,6 @@ DECL_DRIVER_API_SYNCHRONOUS_N(void, setAcquiredImage, backend::StreamHandle, str DECL_DRIVER_API_SYNCHRONOUS_N(void, setStreamDimensions, backend::StreamHandle, stream, 
uint32_t, width, uint32_t, height) DECL_DRIVER_API_SYNCHRONOUS_N(int64_t, getStreamTimestamp, backend::StreamHandle, stream) DECL_DRIVER_API_SYNCHRONOUS_N(void, updateStreams, backend::DriverApi*, driver) -DECL_DRIVER_API_SYNCHRONOUS_N(void, destroyFence, backend::FenceHandle, fh) DECL_DRIVER_API_SYNCHRONOUS_N(backend::FenceStatus, getFenceStatus, backend::FenceHandle, fh) DECL_DRIVER_API_SYNCHRONOUS_N(bool, isTextureFormatSupported, backend::TextureFormat, format) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isTextureSwizzleSupported) From 29ce1cad84ca84aa3f3d16c44843fa1edf6119ab Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Mon, 28 Aug 2023 10:27:38 -0700 Subject: [PATCH 20/23] Transition setFrameCompletedCallback to take a CallbackHandler (#7103) --- RELEASE_NOTES.md | 1 + .../src/main/cpp/SwapChain.cpp | 7 ++-- .../google/android/filament/SwapChain.java | 4 -- filament/backend/CMakeLists.txt | 1 - .../backend/include/backend/CallbackHandler.h | 2 +- .../backend/include/backend/DriverEnums.h | 2 - .../include/private/backend/DriverAPI.inc | 3 +- filament/backend/src/CallbackHandler.cpp | 23 ------------ filament/backend/src/metal/MetalDriver.mm | 4 +- filament/backend/src/metal/MetalHandles.h | 10 +++-- filament/backend/src/metal/MetalHandles.mm | 37 +++++++------------ filament/backend/src/noop/NoopDriver.cpp | 2 +- filament/backend/src/opengl/OpenGLDriver.cpp | 2 +- filament/backend/src/vulkan/VulkanDriver.cpp | 2 +- filament/include/filament/SwapChain.h | 21 ++++++++--- filament/src/SwapChain.cpp | 5 ++- filament/src/details/SwapChain.cpp | 20 +++++++++- filament/src/details/SwapChain.h | 6 ++- 18 files changed, 73 insertions(+), 79 deletions(-) delete mode 100644 filament/backend/src/CallbackHandler.cpp diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index fcab53ff80c..cb018b907c3 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -10,6 +10,7 @@ Instead, if you are authoring a PR for the main branch, add your release note to ## v1.42.1 - Fix potential 
`EXC_BAD_ACCESS` with Metal backend: b/297059776 +- `setFrameCompletedCallback` now takes a `backend::CallbackHandler`. ## v1.42.0 diff --git a/android/filament-android/src/main/cpp/SwapChain.cpp b/android/filament-android/src/main/cpp/SwapChain.cpp index 3693803ce18..27e006ae87a 100644 --- a/android/filament-android/src/main/cpp/SwapChain.cpp +++ b/android/filament-android/src/main/cpp/SwapChain.cpp @@ -27,11 +27,10 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_SwapChain_nSetFrameCompletedCallback(JNIEnv* env, jclass, jlong nativeSwapChain, jobject handler, jobject runnable) { SwapChain* swapChain = (SwapChain*) nativeSwapChain; - auto *callback = JniCallback::make(env, handler, runnable); - swapChain->setFrameCompletedCallback([](void* user) { - JniCallback* callback = (JniCallback*)user; + auto* callback = JniCallback::make(env, handler, runnable); + swapChain->setFrameCompletedCallback(nullptr, [callback](SwapChain* swapChain) { JniCallback::postToJavaAndDestroy(callback); - }, callback); + }); } extern "C" JNIEXPORT jboolean JNICALL diff --git a/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java b/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java index 6d621f02ffb..9c0867fee2d 100644 --- a/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java +++ b/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java @@ -137,10 +137,6 @@ public Object getNativeWindow() { *

* *

- * The FrameCompletedCallback is guaranteed to be called on the main Filament thread. - *

- * - *

* Warning: Only Filament's Metal backend supports frame callbacks. Other backends ignore the * callback (which will never be called) and proceed normally. *

diff --git a/filament/backend/CMakeLists.txt b/filament/backend/CMakeLists.txt index 75416a3d4d1..7e21bf39199 100644 --- a/filament/backend/CMakeLists.txt +++ b/filament/backend/CMakeLists.txt @@ -27,7 +27,6 @@ set(SRCS src/BackendUtils.cpp src/BlobCacheKey.cpp src/Callable.cpp - src/CallbackHandler.cpp src/CircularBuffer.cpp src/CommandBufferQueue.cpp src/CommandStream.cpp diff --git a/filament/backend/include/backend/CallbackHandler.h b/filament/backend/include/backend/CallbackHandler.h index dee3aaa2515..3ffc707cdd1 100644 --- a/filament/backend/include/backend/CallbackHandler.h +++ b/filament/backend/include/backend/CallbackHandler.h @@ -66,7 +66,7 @@ class CallbackHandler { virtual void post(void* user, Callback callback) = 0; protected: - virtual ~CallbackHandler(); + virtual ~CallbackHandler() = default; }; } // namespace filament::backend diff --git a/filament/backend/include/backend/DriverEnums.h b/filament/backend/include/backend/DriverEnums.h index 0e492893fa9..a7ef823941b 100644 --- a/filament/backend/include/backend/DriverEnums.h +++ b/filament/backend/include/backend/DriverEnums.h @@ -1126,8 +1126,6 @@ static_assert(sizeof(StencilState) == 12u, using FrameScheduledCallback = void(*)(PresentCallable callable, void* user); -using FrameCompletedCallback = void(*)(void* user); - enum class Workaround : uint16_t { // The EASU pass must split because shader compiler flattens early-exit branch SPLIT_EASU, diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc index fa0b3e323bd..37ddd4c6ba0 100644 --- a/filament/backend/include/private/backend/DriverAPI.inc +++ b/filament/backend/include/private/backend/DriverAPI.inc @@ -142,7 +142,8 @@ DECL_DRIVER_API_N(setFrameScheduledCallback, DECL_DRIVER_API_N(setFrameCompletedCallback, backend::SwapChainHandle, sch, - backend::FrameCompletedCallback, callback, + backend::CallbackHandler*, handler, + backend::CallbackHandler::Callback, callback, void*, 
user) DECL_DRIVER_API_N(setPresentationTime, diff --git a/filament/backend/src/CallbackHandler.cpp b/filament/backend/src/CallbackHandler.cpp deleted file mode 100644 index a1c067b6d26..00000000000 --- a/filament/backend/src/CallbackHandler.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2021 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -namespace filament::backend { - -CallbackHandler::~CallbackHandler() = default; - -} // namespace filament::backend diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index 32ae1aa08c6..1d036d90a05 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -176,9 +176,9 @@ } void MetalDriver::setFrameCompletedCallback(Handle sch, - FrameCompletedCallback callback, void* user) { + CallbackHandler* handler, CallbackHandler::Callback callback, void* user) { auto* swapChain = handle_cast(sch); - swapChain->setFrameCompletedCallback(callback, user); + swapChain->setFrameCompletedCallback(handler, callback, user); } void MetalDriver::execute(std::function const& fn) noexcept { diff --git a/filament/backend/src/metal/MetalHandles.h b/filament/backend/src/metal/MetalHandles.h index 9ffa1a0bda8..b129d478d73 100644 --- a/filament/backend/src/metal/MetalHandles.h +++ b/filament/backend/src/metal/MetalHandles.h @@ -70,7 +70,8 @@ class MetalSwapChain : public HwSwapChain { void 
releaseDrawable(); void setFrameScheduledCallback(FrameScheduledCallback callback, void* user); - void setFrameCompletedCallback(FrameCompletedCallback callback, void* user); + void setFrameCompletedCallback(CallbackHandler* handler, + CallbackHandler::Callback callback, void* user); // For CAMetalLayer-backed SwapChains, presents the drawable or schedules a // FrameScheduledCallback. @@ -112,8 +113,11 @@ class MetalSwapChain : public HwSwapChain { FrameScheduledCallback frameScheduledCallback = nullptr; void* frameScheduledUserData = nullptr; - FrameCompletedCallback frameCompletedCallback = nullptr; - void* frameCompletedUserData = nullptr; + struct { + CallbackHandler* handler = nullptr; + CallbackHandler::Callback callback = {}; + void* user = nullptr; + } frameCompleted; }; class MetalBufferObject : public HwBufferObject { diff --git a/filament/backend/src/metal/MetalHandles.mm b/filament/backend/src/metal/MetalHandles.mm index 99e8b36227d..0b4d0b3c4dd 100644 --- a/filament/backend/src/metal/MetalHandles.mm +++ b/filament/backend/src/metal/MetalHandles.mm @@ -194,13 +194,15 @@ static inline MTLTextureUsage getMetalTextureUsage(TextureUsage usage) { frameScheduledUserData = user; } -void MetalSwapChain::setFrameCompletedCallback(FrameCompletedCallback callback, void* user) { - frameCompletedCallback = callback; - frameCompletedUserData = user; +void MetalSwapChain::setFrameCompletedCallback(CallbackHandler* handler, + CallbackHandler::Callback callback, void* user) { + frameCompleted.handler = handler; + frameCompleted.callback = callback; + frameCompleted.user = user; } void MetalSwapChain::present() { - if (frameCompletedCallback) { + if (frameCompleted.callback) { scheduleFrameCompletedCallback(); } if (drawable) { @@ -244,30 +246,17 @@ void presentDrawable(bool presentFrame, void* user) { } void MetalSwapChain::scheduleFrameCompletedCallback() { - if (!frameCompletedCallback) { + if (!frameCompleted.callback) { return; } - FrameCompletedCallback callback = 
frameCompletedCallback; - void* userData = frameCompletedUserData; + CallbackHandler* handler = frameCompleted.handler; + void* user = frameCompleted.user; + CallbackHandler::Callback callback = frameCompleted.callback; + + MetalDriver* driver = context.driver; [getPendingCommandBuffer(&context) addCompletedHandler:^(id cb) { - struct CallbackData { - void* userData; - FrameCompletedCallback callback; - }; - CallbackData* data = new CallbackData(); - data->userData = userData; - data->callback = callback; - - // Instantiate a BufferDescriptor with a callback for the sole purpose of passing it to - // scheduleDestroy. This forces the BufferDescriptor callback (and thus the - // FrameCompletedCallback) to be called on the user thread. - BufferDescriptor b(nullptr, 0u, [](void* buffer, size_t size, void* user) { - CallbackData* data = (CallbackData*) user; - data->callback(data->userData); - free(data); - }, data); - context.driver->scheduleDestroy(std::move(b)); + driver->scheduleCallback(handler, user, callback); }]; } diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp index a2c6ef8d989..3d1a9cdc327 100644 --- a/filament/backend/src/noop/NoopDriver.cpp +++ b/filament/backend/src/noop/NoopDriver.cpp @@ -58,7 +58,7 @@ void NoopDriver::setFrameScheduledCallback(Handle sch, } void NoopDriver::setFrameCompletedCallback(Handle sch, - FrameCompletedCallback callback, void* user) { + CallbackHandler* handler, CallbackHandler::Callback callback, void* user) { } diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index d15079bb482..e2172ae8add 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -3270,7 +3270,7 @@ void OpenGLDriver::setFrameScheduledCallback(Handle sch, } void OpenGLDriver::setFrameCompletedCallback(Handle sch, - FrameCompletedCallback callback, void* user) { + CallbackHandler* handler, 
CallbackHandler::Callback callback, void* user) { DEBUG_MARKER() } diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index d7792608023..c1c35014e6b 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -276,7 +276,7 @@ void VulkanDriver::setFrameScheduledCallback(Handle sch, } void VulkanDriver::setFrameCompletedCallback(Handle sch, - FrameCompletedCallback callback, void* user) { + CallbackHandler* handler, CallbackHandler::Callback callback, void* user) { } void VulkanDriver::setPresentationTime(int64_t monotonic_clock_ns) { diff --git a/filament/include/filament/SwapChain.h b/filament/include/filament/SwapChain.h index 9f7a328199e..baa9ae58ca0 100644 --- a/filament/include/filament/SwapChain.h +++ b/filament/include/filament/SwapChain.h @@ -18,10 +18,13 @@ #define TNT_FILAMENT_SWAPCHAIN_H #include + +#include #include #include #include +#include namespace filament { @@ -148,7 +151,7 @@ class Engine; class UTILS_PUBLIC SwapChain : public FilamentAPI { public: using FrameScheduledCallback = backend::FrameScheduledCallback; - using FrameCompletedCallback = backend::FrameCompletedCallback; + using FrameCompletedCallback = backend::CallbackHandler::Callback; /** * Requests a SwapChain with an alpha channel. @@ -241,17 +244,23 @@ class UTILS_PUBLIC SwapChain : public FilamentAPI { * contents have completed rendering on the GPU. * * Use SwapChain::setFrameCompletedCallback to set a callback on an individual SwapChain. Each - * time a frame completes GPU rendering, the callback will be called with optional user data. + * time a frame completes GPU rendering, the callback will be called. * - * The FrameCompletedCallback is guaranteed to be called on the main Filament thread. + * If handler is nullptr, the callback is guaranteed to be called on the main Filament thread. * - * @param callback A callback, or nullptr to unset. 
- * @param user An optional pointer to user data passed to the callback function. + * Use \c setFrameCompletedCallback() (with default arguments) to unset the callback. + * + * @param handler Handler to dispatch the callback or nullptr for the default handler. + * @param callback Callback called when each frame completes. * * @remark Only Filament's Metal backend supports frame callbacks. Other backends ignore the * callback (which will never be called) and proceed normally. + * + * @see CallbackHandler */ - void setFrameCompletedCallback(FrameCompletedCallback callback, void* user = nullptr); + void setFrameCompletedCallback(backend::CallbackHandler* handler = nullptr, + utils::Invocable&& callback = {}) noexcept; + }; } // namespace filament diff --git a/filament/src/SwapChain.cpp b/filament/src/SwapChain.cpp index ae1498cc916..c30bce69416 100644 --- a/filament/src/SwapChain.cpp +++ b/filament/src/SwapChain.cpp @@ -28,8 +28,9 @@ void SwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void* return downcast(this)->setFrameScheduledCallback(callback, user); } -void SwapChain::setFrameCompletedCallback(FrameCompletedCallback callback, void* user) { - return downcast(this)->setFrameCompletedCallback(callback, user); +void SwapChain::setFrameCompletedCallback(backend::CallbackHandler* handler, + utils::Invocable&& callback) noexcept { + return downcast(this)->setFrameCompletedCallback(handler, std::move(callback)); } bool SwapChain::isSRGBSwapChainSupported(Engine& engine) noexcept { diff --git a/filament/src/details/SwapChain.cpp b/filament/src/details/SwapChain.cpp index ba13be2e2d3..d9cb80911d9 100644 --- a/filament/src/details/SwapChain.cpp +++ b/filament/src/details/SwapChain.cpp @@ -38,8 +38,24 @@ void FSwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void mEngine.getDriverApi().setFrameScheduledCallback(mSwapChain, callback, user); } -void FSwapChain::setFrameCompletedCallback(FrameCompletedCallback callback, void* user) { 
- mEngine.getDriverApi().setFrameCompletedCallback(mSwapChain, callback, user); +void FSwapChain::setFrameCompletedCallback(backend::CallbackHandler* handler, + utils::Invocable&& callback) noexcept { + struct Callback { + utils::Invocable f; + SwapChain* s; + static void func(void* user) { + auto* const c = reinterpret_cast(user); + c->f(c->s); + delete c; + } + }; + if (callback) { + auto* const user = new(std::nothrow) Callback{ std::move(callback), this }; + mEngine.getDriverApi().setFrameCompletedCallback( + mSwapChain, handler, &Callback::func, static_cast(user)); + } else { + mEngine.getDriverApi().setFrameCompletedCallback(mSwapChain, nullptr, nullptr, nullptr); + } } bool FSwapChain::isSRGBSwapChainSupported(FEngine& engine) noexcept { diff --git a/filament/src/details/SwapChain.h b/filament/src/details/SwapChain.h index c1a3f436d2c..032b5e3f914 100644 --- a/filament/src/details/SwapChain.h +++ b/filament/src/details/SwapChain.h @@ -23,6 +23,9 @@ #include +#include + +#include #include namespace filament { @@ -61,7 +64,8 @@ class FSwapChain : public SwapChain { void setFrameScheduledCallback(FrameScheduledCallback callback, void* user); - void setFrameCompletedCallback(FrameCompletedCallback callback, void* user); + void setFrameCompletedCallback(backend::CallbackHandler* handler, + utils::Invocable&& callback) noexcept; static bool isSRGBSwapChainSupported(FEngine& engine) noexcept; From be1e51ad9161006b85238e723996a16e959401bd Mon Sep 17 00:00:00 2001 From: Ben Doherty Date: Wed, 30 Aug 2023 16:26:15 -0700 Subject: [PATCH 21/23] Update FrameCompletedCallback using directive (#7128) --- filament/include/filament/SwapChain.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filament/include/filament/SwapChain.h b/filament/include/filament/SwapChain.h index baa9ae58ca0..29413275a42 100644 --- a/filament/include/filament/SwapChain.h +++ b/filament/include/filament/SwapChain.h @@ -151,7 +151,7 @@ class Engine; class UTILS_PUBLIC SwapChain 
: public FilamentAPI { public: using FrameScheduledCallback = backend::FrameScheduledCallback; - using FrameCompletedCallback = backend::CallbackHandler::Callback; + using FrameCompletedCallback = utils::Invocable; /** * Requests a SwapChain with an alpha channel. @@ -259,7 +259,7 @@ class UTILS_PUBLIC SwapChain : public FilamentAPI { * @see CallbackHandler */ void setFrameCompletedCallback(backend::CallbackHandler* handler = nullptr, - utils::Invocable&& callback = {}) noexcept; + FrameCompletedCallback&& callback = {}) noexcept; }; From 8cba3d43667ada4fb88dad92c1c8247299dbbd7d Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Fri, 25 Aug 2023 10:15:01 -0700 Subject: [PATCH 22/23] Revert "workaround another PowerVR compiler bug " This reverts commit 58f96be2c439c0e057bfce335667958e44e2ae07. This caused material files to increase in size significantly. It turns out that glslang has to generate a copy for each parameter that is passed to a function as a non-const parameter. This revert will break IMG devices again, but that should be the case only on debug builds. Release builds lose the const qualifier by virtue of going through spirv. We'll try to address this some other way later. 
--- filament/src/materials/antiAliasing/fxaa.fs | 2 +- filament/src/materials/antiAliasing/taa.mat | 12 +- .../materials/colorGrading/colorGrading.fs | 2 +- .../materials/colorGrading/colorGrading.mat | 6 +- filament/src/materials/dof/dof.mat | 58 +++++----- filament/src/materials/dof/dofDilate.mat | 4 +- filament/src/materials/dof/dofTiles.mat | 4 +- .../src/materials/dof/dofTilesSwizzle.mat | 4 +- filament/src/materials/dof/dofUtils.fs | 36 +++--- filament/src/materials/flare/flare.mat | 10 +- filament/src/materials/fsr/fsr_easu.mat | 2 +- .../src/materials/fsr/fsr_easu_mobile.mat | 2 +- .../src/materials/fsr/fsr_easu_mobileF.mat | 2 +- .../src/materials/separableGaussianBlur.fs | 8 +- filament/src/materials/ssao/bilateralBlur.mat | 2 +- .../ssao/bilateralBlurBentNormals.mat | 4 +- filament/src/materials/ssao/saoImpl.fs | 10 +- filament/src/materials/ssao/ssct.fs | 8 +- filament/src/materials/utils/depthUtils.fs | 6 +- filament/src/materials/utils/geometry.fs | 14 +-- shaders/src/ambient_occlusion.fs | 8 +- shaders/src/brdf.fs | 12 +- shaders/src/common_defines.glsl | 10 ++ shaders/src/common_getters.glsl | 2 +- shaders/src/common_graphics.fs | 8 +- shaders/src/common_lighting.fs | 4 +- shaders/src/common_material.fs | 8 +- shaders/src/common_math.glsl | 26 ++--- shaders/src/common_shadowing.glsl | 4 +- shaders/src/depth_main.fs | 4 +- shaders/src/dithering.fs | 10 +- shaders/src/getters.vs | 4 +- shaders/src/light_directional.fs | 6 +- shaders/src/light_indirect.fs | 54 ++++----- shaders/src/light_punctual.fs | 18 +-- shaders/src/light_reflections.fs | 12 +- shaders/src/main.fs | 2 +- shaders/src/material_inputs.vs | 4 +- shaders/src/shading_lit.fs | 20 ++-- shaders/src/shading_lit_custom.fs | 4 +- shaders/src/shading_model_cloth.fs | 2 +- shaders/src/shading_model_standard.fs | 14 +-- shaders/src/shading_model_subsurface.fs | 2 +- shaders/src/shading_parameters.fs | 2 +- shaders/src/shading_reflections.fs | 2 +- shaders/src/shading_unlit.fs | 4 +- 
shaders/src/shadowing.fs | 104 +++++++++--------- shaders/src/vignette.fs | 2 +- 48 files changed, 280 insertions(+), 268 deletions(-) diff --git a/filament/src/materials/antiAliasing/fxaa.fs b/filament/src/materials/antiAliasing/fxaa.fs index db2bb5e260c..e7465720a43 100644 --- a/filament/src/materials/antiAliasing/fxaa.fs +++ b/filament/src/materials/antiAliasing/fxaa.fs @@ -20,7 +20,7 @@ // This substitute for the built-in "mix" function exists to work around #732, // seen with Vulkan on the Pixel 3 + Android P. -vec4 lerp(vec4 x, vec4 y, float a) { +vec4 lerp(const vec4 x, const vec4 y, float a) { return x * (1.0 - a) + y * a; } diff --git a/filament/src/materials/antiAliasing/taa.mat b/filament/src/materials/antiAliasing/taa.mat index 465ed2c4054..22a2d0a506c 100644 --- a/filament/src/materials/antiAliasing/taa.mat +++ b/filament/src/materials/antiAliasing/taa.mat @@ -95,7 +95,7 @@ fragment { #define PREVENT_FLICKERING 0 // FIXME: thin lines disapear -float luma(vec3 color) { +float luma(const vec3 color) { #if USE_YCoCg return color.x; #else @@ -103,14 +103,14 @@ float luma(vec3 color) { #endif } -vec3 RGB_YCoCg(vec3 c) { +vec3 RGB_YCoCg(const vec3 c) { float Y = dot(c.rgb, vec3( 1, 2, 1) * 0.25); float Co = dot(c.rgb, vec3( 2, 0, -2) * 0.25); float Cg = dot(c.rgb, vec3(-1, 2, -1) * 0.25); return vec3(Y, Co, Cg); } -vec3 YCoCg_RGB(vec3 c) { +vec3 YCoCg_RGB(const vec3 c) { float Y = c.x; float Co = c.y; float Cg = c.z; @@ -121,8 +121,8 @@ vec3 YCoCg_RGB(vec3 c) { } // clip the (c, h) segment to a box -vec4 clipToBox(int quality, - vec3 boxmin, vec3 boxmax, vec4 c, vec4 h) { +vec4 clipToBox(const int quality, + const vec3 boxmin, const vec3 boxmax, const vec4 c, const vec4 h) { const float epsilon = 0.0001; if (quality == BOX_CLIPPING_ACCURATE) { @@ -144,7 +144,7 @@ vec4 clipToBox(int quality, // http://vec3.ca/bicubic-filtering-in-fewer-taps/ for more details // Optimized to 5 taps by removing the corner samples // And modified for mediump support -vec4 
sampleTextureCatmullRom(sampler2D tex, highp vec2 uv, highp vec2 texSize) { +vec4 sampleTextureCatmullRom(const sampler2D tex, const highp vec2 uv, const highp vec2 texSize) { // We're going to sample a a 4x4 grid of texels surrounding the target UV coordinate. We'll do this by rounding // down the sample location to get the exact center of our "starting" texel. The starting texel will be at // location [1, 1] in the grid, where [0, 0] is the top left corner. diff --git a/filament/src/materials/colorGrading/colorGrading.fs b/filament/src/materials/colorGrading/colorGrading.fs index 928a31fe331..2b2000142d9 100644 --- a/filament/src/materials/colorGrading/colorGrading.fs +++ b/filament/src/materials/colorGrading/colorGrading.fs @@ -1,4 +1,4 @@ -vec3 colorGrade(mediump sampler3D lut, vec3 x) { +vec3 colorGrade(mediump sampler3D lut, const vec3 x) { // Alexa LogC EI 1000 const float a = 5.555556; const float b = 0.047996; diff --git a/filament/src/materials/colorGrading/colorGrading.mat b/filament/src/materials/colorGrading/colorGrading.mat index 2d5b0214633..7667b42d9b9 100644 --- a/filament/src/materials/colorGrading/colorGrading.mat +++ b/filament/src/materials/colorGrading/colorGrading.mat @@ -89,7 +89,7 @@ fragment { void dummy(){} -float starburst(vec2 uv) { +float starburst(const vec2 uv) { // get an offset that continuously moves with the camera vec3 forward = getViewFromWorldMatrix()[2].xyz; float offset = forward.x + forward.y + forward.z; @@ -104,7 +104,7 @@ float starburst(vec2 uv) { return saturate(mask + (1.0 - smoothstep(0.0, 0.3, d))); } -vec3 bloom(highp vec2 uv, vec3 color) { +vec3 bloom(highp vec2 uv, const vec3 color) { vec3 result = vec3(0.0); if (materialParams.bloom.x > 0.0) { @@ -128,7 +128,7 @@ vec3 bloom(highp vec2 uv, vec3 color) { return result; } -vec4 resolveFragment(highp vec2 uv) { +vec4 resolveFragment(const highp vec2 uv) { #if POST_PROCESS_OPAQUE return vec4(textureLod(materialParams_colorBuffer, uv, 0.0).rgb, 1.0); #else diff --git 
a/filament/src/materials/dof/dof.mat b/filament/src/materials/dof/dof.mat index 8db4d8d5c67..b6bb5b1ea78 100644 --- a/filament/src/materials/dof/dof.mat +++ b/filament/src/materials/dof/dof.mat @@ -108,16 +108,16 @@ float ringCountFast() { return materialParams.ringCounts[2]; } -float sampleCount(float ringCount) { +float sampleCount(const float ringCount) { float s = (ringCount * 2.0 - 1.0); return s * s; } -float computeNeededRings(float kernelSizeInPixel) { +float computeNeededRings(const float kernelSizeInPixel) { return ceil(kernelSizeInPixel + 0.5); } -float getMipLevel(float ringCount, float kernelSizeInPixels) { +float getMipLevel(const float ringCount, const float kernelSizeInPixels) { #if KERNEL_USE_MIPMAP // note: the 0.5 is to convert from highres to our downslampled texture float ringDistanceInTexels = kernelSizeInPixels * rcp(ringCount - 0.5); @@ -140,7 +140,7 @@ float getMipLevel(float ringCount, float kernelSizeInPixels) { #endif } -float sampleWeight(float coc, float mip) { +float sampleWeight(const float coc, const float mip) { // The contribution of sample is inversely proportional to *its* area // (the larger area, the fainter it is). 
// In theory this factor should be 1 / pi * radius^2, however 1/pi is a constant, and @@ -159,7 +159,7 @@ float sampleWeight(float coc, float mip) { return (MAX_COC_RADIUS * MAX_COC_RADIUS) / (max(coc * coc, pixelRadiusSquared)); } -float intersection(float border, float absCoc, float mip) { +float intersection(const float border, const float absCoc, const float mip) { // there is very little visible difference, so use the cheaper version on mobile #if FILAMENT_QUALITY < FILAMENT_QUALITY_HIGH return saturate((absCoc - border) + 0.5); @@ -168,7 +168,7 @@ float intersection(float border, float absCoc, float mip) { #endif } -highp vec2 diaphragm(highp vec2 center, vec2 offset) { +highp vec2 diaphragm(const highp vec2 center, const vec2 offset) { #if DOF_DIAPHRAGM == DOF_DIAPHRAGM_CIRCLE return center + offset; #elif DOF_DIAPHRAGM == DOF_DIAPHRAGM_STRAIGHT_BLADES @@ -217,7 +217,7 @@ void initBucket(out Bucket ring) { ring.coc = 0.0; } -void initRing(float i, float ringCount, float kernelSize, vec2 noise, +void initRing(const float i, const float ringCount, const float kernelSize, const vec2 noise, out float offset, out float count, out mat2 r, out vec2 p) { float radius = (kernelSize / (ringCount - 0.5)) * i; @@ -240,7 +240,7 @@ void initRing(float i, float ringCount, float kernelSize, vec2 noise, offset = length(p + noise); } -void mergeRings(inout Bucket curr, inout Bucket prev, float count) { +void mergeRings(inout Bucket curr, inout Bucket prev, const float count) { if (curr.cw >= MEDIUMP_FLT_MIN) { // "Life of a Bokeh", SIGGRAPH 2018 -- slide 32 // How much the current ring is occluding the previous ring -- we estimate this based @@ -265,8 +265,8 @@ void mergeRings(inout Bucket curr, inout Bucket prev, float count) { } } -void accumulate(inout Bucket curr, inout Bucket prev, Sample tap, - float radius, float border, float mip, bool first) { +void accumulate(inout Bucket curr, inout Bucket prev, const Sample tap, + const float radius, const float border, const 
float mip, const bool first) { float inLayer = tap.inLayer; float coc = abs(tap.coc); float w = inLayer * intersection(radius, coc, mip) * sampleWeight(coc, mip); @@ -299,8 +299,8 @@ void accumulate(inout Bucket curr, inout Bucket prev, Sample tap, } } -void accumulateBackground(inout Bucket curr, inout Bucket prev, highp vec2 pos, - float radius, float border, float mip, bool first) { +void accumulateBackground(inout Bucket curr, inout Bucket prev, const highp vec2 pos, + const float radius, const float border, const float mip, const bool first) { Sample tap; tap.s = textureLod(materialParams_color, pos, mip); tap.coc = textureLod(materialParams_coc, pos, mip).r; @@ -309,15 +309,15 @@ void accumulateBackground(inout Bucket curr, inout Bucket prev, highp vec2 pos, } void accumulateBackgroundMirror(inout Bucket curr, inout Bucket prev, - highp vec2 center, vec2 offset, - float radius, float border, float mip, bool first) { + const highp vec2 center, const vec2 offset, + const float radius, const float border, const float mip, const bool first) { accumulateBackground(curr, prev, diaphragm(center, offset), radius, border, mip, first); accumulateBackground(curr, prev, diaphragm(center, -offset), radius, border, mip, first); } void accumulateBackgroundCenter(inout Bucket prev, - highp vec2 pos, float ringCount, - float kernelSize, float noiseRadius, float mip) { + const highp vec2 pos, const float ringCount, + const float kernelSize, const float noiseRadius, const float mip) { Bucket curr; initBucket(curr); float border = (kernelSize / (ringCount - 0.5)) * 0.5; @@ -327,9 +327,9 @@ void accumulateBackgroundCenter(inout Bucket prev, mergeRings(curr, prev, 1.0); } -void accumulateRing(inout Bucket prev, float index, float ringCount, - float kernelSize, vec2 noise, highp vec2 uvCenter, - highp vec2 cocToTexelScale, float mip, bool first) { +void accumulateRing(inout Bucket prev, const float index, const float ringCount, + const float kernelSize, const vec2 noise, const 
highp vec2 uvCenter, + const highp vec2 cocToTexelScale, const float mip, const bool first) { // we accumulate the larger rings first float i = (ringCount - 1.0) - index; @@ -378,12 +378,12 @@ float computeLayer(float coc, vec2 tiles) { return saturate((coc - tiles.r - frontLayerCocSize) * invTransitionSize); } -float foregroundFadding(float coc) { +float foregroundFadding(const float coc) { return saturate(abs(coc) - (MAX_IN_FOCUS_COC - 1.0)); } -void accumulateForeground(inout Layer layer[2], vec2 tiles, - highp vec2 pos, float border, float mip) { +void accumulateForeground(inout Layer layer[2], const vec2 tiles, + const highp vec2 pos, const float border, const float mip) { float coc = textureLod(materialParams_coc, pos, mip).r; vec4 s = textureLod(materialParams_color, pos, mip); @@ -399,13 +399,13 @@ void accumulateForeground(inout Layer layer[2], vec2 tiles, layer[1].weight += inBackWeight; } -void accumulateForegroundCenter(inout Layer layer[2], vec2 tiles, - highp vec2 pos, float border, float mip) { +void accumulateForegroundCenter(inout Layer layer[2], const vec2 tiles, + const highp vec2 pos, const float border, const float mip) { accumulateForeground(layer, tiles, pos, border, mip); } -void accumulateForegroundMirror(inout Layer layer[2], vec2 tiles, - highp vec2 center, vec2 offset, float border, float mip) { +void accumulateForegroundMirror(inout Layer layer[2], const vec2 tiles, + const highp vec2 center, const vec2 offset, const float border, const float mip) { // The code below is equivalent to: // accumulateForeground(layer, tiles, diaphragm(center, offset), border, mip); // accumulateForeground(layer, tiles, diaphragm(center, -offset), border, mip); @@ -440,7 +440,7 @@ void accumulateForegroundMirror(inout Layer layer[2], vec2 tiles, */ void fastTile(inout vec4 color, inout float alpha, - highp vec2 uv, vec2 tiles, NoiseState noiseState) { + const highp vec2 uv, const vec2 tiles, const NoiseState noiseState) { highp vec2 cocToTexelScale = 
materialParams.cocToTexelScale; float cocToPixelScale = materialParams.cocToPixelScale; @@ -481,7 +481,7 @@ void fastTile(inout vec4 color, inout float alpha, } void foregroundTile(inout vec4 foreground, inout float fgOpacity, - highp vec2 uv, vec2 tiles, NoiseState noiseState) { + const highp vec2 uv, const vec2 tiles, const NoiseState noiseState) { highp vec2 cocToTexelScale = materialParams.cocToTexelScale; float cocToPixelScale = materialParams.cocToPixelScale; @@ -536,7 +536,7 @@ void foregroundTile(inout vec4 foreground, inout float fgOpacity, } void backgroundTile(inout vec4 background, inout float bgOpacity, - highp vec2 uv, vec2 tiles, NoiseState noiseState) { + const highp vec2 uv, const vec2 tiles, const NoiseState noiseState) { highp vec2 cocToTexelScale = materialParams.cocToTexelScale; float cocToPixelScale = materialParams.cocToPixelScale; diff --git a/filament/src/materials/dof/dofDilate.mat b/filament/src/materials/dof/dofDilate.mat index 74eff7f1487..ca6deb8d3fb 100644 --- a/filament/src/materials/dof/dofDilate.mat +++ b/filament/src/materials/dof/dofDilate.mat @@ -30,11 +30,11 @@ void dummy(){} // Size of a tile in full-resolution pixels -- must match tileSize in PostProcessManager.cpp #define TILE_SIZE 16.0 -vec2 tap(vec2 uv, vec2 offset) { +vec2 tap(const vec2 uv, const vec2 offset) { return textureLod(materialParams_tiles, uv + offset, 0.0).rg; } -vec2 dilate(inout vec2 center, vec2 tap) { +vec2 dilate(inout vec2 center, const vec2 tap) { // Tiles that can affect us need to transfer their CoC to us (for the foreground it's // min(ourCoc, theirCoc) because CoC are negative (the min is the larger radius). 
center.r = min(center.r, tap.r); diff --git a/filament/src/materials/dof/dofTiles.mat b/filament/src/materials/dof/dofTiles.mat index 3628ff84925..b3e857867d2 100644 --- a/filament/src/materials/dof/dofTiles.mat +++ b/filament/src/materials/dof/dofTiles.mat @@ -32,12 +32,12 @@ fragment { void dummy(){} -float max4(vec4 f) { +float max4(const vec4 f) { vec2 t = max(f.xy, f.zw); return max(t.x, t.y); } -float min4(vec4 f) { +float min4(const vec4 f) { vec2 t = min(f.xy, f.zw); return min(t.x, t.y); } diff --git a/filament/src/materials/dof/dofTilesSwizzle.mat b/filament/src/materials/dof/dofTilesSwizzle.mat index a46cf6f9e25..393d7ec9ec8 100644 --- a/filament/src/materials/dof/dofTilesSwizzle.mat +++ b/filament/src/materials/dof/dofTilesSwizzle.mat @@ -32,12 +32,12 @@ fragment { void dummy(){} -float max4(vec4 f) { +float max4(const vec4 f) { vec2 t = max(f.xy, f.zw); return max(t.x, t.y); } -float min4(vec4 f) { +float min4(const vec4 f) { vec2 t = min(f.xy, f.zw); return min(t.x, t.y); } diff --git a/filament/src/materials/dof/dofUtils.fs b/filament/src/materials/dof/dofUtils.fs index fac7aef1984..b7d921ead3a 100644 --- a/filament/src/materials/dof/dofUtils.fs +++ b/filament/src/materials/dof/dofUtils.fs @@ -17,68 +17,68 @@ // Currently our dilate pass is set-up for 32 max. #define MAX_COC_RADIUS 32.0 -float min2(vec2 v) { +float min2(const vec2 v) { return min(v.x, v.y); } -float max2(vec2 v) { +float max2(const vec2 v) { return max(v.x, v.y); } -float max4(vec4 v) { +float max4(const vec4 v) { return max2(max(v.xy, v.zw)); } -float min4(vec4 v) { +float min4(const vec4 v) { return min2(min(v.xy, v.zw)); } -float rcp(float x) { +float rcp(const float x) { return 1.0 / x; } -float rcpOrZero(float x) { +float rcpOrZero(const float x) { return x > MEDIUMP_FLT_MIN ? (1.0 / x) : 0.0; } -highp float rcpOrZeroHighp(highp float x) { +highp float rcpOrZeroHighp(const highp float x) { return x > MEDIUMP_FLT_MIN ? 
(1.0 / x) : 0.0; } -float cocToAlpha(float coc) { +float cocToAlpha(const float coc) { // CoC is positive for background field. // CoC is negative for the foreground field. return saturate(abs(coc) - MAX_IN_FOCUS_COC); } // returns circle-of-confusion diameter in pixels -float getCOC(float depth, vec2 cocParams) { +float getCOC(const float depth, const vec2 cocParams) { return depth * cocParams.x + cocParams.y; } -vec4 getCOC(vec4 depth, vec2 cocParams) { +vec4 getCOC(const vec4 depth, const vec2 cocParams) { return depth * cocParams.x + cocParams.y; } -float isForeground(float coc) { +float isForeground(const float coc) { return coc < 0.0 ? 1.0 : 0.0; } -float isBackground(float coc) { +float isBackground(const float coc) { return coc > 0.0 ? 1.0 : 0.0; } -bool isForegroundTile(vec2 tiles) { +bool isForegroundTile(const vec2 tiles) { // A foreground tile is one where the smallest CoC is negative return tiles.r < 0.0; } -bool isBackgroundTile(vec2 tiles) { +bool isBackgroundTile(const vec2 tiles) { // A background tile is one where the largest CoC is positive return tiles.g > 0.0; } -bool isFastTile(vec2 tiles) { +bool isFastTile(const vec2 tiles) { // We use the distance between the min and max CoC and if the relative error is less than // 5% we assume the tile contains a constant CoC. // We could cannot use the absolute value of the min/mac CoC -- which would categorize more @@ -87,19 +87,19 @@ bool isFastTile(vec2 tiles) { return (tiles.g - tiles.r) <= abs(tiles.g) * 0.05; } -bool isTrivialTile(vec2 tiles) { +bool isTrivialTile(const vec2 tiles) { float maxCocRadius = max(abs(tiles.r), abs(tiles.g)); return maxCocRadius < MAX_IN_FOCUS_COC; } -float downsampleCoC(vec4 c) { +float downsampleCoC(const vec4 c) { // We need to compute a suitable CoC to represent the 4 pixels that are downsampled. // We pick the min because this always selects the most foreground sample if there is one, // because the foreground can leak on the background, but not the reverse. 
return min4(c); } -vec4 downsampleCocWeights(vec4 c, float outCoc, float scale) { +vec4 downsampleCocWeights(const vec4 c, const float outCoc, const float scale) { // The bilateral weight is normally computed as saturate(1 - |outCoc - c|) which selects // the sample with the outCoc weight (and does a little bit of cross-fade if other samples // are close). However, this can also cause some aliasing with dithered objects, so by diff --git a/filament/src/materials/flare/flare.mat b/filament/src/materials/flare/flare.mat index 6ace6d34ee2..70e7ed4b100 100644 --- a/filament/src/materials/flare/flare.mat +++ b/filament/src/materials/flare/flare.mat @@ -78,15 +78,15 @@ fragment { void dummy(){ } -float smoothstep01(float t) { +float smoothstep01(const float t) { return t * t * (3.0 - 2.0 * t); } -float ring(float x, float c, float r) { +float ring(float x, const float c, const float r) { return 1.0 - smoothstep01(min(abs(x - c) / r, 1.0)); } -vec3 sampleColor(vec2 uv) { +vec3 sampleColor(const vec2 uv) { // because we work on reduced resolution, we assume mediump uv is enough. float level = materialParams.level; float chromaticAberration = materialParams.chromaticAberration; @@ -99,7 +99,7 @@ vec3 sampleColor(vec2 uv) { ); } -vec3 ghosts(vec2 vertex_uv) { +vec3 ghosts(const vec2 vertex_uv) { // because we work on reduced resolution, we assume mediump uv is enough. 
float ghostSpacing = materialParams.ghostSpacing; float ghostCount = materialParams.ghostCount; @@ -116,7 +116,7 @@ vec3 ghosts(vec2 vertex_uv) { return color; } -vec3 halo(vec2 uv) { +vec3 halo(const vec2 uv) { float haloRadius = materialParams.haloRadius; float haloThickness = materialParams.haloThickness; float threshold = materialParams.threshold.y; diff --git a/filament/src/materials/fsr/fsr_easu.mat b/filament/src/materials/fsr/fsr_easu.mat index 145d63dc9af..4de510ed96f 100644 --- a/filament/src/materials/fsr/fsr_easu.mat +++ b/filament/src/materials/fsr/fsr_easu.mat @@ -72,7 +72,7 @@ fragment { #if defined(FILAMENT_HAS_FEATURE_TEXTURE_GATHER) #define gather textureGather #else - vec4 gather(mediump sampler2D color, highp vec2 p, int comp) { + vec4 gather(const mediump sampler2D color, highp vec2 p, const int comp) { highp ivec2 i = ivec2(p * materialParams.textureSize - 0.5); vec4 d; d[0] = texelFetchOffset(color, i, 0, ivec2(0, 1))[comp]; diff --git a/filament/src/materials/fsr/fsr_easu_mobile.mat b/filament/src/materials/fsr/fsr_easu_mobile.mat index d7435a60322..8648c33a4ea 100644 --- a/filament/src/materials/fsr/fsr_easu_mobile.mat +++ b/filament/src/materials/fsr/fsr_easu_mobile.mat @@ -75,7 +75,7 @@ fragment { #if defined(FILAMENT_HAS_FEATURE_TEXTURE_GATHER) #define gather textureGather #else - vec4 gather(mediump sampler2D color, highp vec2 p, int comp) { + vec4 gather(const mediump sampler2D color, highp vec2 p, const int comp) { highp ivec2 i = ivec2(p * materialParams.textureSize - 0.5); vec4 d; d[0] = texelFetchOffset(color, i, 0, ivec2(0, 1))[comp]; diff --git a/filament/src/materials/fsr/fsr_easu_mobileF.mat b/filament/src/materials/fsr/fsr_easu_mobileF.mat index 18fdc07b369..f18886b2538 100644 --- a/filament/src/materials/fsr/fsr_easu_mobileF.mat +++ b/filament/src/materials/fsr/fsr_easu_mobileF.mat @@ -76,7 +76,7 @@ fragment { #if defined(FILAMENT_HAS_FEATURE_TEXTURE_GATHER) #define gather textureGather #else - vec4 gather(mediump sampler2D 
color, highp vec2 p, int comp) { + vec4 gather(const mediump sampler2D color, highp vec2 p, const int comp) { highp ivec2 i = ivec2(p * materialParams.textureSize - 0.5); vec4 d; d[0] = texelFetchOffset(color, i, 0, ivec2(0, 1))[comp]; diff --git a/filament/src/materials/separableGaussianBlur.fs b/filament/src/materials/separableGaussianBlur.fs index b99d16e28c6..1667eae239e 100644 --- a/filament/src/materials/separableGaussianBlur.fs +++ b/filament/src/materials/separableGaussianBlur.fs @@ -1,8 +1,8 @@ -float vmax(float v) { +float vmax(const float v) { return v; } -highp vec4 sourceTexLod(highp vec2 p, float m, float l) { +highp vec4 sourceTexLod(const highp vec2 p, float m, float l) { // This condition is optimized away at compile-time. if (materialConstants_arraySampler) { return textureLod(materialParams_sourceArray, vec3(p, l), m); @@ -11,12 +11,12 @@ highp vec4 sourceTexLod(highp vec2 p, float m, float l) { } } -void tap(inout highp vec4 sum, float weight, highp vec2 position) { +void tap(inout highp vec4 sum, const float weight, const highp vec2 position) { highp vec4 s = sourceTexLod(position, materialParams.level, materialParams.layer); sum += s * weight; } -void tapReinhard(inout highp vec4 sum, inout float totalWeight, float weight, highp vec2 position) { +void tapReinhard(inout highp vec4 sum, inout float totalWeight, const float weight, const highp vec2 position) { highp vec4 s = sourceTexLod(position, materialParams.level, materialParams.layer); float w = weight / (1.0 + vmax(s)); totalWeight += w; diff --git a/filament/src/materials/ssao/bilateralBlur.mat b/filament/src/materials/ssao/bilateralBlur.mat index b2c2c71e44f..137b88709b2 100644 --- a/filament/src/materials/ssao/bilateralBlur.mat +++ b/filament/src/materials/ssao/bilateralBlur.mat @@ -48,7 +48,7 @@ fragment { return max(0.0, 1.0 - diff * diff); } - void tap(highp sampler2DArray saoTexture, + void tap(const highp sampler2DArray saoTexture, inout float sum, inout float totalWeight, float 
weight, float depth, vec2 position) { // ambient occlusion sample vec3 data = textureLod(saoTexture, vec3(position, 0.0), 0.0).rgb; diff --git a/filament/src/materials/ssao/bilateralBlurBentNormals.mat b/filament/src/materials/ssao/bilateralBlurBentNormals.mat index cc022f04070..11e81f6f239 100644 --- a/filament/src/materials/ssao/bilateralBlurBentNormals.mat +++ b/filament/src/materials/ssao/bilateralBlurBentNormals.mat @@ -60,14 +60,14 @@ fragment { return max(0.0, 1.0 - diff * diff); } - void tapAO(highp sampler2DArray saoTexture, highp vec2 uv, + void tapAO(const highp sampler2DArray saoTexture, highp vec2 uv, out float ao, out highp float sampleDepth) { vec3 data = textureLod(saoTexture, vec3(uv, 0.0), 0.0).rgb; ao = data.r; sampleDepth = unpack(data.gb); } - void tapBN(highp sampler2DArray saoTexture, highp vec2 uv, + void tapBN(const highp sampler2DArray saoTexture, highp vec2 uv, out vec3 bentNormal) { vec3 data = textureLod(saoTexture, vec3(uv, 1.0), 0.0).xyz; bentNormal = unpackBentNormal(data); diff --git a/filament/src/materials/ssao/saoImpl.fs b/filament/src/materials/ssao/saoImpl.fs index c86eb023dd1..57f96141e1f 100644 --- a/filament/src/materials/ssao/saoImpl.fs +++ b/filament/src/materials/ssao/saoImpl.fs @@ -34,14 +34,14 @@ const float kLog2LodRate = 3.0; // "The Alchemy Screen-Space Ambient Obscurance Algorithm" by Morgan McGuire // "Scalable Ambient Obscurance" by Morgan McGuire, Michael Mara and David Luebke -vec3 tapLocation(float i, float noise) { +vec3 tapLocation(float i, const float noise) { float offset = ((2.0 * PI) * 2.4) * noise; float angle = ((i * materialParams.sampleCount.y) * materialParams.spiralTurns) * (2.0 * PI) + offset; float radius = (i + noise + 0.5) * materialParams.sampleCount.y; return vec3(cos(angle), sin(angle), radius * radius); } -highp vec2 startPosition(float noise) { +highp vec2 startPosition(const float noise) { float angle = ((2.0 * PI) * 2.4) * noise; return vec2(cos(angle), sin(angle)); } @@ -51,15 +51,15 @@ 
highp mat2 tapAngleStep() { return mat2(t.x, t.y, -t.y, t.x); } -vec3 tapLocationFast(float i, vec2 p, float noise) { +vec3 tapLocationFast(float i, vec2 p, const float noise) { float radius = (i + noise + 0.5) * materialParams.sampleCount.y; return vec3(p, radius * radius); } void computeAmbientOcclusionSAO(inout float occlusion, inout vec3 bentNormal, float i, float ssDiskRadius, - highp vec2 uv, highp vec3 origin, vec3 normal, - vec2 tapPosition, float noise) { + const highp vec2 uv, const highp vec3 origin, const vec3 normal, + const vec2 tapPosition, const float noise) { vec3 tap = tapLocationFast(i, tapPosition, noise); diff --git a/filament/src/materials/ssao/ssct.fs b/filament/src/materials/ssao/ssct.fs index b283a1af24f..65c9b2f4701 100644 --- a/filament/src/materials/ssao/ssct.fs +++ b/filament/src/materials/ssao/ssct.fs @@ -52,7 +52,7 @@ struct ConeTraceSetup { uint sampleCount; }; -highp float getWFromProjectionMatrix(highp mat4 p, vec3 v) { +highp float getWFromProjectionMatrix(const highp mat4 p, const vec3 v) { // this essentially returns (p * vec4(v, 1.0)).w, but we make some assumptions // this assumes a perspective projection return -v.z; @@ -60,14 +60,14 @@ highp float getWFromProjectionMatrix(highp mat4 p, vec3 v) { //return p[2][3] * v.z + p[3][3]; } -highp float getViewSpaceZFromW(highp mat4 p, float w) { +highp float getViewSpaceZFromW(const highp mat4 p, const float w) { // this assumes a perspective projection return -w; // this assumes a perspective or ortho projection //return (w - p[3][3]) / p[2][3]; } -float coneTraceOcclusion(in ConeTraceSetup setup, highp sampler2D depthTexture) { +float coneTraceOcclusion(in ConeTraceSetup setup, const highp sampler2D depthTexture) { // skip fragments that are back-facing trace direction // (avoid overshadowing of translucent surfaces) float NoL = dot(setup.vsNormal, setup.vsConeDirection); @@ -138,7 +138,7 @@ float coneTraceOcclusion(in ConeTraceSetup setup, highp sampler2D depthTexture) } float 
ssctDominantLightShadowing(highp vec2 uv, highp vec3 origin, vec3 normal, - highp sampler2D depthTexture, highp vec2 fragCoord, + const highp sampler2D depthTexture, const highp vec2 fragCoord, vec2 rayCount, ConeTraceSetup cone) { float occlusion = 0.0; diff --git a/filament/src/materials/utils/depthUtils.fs b/filament/src/materials/utils/depthUtils.fs index 0832fef6374..58a3045d33e 100644 --- a/filament/src/materials/utils/depthUtils.fs +++ b/filament/src/materials/utils/depthUtils.fs @@ -28,12 +28,12 @@ highp float linearizeDepth(highp float depth) { return (depth * p[2].z + p[3].z) / max(depth * p[2].w + p[3].w, preventDiv0); } -highp float sampleDepth(highp sampler2D depthTexture, highp vec2 uv, float lod) { +highp float sampleDepth(const highp sampler2D depthTexture, const highp vec2 uv, float lod) { return textureLod(depthTexture, uvToRenderTargetUV(uv), lod).r; } -highp float sampleDepthLinear(highp sampler2D depthTexture, - highp vec2 uv, float lod) { +highp float sampleDepthLinear(const highp sampler2D depthTexture, + const highp vec2 uv, float lod) { return linearizeDepth(sampleDepth(depthTexture, uv, lod)); } diff --git a/filament/src/materials/utils/geometry.fs b/filament/src/materials/utils/geometry.fs index b305c772603..a1a5062100f 100644 --- a/filament/src/materials/utils/geometry.fs +++ b/filament/src/materials/utils/geometry.fs @@ -36,7 +36,7 @@ highp vec3 faceNormal(highp vec3 dpdx, highp vec3 dpdy) { // this creates arifacts around geometry edges. // Note: when using the spirv optimizer, this results in much slower execution time because // this whole expression is inlined in the AO loop below. 
-highp vec3 computeViewSpaceNormalLowQ(highp vec3 position) { +highp vec3 computeViewSpaceNormalLowQ(const highp vec3 position) { return faceNormal(dFdx(position), dFdy(position)); } @@ -52,8 +52,8 @@ highp vec3 computeViewSpaceNormalLowQ(highp vec3 position) { // positionParams : invProjection[0][0] * 2, invProjection[1][1] * 2 // highp vec3 computeViewSpaceNormalMediumQ( - highp sampler2D depthTexture, highp vec2 uv, - highp vec3 position, + const highp sampler2D depthTexture, const highp vec2 uv, + const highp vec3 position, highp vec2 texel, highp vec2 positionParams) { precision highp float; @@ -81,8 +81,8 @@ highp vec3 computeViewSpaceNormalMediumQ( // positionParams : invProjection[0][0] * 2, invProjection[1][1] * 2 // highp vec3 computeViewSpaceNormalHighQ( - highp sampler2D depthTexture, highp vec2 uv, - highp float depth, highp vec3 position, + const highp sampler2D depthTexture, const highp vec2 uv, + const highp float depth, const highp vec3 position, highp vec2 texel, highp vec2 positionParams) { precision highp float; @@ -125,8 +125,8 @@ highp vec3 computeViewSpaceNormalHighQ( // positionParams : invProjection[0][0] * 2, invProjection[1][1] * 2 // highp vec3 computeViewSpaceNormal( - highp sampler2D depthTexture, highp vec2 uv, - highp float depth, highp vec3 position, + const highp sampler2D depthTexture, const highp vec2 uv, + const highp float depth, const highp vec3 position, highp vec2 texel, highp vec2 positionParams) { // todo: maybe make this a quality parameter #if FILAMENT_QUALITY == FILAMENT_QUALITY_HIGH diff --git a/shaders/src/ambient_occlusion.fs b/shaders/src/ambient_occlusion.fs index 0eae9a66db6..32f6421986e 100644 --- a/shaders/src/ambient_occlusion.fs +++ b/shaders/src/ambient_occlusion.fs @@ -139,7 +139,7 @@ vec3 unpackBentNormal(vec3 bn) { return bn * 2.0 - 1.0; } -float specularAO(float NoV, float visibility, float roughness, SSAOInterpolationCache cache) { +float specularAO(float NoV, float visibility, float roughness, const in 
SSAOInterpolationCache cache) { float specularAO = 1.0; // SSAO is not applied when blending is enabled @@ -200,7 +200,7 @@ float specularAO(float NoV, float visibility, float roughness, SSAOInterpolation * The albedo term is meant to be the diffuse color or f0 for the diffuse and * specular terms respectively. */ -vec3 gtaoMultiBounce(float visibility, vec3 albedo) { +vec3 gtaoMultiBounce(float visibility, const vec3 albedo) { // Jimenez et al. 2016, "Practical Realtime Strategies for Accurate Indirect Occlusion" vec3 a = 2.0404 * albedo - 0.3324; vec3 b = -4.7951 * albedo + 0.6417; @@ -210,13 +210,13 @@ vec3 gtaoMultiBounce(float visibility, vec3 albedo) { } #endif -void multiBounceAO(float visibility, vec3 albedo, inout vec3 color) { +void multiBounceAO(float visibility, const vec3 albedo, inout vec3 color) { #if MULTI_BOUNCE_AMBIENT_OCCLUSION == 1 color *= gtaoMultiBounce(visibility, albedo); #endif } -void multiBounceSpecularAO(float visibility, vec3 albedo, inout vec3 color) { +void multiBounceSpecularAO(float visibility, const vec3 albedo, inout vec3 color) { #if MULTI_BOUNCE_AMBIENT_OCCLUSION == 1 && SPECULAR_AMBIENT_OCCLUSION != SPECULAR_AO_OFF color *= gtaoMultiBounce(visibility, albedo); #endif diff --git a/shaders/src/brdf.fs b/shaders/src/brdf.fs index 9d11990c54b..f4204cf44de 100644 --- a/shaders/src/brdf.fs +++ b/shaders/src/brdf.fs @@ -51,7 +51,7 @@ // Specular BRDF implementations //------------------------------------------------------------------------------ -float D_GGX(float roughness, float NoH, vec3 h) { +float D_GGX(float roughness, float NoH, const vec3 h) { // Walter et al. 
2007, "Microfacet Models for Refraction through Rough Surfaces" // In mediump, there are two problems computing 1.0 - NoH^2 @@ -138,12 +138,12 @@ float V_Neubelt(float NoV, float NoL) { return saturateMediump(1.0 / (4.0 * (NoL + NoV - NoL * NoV))); } -vec3 F_Schlick(vec3 f0, float f90, float VoH) { +vec3 F_Schlick(const vec3 f0, float f90, float VoH) { // Schlick 1994, "An Inexpensive BRDF Model for Physically-Based Rendering" return f0 + (f90 - f0) * pow5(1.0 - VoH); } -vec3 F_Schlick(vec3 f0, float VoH) { +vec3 F_Schlick(const vec3 f0, float VoH) { float f = pow(1.0 - VoH, 5.0); return f + f0 * (1.0 - f); } @@ -156,7 +156,7 @@ float F_Schlick(float f0, float f90, float VoH) { // Specular BRDF dispatch //------------------------------------------------------------------------------ -float distribution(float roughness, float NoH, vec3 h) { +float distribution(float roughness, float NoH, const vec3 h) { #if BRDF_SPECULAR_D == SPECULAR_D_GGX return D_GGX(roughness, NoH, h); #endif @@ -170,7 +170,7 @@ float visibility(float roughness, float NoV, float NoL) { #endif } -vec3 fresnel(vec3 f0, float LoH) { +vec3 fresnel(const vec3 f0, float LoH) { #if BRDF_SPECULAR_F == SPECULAR_F_SCHLICK #if FILAMENT_QUALITY == FILAMENT_QUALITY_LOW return F_Schlick(f0, LoH); // f90 = 1.0 @@ -196,7 +196,7 @@ float visibilityAnisotropic(float roughness, float at, float ab, #endif } -float distributionClearCoat(float roughness, float NoH, vec3 h) { +float distributionClearCoat(float roughness, float NoH, const vec3 h) { #if BRDF_CLEAR_COAT_D == SPECULAR_D_GGX return D_GGX(roughness, NoH, h); #endif diff --git a/shaders/src/common_defines.glsl b/shaders/src/common_defines.glsl index a28a6da7ccf..eb855244105 100644 --- a/shaders/src/common_defines.glsl +++ b/shaders/src/common_defines.glsl @@ -22,3 +22,13 @@ #define float3x3 mat3 #define float4x4 mat4 + +// To workaround an adreno crash (#5294), we need ensure that a method with +// parameter 'const mat4' does not call another method also 
with a 'const mat4' +// parameter (i.e. mulMat4x4Float3). So we remove the const modifier for +// materials compiled for vulkan+mobile. +#if defined(TARGET_VULKAN_ENVIRONMENT) && defined(TARGET_MOBILE) + #define highp_mat4 highp mat4 +#else + #define highp_mat4 const highp mat4 +#endif diff --git a/shaders/src/common_getters.glsl b/shaders/src/common_getters.glsl index 27544f84768..54ee7be33dd 100644 --- a/shaders/src/common_getters.glsl +++ b/shaders/src/common_getters.glsl @@ -70,7 +70,7 @@ highp float getUserTimeMod(float m) { * * @public-api */ -highp vec2 uvToRenderTargetUV(highp vec2 uv) { +highp vec2 uvToRenderTargetUV(const highp vec2 uv) { #if defined(TARGET_METAL_ENVIRONMENT) || defined(TARGET_VULKAN_ENVIRONMENT) return vec2(uv.x, 1.0 - uv.y); #else diff --git a/shaders/src/common_graphics.fs b/shaders/src/common_graphics.fs index 7c0e9e897e5..3cebbb4d55a 100644 --- a/shaders/src/common_graphics.fs +++ b/shaders/src/common_graphics.fs @@ -8,7 +8,7 @@ * * @public-api */ -float luminance(vec3 linear) { +float luminance(const vec3 linear) { return dot(linear, vec3(0.2126, 0.7152, 0.0722)); } @@ -16,7 +16,7 @@ float luminance(vec3 linear) { * Computes the pre-exposed intensity using the specified intensity and exposure. * This function exists to force highp precision on the two parameters */ -float computePreExposedIntensity(highp float intensity, highp float exposure) { +float computePreExposedIntensity(const highp float intensity, const highp float exposure) { return intensity * exposure; } @@ -47,7 +47,7 @@ vec3 ycbcrToRgb(float luminance, vec2 cbcr) { /* * The input must be in the [0, 1] range. 
*/ -vec3 Inverse_Tonemap_Filmic(vec3 x) { +vec3 Inverse_Tonemap_Filmic(const vec3 x) { return (0.03 - 0.59 * x - sqrt(0.0009 + 1.3702 * x - 1.0127 * x * x)) / (-5.02 + 4.86 * x); } @@ -94,7 +94,7 @@ vec3 decodeRGBM(vec4 c) { // returns the frag coord in the GL convention with (0, 0) at the bottom-left // resolution : width, height -highp vec2 getFragCoord(highp vec2 resolution) { +highp vec2 getFragCoord(const highp vec2 resolution) { #if defined(TARGET_METAL_ENVIRONMENT) || defined(TARGET_VULKAN_ENVIRONMENT) return vec2(gl_FragCoord.x, resolution.y - gl_FragCoord.y); #else diff --git a/shaders/src/common_lighting.fs b/shaders/src/common_lighting.fs index bad9aed1f1d..2f822460de8 100644 --- a/shaders/src/common_lighting.fs +++ b/shaders/src/common_lighting.fs @@ -82,7 +82,7 @@ float computeMicroShadowing(float NoL, float visibility) { * direction to match reference renderings when the roughness increases */ -vec3 getReflectedVector(PixelParams pixel, vec3 v, vec3 n) { +vec3 getReflectedVector(const PixelParams pixel, const vec3 v, const vec3 n) { #if defined(MATERIAL_HAS_ANISOTROPY) vec3 anisotropyDirection = pixel.anisotropy >= 0.0 ? 
pixel.anisotropicB : pixel.anisotropicT; vec3 anisotropicTangent = cross(anisotropyDirection, v); @@ -97,7 +97,7 @@ vec3 getReflectedVector(PixelParams pixel, vec3 v, vec3 n) { return r; } -void getAnisotropyPixelParams(MaterialInputs material, inout PixelParams pixel) { +void getAnisotropyPixelParams(const MaterialInputs material, inout PixelParams pixel) { #if defined(MATERIAL_HAS_ANISOTROPY) vec3 direction = material.anisotropyDirection; pixel.anisotropy = material.anisotropy; diff --git a/shaders/src/common_material.fs b/shaders/src/common_material.fs index e556160c20c..53cc8acb0d8 100644 --- a/shaders/src/common_material.fs +++ b/shaders/src/common_material.fs @@ -14,11 +14,11 @@ float clampNoV(float NoV) { return max(NoV, MIN_N_DOT_V); } -vec3 computeDiffuseColor(vec4 baseColor, float metallic) { +vec3 computeDiffuseColor(const vec4 baseColor, float metallic) { return baseColor.rgb * (1.0 - metallic); } -vec3 computeF0(vec4 baseColor, float metallic, float reflectance) { +vec3 computeF0(const vec4 baseColor, float metallic, float reflectance) { return baseColor.rgb * metallic + (reflectance * (1.0 - metallic)); } @@ -26,7 +26,7 @@ float computeDielectricF0(float reflectance) { return 0.16 * reflectance * reflectance; } -float computeMetallicFromSpecularColor(vec3 specularColor) { +float computeMetallicFromSpecularColor(const vec3 specularColor) { return max3(specularColor); } @@ -51,7 +51,7 @@ float f0ToIor(float f0) { return (1.0 + r) / (1.0 - r); } -vec3 f0ClearCoatToSurface(vec3 f0) { +vec3 f0ClearCoatToSurface(const vec3 f0) { // Approximation of iorTof0(f0ToIor(f0), 1.5) // This assumes that the clear coat layer has an IOR of 1.5 #if FILAMENT_QUALITY == FILAMENT_QUALITY_LOW diff --git a/shaders/src/common_math.glsl b/shaders/src/common_math.glsl index b01a80099ad..98b205f65dc 100644 --- a/shaders/src/common_math.glsl +++ b/shaders/src/common_math.glsl @@ -53,19 +53,19 @@ float sq(float x) { * * @public-api */ -float max3(vec3 v) { +float max3(const vec3 
v) { return max(v.x, max(v.y, v.z)); } -float vmax(vec2 v) { +float vmax(const vec2 v) { return max(v.x, v.y); } -float vmax(vec3 v) { +float vmax(const vec3 v) { return max(v.x, max(v.y, v.z)); } -float vmax(vec4 v) { +float vmax(const vec4 v) { return max(max(v.x, v.y), max(v.y, v.z)); } @@ -74,19 +74,19 @@ float vmax(vec4 v) { * * @public-api */ -float min3(vec3 v) { +float min3(const vec3 v) { return min(v.x, min(v.y, v.z)); } -float vmin(vec2 v) { +float vmin(const vec2 v) { return min(v.x, v.y); } -float vmin(vec3 v) { +float vmin(const vec3 v) { return min(v.x, min(v.y, v.z)); } -float vmin(vec4 v) { +float vmin(const vec4 v) { return min(min(v.x, v.y), min(v.y, v.z)); } @@ -126,7 +126,7 @@ float acosFastPositive(float x) { * * @public-api */ -highp vec4 mulMat4x4Float3(highp mat4 m, highp vec3 v) { +highp vec4 mulMat4x4Float3(const highp mat4 m, const highp vec3 v) { return v.x * m[0] + (v.y * m[1] + (v.z * m[2] + m[3])); } @@ -136,14 +136,14 @@ highp vec4 mulMat4x4Float3(highp mat4 m, highp vec3 v) { * * @public-api */ -highp vec3 mulMat3x3Float3(highp mat4 m, highp vec3 v) { +highp vec3 mulMat3x3Float3(const highp mat4 m, const highp vec3 v) { return v.x * m[0].xyz + (v.y * m[1].xyz + (v.z * m[2].xyz)); } /** * Extracts the normal vector of the tangent frame encoded in the specified quaternion. */ -void toTangentFrame(highp vec4 q, out highp vec3 n) { +void toTangentFrame(const highp vec4 q, out highp vec3 n) { n = vec3( 0.0, 0.0, 1.0) + vec3( 2.0, -2.0, -2.0) * q.x * q.zwx + vec3( 2.0, 2.0, -2.0) * q.y * q.wzy; @@ -153,14 +153,14 @@ void toTangentFrame(highp vec4 q, out highp vec3 n) { * Extracts the normal and tangent vectors of the tangent frame encoded in the * specified quaternion. 
*/ -void toTangentFrame(highp vec4 q, out highp vec3 n, out highp vec3 t) { +void toTangentFrame(const highp vec4 q, out highp vec3 n, out highp vec3 t) { toTangentFrame(q, n); t = vec3( 1.0, 0.0, 0.0) + vec3(-2.0, 2.0, -2.0) * q.y * q.yxw + vec3(-2.0, 2.0, 2.0) * q.z * q.zwx; } -highp mat3 cofactor(highp mat3 m) { +highp mat3 cofactor(const highp mat3 m) { highp float a = m[0][0]; highp float b = m[1][0]; highp float c = m[2][0]; diff --git a/shaders/src/common_shadowing.glsl b/shaders/src/common_shadowing.glsl index 5ea08d53faa..66ee6320e99 100644 --- a/shaders/src/common_shadowing.glsl +++ b/shaders/src/common_shadowing.glsl @@ -11,8 +11,8 @@ * Normal bias is not used for VSM. */ -highp vec4 computeLightSpacePosition(highp vec3 p, highp vec3 n, - highp vec3 dir, float b, highp mat4 lightFromWorldMatrix) { +highp vec4 computeLightSpacePosition(highp vec3 p, const highp vec3 n, + const highp vec3 dir, const float b, highp_mat4 lightFromWorldMatrix) { #if !defined(VARIANT_HAS_VSM) highp float cosTheta = saturate(dot(n, dir)); diff --git a/shaders/src/depth_main.fs b/shaders/src/depth_main.fs index 8e99014d6aa..26641c4c1f3 100644 --- a/shaders/src/depth_main.fs +++ b/shaders/src/depth_main.fs @@ -20,7 +20,7 @@ layout(location = 0) out highp vec2 outPicking; // note: VARIANT_HAS_VSM and VARIANT_HAS_PICKING are mutually exclusive //------------------------------------------------------------------------------ -highp vec2 computeDepthMomentsVSM(highp float depth); +highp vec2 computeDepthMomentsVSM(const highp float depth); void main() { filament_lodBias = frameUniforms.lodBias; @@ -74,7 +74,7 @@ void main() { #endif } -highp vec2 computeDepthMomentsVSM(highp float depth) { +highp vec2 computeDepthMomentsVSM(const highp float depth) { // computes the moments // See GPU Gems 3 // https://developer.nvidia.com/gpugems/gpugems3/part-ii-light-and-shadows/chapter-8-summed-area-variance-shadow-maps diff --git a/shaders/src/dithering.fs b/shaders/src/dithering.fs index 
b3211c99337..66b7f334888 100644 --- a/shaders/src/dithering.fs +++ b/shaders/src/dithering.fs @@ -30,7 +30,7 @@ float triangleNoise(highp vec2 n) { // Dithering //------------------------------------------------------------------------------ -vec4 Dither_InterleavedGradientNoise(vec4 rgba, highp float temporalNoise01) { +vec4 Dither_InterleavedGradientNoise(vec4 rgba, const highp float temporalNoise01) { // Jimenez 2014, "Next Generation Post-Processing in Call of Duty" highp vec2 uv = gl_FragCoord.xy + temporalNoise01; @@ -43,7 +43,7 @@ vec4 Dither_InterleavedGradientNoise(vec4 rgba, highp float temporalNoise01) { return rgba + vec4(noise / 255.0); } -vec4 Dither_TriangleNoise(vec4 rgba, highp float temporalNoise01) { +vec4 Dither_TriangleNoise(vec4 rgba, const highp float temporalNoise01) { // Gjøl 2016, "Banding in Games: A Noisy Rant" highp vec2 fragCoord = gl_FragCoord.xy; // FIXME: resolution.zw is the viewport dimension but we should be using the buffer's @@ -58,7 +58,7 @@ vec4 Dither_TriangleNoise(vec4 rgba, highp float temporalNoise01) { return rgba + vec4(noise / 255.0); } -vec4 Dither_Vlachos(vec4 rgba, highp float temporalNoise01) { +vec4 Dither_Vlachos(vec4 rgba, const highp float temporalNoise01) { // Vlachos 2016, "Advanced VR Rendering" highp vec2 fragCoord = gl_FragCoord.xy; float noise = dot(vec2(171.0, 231.0), fragCoord + temporalNoise01); @@ -70,7 +70,7 @@ vec4 Dither_Vlachos(vec4 rgba, highp float temporalNoise01) { return vec4(rgba.rgb + (noiseRGB / 255.0), rgba.a); } -vec4 Dither_TriangleNoiseRGB(vec4 rgba, highp float temporalNoise01) { +vec4 Dither_TriangleNoiseRGB(vec4 rgba, const highp float temporalNoise01) { // Gjøl 2016, "Banding in Games: A Noisy Rant" highp vec2 fragCoord = gl_FragCoord.xy; // FIXME: resolution.zw is the viewport dimension but we should be using the buffer's @@ -97,7 +97,7 @@ vec4 Dither_TriangleNoiseRGB(vec4 rgba, highp float temporalNoise01) { * This dithering function assumes we are dithering to an 8-bit target. 
* This function dithers the alpha channel assuming premultiplied output */ -vec4 dither(vec4 rgba, highp float temporalNoise01) { +vec4 dither(vec4 rgba, const highp float temporalNoise01) { #if DITHERING_OPERATOR == DITHERING_NONE return rgba; #elif DITHERING_OPERATOR == DITHERING_INTERLEAVED_NOISE diff --git a/shaders/src/getters.vs b/shaders/src/getters.vs index 51f950dece9..cc06a19f5c3 100644 --- a/shaders/src/getters.vs +++ b/shaders/src/getters.vs @@ -62,14 +62,14 @@ vec3 mulBoneVertex(vec3 v, uint i) { return v.x * m[0].xyz + (v.y * m[1].xyz + (v.z * m[2].xyz + m[3].xyz)); } -void skinNormal(inout vec3 n, uvec4 ids, vec4 weights) { +void skinNormal(inout vec3 n, const uvec4 ids, const vec4 weights) { n = mulBoneNormal(n, ids.x) * weights.x + mulBoneNormal(n, ids.y) * weights.y + mulBoneNormal(n, ids.z) * weights.z + mulBoneNormal(n, ids.w) * weights.w; } -void skinPosition(inout vec3 p, uvec4 ids, vec4 weights) { +void skinPosition(inout vec3 p, const uvec4 ids, const vec4 weights) { p = mulBoneVertex(p, ids.x) * weights.x + mulBoneVertex(p, ids.y) * weights.y + mulBoneVertex(p, ids.z) * weights.z diff --git a/shaders/src/light_directional.fs b/shaders/src/light_directional.fs index 47bff67f24a..4a4ad944cc9 100644 --- a/shaders/src/light_directional.fs +++ b/shaders/src/light_directional.fs @@ -6,7 +6,7 @@ #define SUN_AS_AREA_LIGHT #endif -vec3 sampleSunAreaLight(vec3 lightDirection) { +vec3 sampleSunAreaLight(const vec3 lightDirection) { #if defined(SUN_AS_AREA_LIGHT) if (frameUniforms.sun.w >= 0.0) { // simulate sun as disc area light @@ -31,8 +31,8 @@ Light getDirectionalLight() { return light; } -void evaluateDirectionalLight(MaterialInputs material, - PixelParams pixel, inout vec3 color) { +void evaluateDirectionalLight(const MaterialInputs material, + const PixelParams pixel, inout vec3 color) { Light light = getDirectionalLight(); diff --git a/shaders/src/light_indirect.fs b/shaders/src/light_indirect.fs index 8bf14fc7503..5084b10b64d 100644 --- 
a/shaders/src/light_indirect.fs +++ b/shaders/src/light_indirect.fs @@ -17,7 +17,7 @@ // IBL utilities //------------------------------------------------------------------------------ -vec3 decodeDataForIBL(vec4 data) { +vec3 decodeDataForIBL(const vec4 data) { return data.rgb; } @@ -43,7 +43,7 @@ vec3 prefilteredDFG(float perceptualRoughness, float NoV) { // IBL irradiance implementations //------------------------------------------------------------------------------ -vec3 Irradiance_SphericalHarmonics(vec3 n) { +vec3 Irradiance_SphericalHarmonics(const vec3 n) { return max( frameUniforms.iblSH[0] #if SPHERICAL_HARMONICS_BANDS >= 2 @@ -61,7 +61,7 @@ vec3 Irradiance_SphericalHarmonics(vec3 n) { , 0.0); } -vec3 Irradiance_RoughnessOne(vec3 n) { +vec3 Irradiance_RoughnessOne(const vec3 n) { // note: lod used is always integer, hopefully the hardware skips tri-linear filtering return decodeDataForIBL(textureLod(light_iblSpecular, n, frameUniforms.iblRoughnessOneLevel)); } @@ -70,7 +70,7 @@ vec3 Irradiance_RoughnessOne(vec3 n) { // IBL irradiance dispatch //------------------------------------------------------------------------------ -vec3 diffuseIrradiance(vec3 n) { +vec3 diffuseIrradiance(const vec3 n) { // On Metal devices with an A8X chipset, this light_iblSpecular texture sample must be pulled // outside the frameUniforms.iblSH check. This is to avoid a Metal pipeline compilation error // with the message: "Could not statically determine the target of a texture". 
@@ -118,21 +118,21 @@ float perceptualRoughnessToLod(float perceptualRoughness) { return frameUniforms.iblRoughnessOneLevel * perceptualRoughness * (2.0 - perceptualRoughness); } -vec3 prefilteredRadiance(vec3 r, float perceptualRoughness) { +vec3 prefilteredRadiance(const vec3 r, float perceptualRoughness) { float lod = perceptualRoughnessToLod(perceptualRoughness); return decodeDataForIBL(textureLod(light_iblSpecular, r, lod)); } -vec3 prefilteredRadiance(vec3 r, float roughness, float offset) { +vec3 prefilteredRadiance(const vec3 r, float roughness, float offset) { float lod = frameUniforms.iblRoughnessOneLevel * roughness; return decodeDataForIBL(textureLod(light_iblSpecular, r, lod + offset)); } -vec3 getSpecularDominantDirection(vec3 n, vec3 r, float roughness) { +vec3 getSpecularDominantDirection(const vec3 n, const vec3 r, float roughness) { return mix(r, n, roughness * roughness); } -vec3 specularDFG(PixelParams pixel) { +vec3 specularDFG(const PixelParams pixel) { #if defined(SHADING_MODEL_CLOTH) return pixel.f0 * pixel.dfg.z; #else @@ -140,7 +140,7 @@ vec3 specularDFG(PixelParams pixel) { #endif } -vec3 getReflectedVector(PixelParams pixel, vec3 n) { +vec3 getReflectedVector(const PixelParams pixel, const vec3 n) { #if defined(MATERIAL_HAS_ANISOTROPY) vec3 r = getReflectedVector(pixel, shading_view, n); #else @@ -223,7 +223,7 @@ float prefilteredImportanceSampling(float ipdf, float omegaP) { return mipLevel; } -vec3 isEvaluateSpecularIBL(PixelParams pixel, vec3 n, vec3 v, float NoV) { +vec3 isEvaluateSpecularIBL(const PixelParams pixel, const vec3 n, const vec3 v, const float NoV) { const int numSamples = IBL_INTEGRATION_IMPORTANCE_SAMPLING_COUNT; const float invNumSamples = 1.0 / float(numSamples); const vec3 up = vec3(0.0, 0.0, 1.0); @@ -282,7 +282,7 @@ vec3 isEvaluateSpecularIBL(PixelParams pixel, vec3 n, vec3 v, float NoV) { return indirectSpecular; } -vec3 isEvaluateDiffuseIBL(PixelParams pixel, vec3 n, vec3 v) { +vec3 isEvaluateDiffuseIBL(const 
PixelParams pixel, vec3 n, vec3 v) { const int numSamples = IBL_INTEGRATION_IMPORTANCE_SAMPLING_COUNT; const float invNumSamples = 1.0 / float(numSamples); const vec3 up = vec3(0.0, 0.0, 1.0); @@ -332,7 +332,7 @@ vec3 isEvaluateDiffuseIBL(PixelParams pixel, vec3 n, vec3 v) { return indirectDiffuse * invNumSamples; // we bake 1/PI here, which cancels out } -void isEvaluateClearCoatIBL(PixelParams pixel, float specularAO, inout vec3 Fd, inout vec3 Fr) { +void isEvaluateClearCoatIBL(const PixelParams pixel, float specularAO, inout vec3 Fd, inout vec3 Fr) { #if defined(MATERIAL_HAS_CLEAR_COAT) #if defined(MATERIAL_HAS_NORMAL) || defined(MATERIAL_HAS_CLEAR_COAT_NORMAL) // We want to use the geometric normal for the clear coat layer @@ -366,7 +366,7 @@ void isEvaluateClearCoatIBL(PixelParams pixel, float specularAO, inout vec3 Fd, // IBL evaluation //------------------------------------------------------------------------------ -void evaluateClothIndirectDiffuseBRDF(PixelParams pixel, inout float diffuse) { +void evaluateClothIndirectDiffuseBRDF(const PixelParams pixel, inout float diffuse) { #if defined(SHADING_MODEL_CLOTH) #if defined(MATERIAL_HAS_SUBSURFACE_COLOR) // Simulate subsurface scattering with a wrap diffuse term @@ -375,8 +375,8 @@ void evaluateClothIndirectDiffuseBRDF(PixelParams pixel, inout float diffuse) { #endif } -void evaluateSheenIBL(PixelParams pixel, float diffuseAO, - SSAOInterpolationCache cache, inout vec3 Fd, inout vec3 Fr) { +void evaluateSheenIBL(const PixelParams pixel, float diffuseAO, + const in SSAOInterpolationCache cache, inout vec3 Fd, inout vec3 Fr) { #if !defined(SHADING_MODEL_CLOTH) && !defined(SHADING_MODEL_SUBSURFACE) #if defined(MATERIAL_HAS_SHEEN_COLOR) // Albedo scaling of the base layer before we layer sheen on top @@ -391,8 +391,8 @@ void evaluateSheenIBL(PixelParams pixel, float diffuseAO, #endif } -void evaluateClearCoatIBL(PixelParams pixel, float diffuseAO, - SSAOInterpolationCache cache, inout vec3 Fd, inout vec3 Fr) { 
+void evaluateClearCoatIBL(const PixelParams pixel, float diffuseAO, + const in SSAOInterpolationCache cache, inout vec3 Fd, inout vec3 Fr) { #if IBL_INTEGRATION == IBL_INTEGRATION_IMPORTANCE_SAMPLING float specularAO = specularAO(shading_NoV, diffuseAO, pixel.clearCoatRoughness, cache); isEvaluateClearCoatIBL(pixel, specularAO, Fd, Fr); @@ -422,7 +422,7 @@ void evaluateClearCoatIBL(PixelParams pixel, float diffuseAO, #endif } -void evaluateSubsurfaceIBL(PixelParams pixel, vec3 diffuseIrradiance, +void evaluateSubsurfaceIBL(const PixelParams pixel, const vec3 diffuseIrradiance, inout vec3 Fd, inout vec3 Fr) { #if defined(SHADING_MODEL_SUBSURFACE) vec3 viewIndependent = diffuseIrradiance; @@ -442,8 +442,8 @@ struct Refraction { float d; }; -void refractionSolidSphere(PixelParams pixel, - vec3 n, vec3 r, out Refraction ray) { +void refractionSolidSphere(const PixelParams pixel, + const vec3 n, vec3 r, out Refraction ray) { r = refract(r, n, pixel.etaIR); float NoR = dot(n, r); float d = pixel.thickness * -NoR; @@ -453,8 +453,8 @@ void refractionSolidSphere(PixelParams pixel, ray.direction = refract(r, n1, pixel.etaRI); } -void refractionSolidBox(PixelParams pixel, - vec3 n, vec3 r, out Refraction ray) { +void refractionSolidBox(const PixelParams pixel, + const vec3 n, vec3 r, out Refraction ray) { vec3 rr = refract(r, n, pixel.etaIR); float NoR = dot(n, rr); float d = pixel.thickness / max(-NoR, 0.001); @@ -468,8 +468,8 @@ void refractionSolidBox(PixelParams pixel, #endif } -void refractionThinSphere(PixelParams pixel, - vec3 n, vec3 r, out Refraction ray) { +void refractionThinSphere(const PixelParams pixel, + const vec3 n, vec3 r, out Refraction ray) { float d = 0.0; #if defined(MATERIAL_HAS_MICRO_THICKNESS) // note: we need the refracted ray to calculate the distance traveled @@ -485,7 +485,9 @@ void refractionThinSphere(PixelParams pixel, ray.d = d; } -vec3 evaluateRefraction(PixelParams pixel, vec3 n0, vec3 E) { +vec3 evaluateRefraction( + const PixelParams 
pixel, + const vec3 n0, vec3 E) { Refraction ray; @@ -554,7 +556,7 @@ vec3 evaluateRefraction(PixelParams pixel, vec3 n0, vec3 E) { } #endif -void evaluateIBL(MaterialInputs material, PixelParams pixel, inout vec3 color) { +void evaluateIBL(const MaterialInputs material, const PixelParams pixel, inout vec3 color) { // specular layer vec3 Fr = vec3(0.0); diff --git a/shaders/src/light_punctual.fs b/shaders/src/light_punctual.fs index b2c12930766..1c465a7f8f1 100644 --- a/shaders/src/light_punctual.fs +++ b/shaders/src/light_punctual.fs @@ -20,7 +20,7 @@ struct FroxelParams { * Returns the coordinates of the froxel at the specified fragment coordinates. * The coordinates are a 3D position in the froxel grid. */ -uvec3 getFroxelCoords(highp vec3 fragCoords) { +uvec3 getFroxelCoords(const highp vec3 fragCoords) { uvec3 froxelCoord; froxelCoord.xy = uvec2(fragCoords.xy * frameUniforms.froxelCountXY); @@ -48,7 +48,7 @@ uvec3 getFroxelCoords(highp vec3 fragCoords) { * The froxel index is computed from the 3D coordinates of the froxel in the * froxel grid and later used to fetch from the froxel buffer. */ -uint getFroxelIndex(highp vec3 fragCoords) { +uint getFroxelIndex(const highp vec3 fragCoords) { uvec3 froxelCoord = getFroxelCoords(fragCoords); return froxelCoord.x * frameUniforms.fParams.x + froxelCoord.y * frameUniforms.fParams.y + @@ -66,7 +66,7 @@ ivec2 getFroxelTexCoord(uint froxelIndex) { * Returns the froxel data for the given froxel index. The data is fetched * from FroxelsUniforms UBO. */ -FroxelParams getFroxelParams(uint froxelIndex) { +FroxelParams getFroxelParams(const uint froxelIndex) { uint w = froxelIndex >> 2u; uint c = froxelIndex & 0x3u; highp uvec4 d = froxelsUniforms.records[w]; @@ -81,7 +81,7 @@ FroxelParams getFroxelParams(uint froxelIndex) { * Return the light index from the record index * A light record is a single uint index into the lights data buffer (lightsUniforms UBO). 
*/ -uint getLightIndex(uint index) { +uint getLightIndex(const uint index) { uint v = index >> 4u; uint c = (index >> 2u) & 0x3u; uint s = (index & 0x3u) * 8u; @@ -98,7 +98,7 @@ float getSquareFalloffAttenuation(float distanceSquare, float falloff) { return smoothFactor * smoothFactor; } -float getDistanceAttenuation(highp vec3 posToLight, float falloff) { +float getDistanceAttenuation(const highp vec3 posToLight, float falloff) { float distanceSquare = dot(posToLight, posToLight); float attenuation = getSquareFalloffAttenuation(distanceSquare, falloff); // light far attenuation @@ -109,7 +109,7 @@ float getDistanceAttenuation(highp vec3 posToLight, float falloff) { return attenuation / max(distanceSquare, 1e-4); } -float getAngleAttenuation(highp vec3 lightDir, highp vec3 l, highp vec2 scaleOffset) { +float getAngleAttenuation(const highp vec3 lightDir, const highp vec3 l, const highp vec2 scaleOffset) { float cd = dot(lightDir, l); float attenuation = saturate(cd * scaleOffset.x + scaleOffset.y); return attenuation * attenuation; @@ -124,7 +124,7 @@ float getAngleAttenuation(highp vec3 lightDir, highp vec3 l, highp vec2 scaleOff * lightsUniforms uniform buffer. */ -Light getLight(uint lightIndex) { +Light getLight(const uint lightIndex) { // retrieve the light data from the UBO highp mat4 data = lightsUniforms.lights[lightIndex]; @@ -176,8 +176,8 @@ Light getLight(uint lightIndex) { * The result of the lighting computations is accumulated in the color * parameter, as linear HDR RGB. 
*/ -void evaluatePunctualLights(MaterialInputs material, - PixelParams pixel, inout vec3 color) { +void evaluatePunctualLights(const MaterialInputs material, + const PixelParams pixel, inout vec3 color) { // Fetch the light information stored in the froxel that contains the // current fragment diff --git a/shaders/src/light_reflections.fs b/shaders/src/light_reflections.fs index 499b001090f..ce6be8f7baa 100644 --- a/shaders/src/light_reflections.fs +++ b/shaders/src/light_reflections.fs @@ -57,10 +57,10 @@ highp float distanceSquared(highp vec2 a, highp vec2 b) { // Note: McGuire and Mara use the "cs" prefix to stand for "camera space", equivalent to Filament's // "view space". "cs" has been replaced with "vs" to avoid confusion. -bool traceScreenSpaceRay(highp vec3 vsOrigin, highp vec3 vsDirection, - highp mat4 uvFromViewMatrix, highp sampler2D vsZBuffer, - float vsZThickness, highp float nearPlaneZ, float stride, - float jitterFraction, highp float maxSteps, float maxRayTraceDistance, +bool traceScreenSpaceRay(const highp vec3 vsOrigin, const highp vec3 vsDirection, + highp_mat4 uvFromViewMatrix, const highp sampler2D vsZBuffer, + const float vsZThickness, const highp float nearPlaneZ, const float stride, + const float jitterFraction, const highp float maxSteps, const float maxRayTraceDistance, out highp vec2 hitPixel, out highp vec3 vsHitPoint) { // Clip ray to a near plane in 3D (doesn't have to be *the* near plane, although that would be a // good idea) @@ -181,7 +181,7 @@ bool traceScreenSpaceRay(highp vec3 vsOrigin, highp vec3 vsDirection, // -- end "BSD 2-clause license" ------------------------------------------------------------------- -highp mat4 scaleMatrix(highp float x, highp float y) { +highp mat4 scaleMatrix(const highp float x, const highp float y) { mat4 m = mat4(1.0); m[0].x = x; m[1].y = y; @@ -200,7 +200,7 @@ highp mat4 scaleMatrix(highp float x, highp float y) { * * If there is no hit, the return value is vec4(0). 
*/ -vec4 evaluateScreenSpaceReflections(highp vec3 wsRayDirection) { +vec4 evaluateScreenSpaceReflections(const highp vec3 wsRayDirection) { vec4 Fr = vec4(0.0); highp vec3 wsRayStart = shading_position + frameUniforms.ssrBias * wsRayDirection; diff --git a/shaders/src/main.fs b/shaders/src/main.fs index 2a6ec540415..62ac9d65a7d 100644 --- a/shaders/src/main.fs +++ b/shaders/src/main.fs @@ -5,7 +5,7 @@ layout(location = 0) out vec4 fragColor; #endif #if defined(MATERIAL_HAS_POST_LIGHTING_COLOR) -void blendPostLightingColor(MaterialInputs material, inout vec4 color) { +void blendPostLightingColor(const MaterialInputs material, inout vec4 color) { #if defined(POST_LIGHTING_BLEND_MODE_OPAQUE) color = material.postLightingColor; #elif defined(POST_LIGHTING_BLEND_MODE_TRANSPARENT) diff --git a/shaders/src/material_inputs.vs b/shaders/src/material_inputs.vs index 7c0dc9f3140..2124b7896ce 100644 --- a/shaders/src/material_inputs.vs +++ b/shaders/src/material_inputs.vs @@ -33,13 +33,13 @@ struct MaterialVertexInputs { // Workaround for a driver bug on ARM Bifrost GPUs. Assigning a structure member // (directly or inside an expression) to an invariant causes a driver crash. 
-vec4 getWorldPosition(MaterialVertexInputs material) { +vec4 getWorldPosition(const MaterialVertexInputs material) { return material.worldPosition; } #ifdef VERTEX_DOMAIN_DEVICE #ifdef MATERIAL_HAS_CLIP_SPACE_TRANSFORM -mat4 getMaterialClipSpaceTransform(MaterialVertexInputs material) { +mat4 getMaterialClipSpaceTransform(const MaterialVertexInputs material) { return material.clipSpaceTransform; } #endif // MATERIAL_HAS_CLIP_SPACE_TRANSFORM diff --git a/shaders/src/shading_lit.fs b/shaders/src/shading_lit.fs index 26ae84bb906..7a1d4e7b677 100644 --- a/shaders/src/shading_lit.fs +++ b/shaders/src/shading_lit.fs @@ -38,7 +38,7 @@ void applyAlphaMask(inout vec4 baseColor) {} #endif #if defined(GEOMETRIC_SPECULAR_AA) -float normalFiltering(float perceptualRoughness, vec3 worldNormal) { +float normalFiltering(float perceptualRoughness, const vec3 worldNormal) { // Kaplanyan 2016, "Stable specular highlights" // Tokuyoshi 2017, "Error Reduction and Simplification for Shading Anti-Aliasing" // Tokuyoshi and Kaplanyan 2019, "Improved Geometric Specular Antialiasing" @@ -63,7 +63,7 @@ float normalFiltering(float perceptualRoughness, vec3 worldNormal) { } #endif -void getCommonPixelParams(MaterialInputs material, inout PixelParams pixel) { +void getCommonPixelParams(const MaterialInputs material, inout PixelParams pixel) { vec4 baseColor = material.baseColor; applyAlphaMask(baseColor); @@ -137,7 +137,7 @@ void getCommonPixelParams(MaterialInputs material, inout PixelParams pixel) { #endif } -void getSheenPixelParams(MaterialInputs material, inout PixelParams pixel) { +void getSheenPixelParams(const MaterialInputs material, inout PixelParams pixel) { #if defined(MATERIAL_HAS_SHEEN_COLOR) && !defined(SHADING_MODEL_CLOTH) && !defined(SHADING_MODEL_SUBSURFACE) pixel.sheenColor = material.sheenColor; @@ -154,7 +154,7 @@ void getSheenPixelParams(MaterialInputs material, inout PixelParams pixel) { #endif } -void getClearCoatPixelParams(MaterialInputs material, inout PixelParams 
pixel) { +void getClearCoatPixelParams(const MaterialInputs material, inout PixelParams pixel) { #if defined(MATERIAL_HAS_CLEAR_COAT) pixel.clearCoat = material.clearCoat; @@ -181,7 +181,7 @@ void getClearCoatPixelParams(MaterialInputs material, inout PixelParams pixel) { #endif } -void getRoughnessPixelParams(MaterialInputs material, inout PixelParams pixel) { +void getRoughnessPixelParams(const MaterialInputs material, inout PixelParams pixel) { #if defined(SHADING_MODEL_SPECULAR_GLOSSINESS) float perceptualRoughness = computeRoughnessFromGlossiness(material.glossiness); #else @@ -209,7 +209,7 @@ void getRoughnessPixelParams(MaterialInputs material, inout PixelParams pixel) { pixel.roughness = perceptualRoughnessToRoughness(pixel.perceptualRoughness); } -void getSubsurfacePixelParams(MaterialInputs material, inout PixelParams pixel) { +void getSubsurfacePixelParams(const MaterialInputs material, inout PixelParams pixel) { #if defined(SHADING_MODEL_SUBSURFACE) pixel.subsurfacePower = material.subsurfacePower; pixel.subsurfaceColor = material.subsurfaceColor; @@ -245,7 +245,7 @@ void getEnergyCompensationPixelParams(inout PixelParams pixel) { * This function is also responsible for discarding the fragment when alpha * testing fails. */ -void getPixelParams(MaterialInputs material, out PixelParams pixel) { +void getPixelParams(const MaterialInputs material, out PixelParams pixel) { getCommonPixelParams(material, pixel); getSheenPixelParams(material, pixel); getClearCoatPixelParams(material, pixel); @@ -265,7 +265,7 @@ void getPixelParams(MaterialInputs material, out PixelParams pixel) { * * Returns a pre-exposed HDR RGBA color in linear space. 
*/ -vec4 evaluateLights(MaterialInputs material) { +vec4 evaluateLights(const MaterialInputs material) { PixelParams pixel; getPixelParams(material, pixel); @@ -295,7 +295,7 @@ vec4 evaluateLights(MaterialInputs material) { return vec4(color, computeDiffuseAlpha(material.baseColor.a)); } -void addEmissive(MaterialInputs material, inout vec4 color) { +void addEmissive(const MaterialInputs material, inout vec4 color) { #if defined(MATERIAL_HAS_EMISSIVE) highp vec4 emissive = material.emissive; highp float attenuation = mix(1.0, getExposure(), emissive.w); @@ -309,7 +309,7 @@ void addEmissive(MaterialInputs material, inout vec4 color) { * * Returns a pre-exposed HDR RGBA color in linear space. */ -vec4 evaluateMaterial(MaterialInputs material) { +vec4 evaluateMaterial(const MaterialInputs material) { vec4 color = evaluateLights(material); addEmissive(material, color); return color; diff --git a/shaders/src/shading_lit_custom.fs b/shaders/src/shading_lit_custom.fs index cc699b228e9..439e3ed19e4 100644 --- a/shaders/src/shading_lit_custom.fs +++ b/shaders/src/shading_lit_custom.fs @@ -1,5 +1,5 @@ -vec3 customSurfaceShading(MaterialInputs materialInputs, - PixelParams pixel, Light light, float visibility) { +vec3 customSurfaceShading(const MaterialInputs materialInputs, + const PixelParams pixel, const Light light, float visibility) { LightData lightData; lightData.colorIntensity = light.colorIntensity; diff --git a/shaders/src/shading_model_cloth.fs b/shaders/src/shading_model_cloth.fs index 655ea46c446..6bc8d2c513a 100644 --- a/shaders/src/shading_model_cloth.fs +++ b/shaders/src/shading_model_cloth.fs @@ -9,7 +9,7 @@ * computation of these events is not physically based but can add necessary * details to a material. 
*/ -vec3 surfaceShading(PixelParams pixel, Light light, float occlusion) { +vec3 surfaceShading(const PixelParams pixel, const Light light, float occlusion) { vec3 h = normalize(shading_view + light.l); float NoL = light.NoL; float NoH = saturate(dot(shading_normal, h)); diff --git a/shaders/src/shading_model_standard.fs b/shaders/src/shading_model_standard.fs index 392cfab45ea..025295ba279 100644 --- a/shaders/src/shading_model_standard.fs +++ b/shaders/src/shading_model_standard.fs @@ -1,5 +1,5 @@ #if defined(MATERIAL_HAS_SHEEN_COLOR) -vec3 sheenLobe(PixelParams pixel, float NoV, float NoL, float NoH) { +vec3 sheenLobe(const PixelParams pixel, float NoV, float NoL, float NoH) { float D = distributionCloth(pixel.sheenRoughness, NoH); float V = visibilityCloth(NoV, NoL); @@ -8,7 +8,7 @@ vec3 sheenLobe(PixelParams pixel, float NoV, float NoL, float NoH) { #endif #if defined(MATERIAL_HAS_CLEAR_COAT) -float clearCoatLobe(PixelParams pixel, vec3 h, float NoH, float LoH, out float Fcc) { +float clearCoatLobe(const PixelParams pixel, const vec3 h, float NoH, float LoH, out float Fcc) { #if defined(MATERIAL_HAS_NORMAL) || defined(MATERIAL_HAS_CLEAR_COAT_NORMAL) // If the material has a normal map, we want to use the geometric normal // instead to avoid applying the normal map details to the clear coat layer @@ -28,7 +28,7 @@ float clearCoatLobe(PixelParams pixel, vec3 h, float NoH, float LoH, out float F #endif #if defined(MATERIAL_HAS_ANISOTROPY) -vec3 anisotropicLobe(PixelParams pixel, Light light, vec3 h, +vec3 anisotropicLobe(const PixelParams pixel, const Light light, const vec3 h, float NoV, float NoL, float NoH, float LoH) { vec3 l = light.l; @@ -58,7 +58,7 @@ vec3 anisotropicLobe(PixelParams pixel, Light light, vec3 h, } #endif -vec3 isotropicLobe(PixelParams pixel, Light light, vec3 h, +vec3 isotropicLobe(const PixelParams pixel, const Light light, const vec3 h, float NoV, float NoL, float NoH, float LoH) { float D = distribution(pixel.roughness, NoH, h); @@ 
-68,7 +68,7 @@ vec3 isotropicLobe(PixelParams pixel, Light light, vec3 h, return (D * V) * F; } -vec3 specularLobe(PixelParams pixel, Light light, vec3 h, +vec3 specularLobe(const PixelParams pixel, const Light light, const vec3 h, float NoV, float NoL, float NoH, float LoH) { #if defined(MATERIAL_HAS_ANISOTROPY) return anisotropicLobe(pixel, light, h, NoV, NoL, NoH, LoH); @@ -77,7 +77,7 @@ vec3 specularLobe(PixelParams pixel, Light light, vec3 h, #endif } -vec3 diffuseLobe(PixelParams pixel, float NoV, float NoL, float LoH) { +vec3 diffuseLobe(const PixelParams pixel, float NoV, float NoL, float LoH) { return pixel.diffuseColor * diffuse(pixel.roughness, NoV, NoL, LoH); } @@ -98,7 +98,7 @@ vec3 diffuseLobe(PixelParams pixel, float NoV, float NoL, float LoH) { * on the Cook-Torrance microfacet model, it uses cheaper terms than the surface * BRDF's specular lobe (see brdf.fs). */ -vec3 surfaceShading(PixelParams pixel, Light light, float occlusion) { +vec3 surfaceShading(const PixelParams pixel, const Light light, float occlusion) { vec3 h = normalize(shading_view + light.l); float NoV = shading_NoV; diff --git a/shaders/src/shading_model_subsurface.fs b/shaders/src/shading_model_subsurface.fs index c578b35e10d..c29c62a4102 100644 --- a/shaders/src/shading_model_subsurface.fs +++ b/shaders/src/shading_model_subsurface.fs @@ -5,7 +5,7 @@ * scattering. The BTDF itself is not physically based and does not represent a * correct interpretation of transmission events. 
*/ -vec3 surfaceShading(PixelParams pixel, Light light, float occlusion) { +vec3 surfaceShading(const PixelParams pixel, const Light light, float occlusion) { vec3 h = normalize(shading_view + light.l); float NoL = light.NoL; diff --git a/shaders/src/shading_parameters.fs b/shaders/src/shading_parameters.fs index d2e143b1624..3c7d3aa31bc 100644 --- a/shaders/src/shading_parameters.fs +++ b/shaders/src/shading_parameters.fs @@ -56,7 +56,7 @@ void computeShadingParams() { * This function must be invoked by the user's material code (guaranteed by * the material compiler) after setting a value for MaterialInputs.normal. */ -void prepareMaterial(MaterialInputs material) { +void prepareMaterial(const MaterialInputs material) { #if defined(HAS_ATTRIBUTE_TANGENTS) #if defined(MATERIAL_HAS_NORMAL) shading_normal = normalize(shading_tangentToWorld * material.normal); diff --git a/shaders/src/shading_reflections.fs b/shaders/src/shading_reflections.fs index 5e4e4d4123c..dd1b81277d3 100644 --- a/shaders/src/shading_reflections.fs +++ b/shaders/src/shading_reflections.fs @@ -1,7 +1,7 @@ /* * screen-space reflection shading */ -vec4 evaluateMaterial(MaterialInputs material) { +vec4 evaluateMaterial(const MaterialInputs material) { #if defined(MATERIAL_HAS_REFLECTIONS) PixelParams pixel; diff --git a/shaders/src/shading_unlit.fs b/shaders/src/shading_unlit.fs index 6addcbf30fa..af30ddd7db0 100644 --- a/shaders/src/shading_unlit.fs +++ b/shaders/src/shading_unlit.fs @@ -1,4 +1,4 @@ -void addEmissive(MaterialInputs material, inout vec4 color) { +void addEmissive(const MaterialInputs material, inout vec4 color) { #if defined(MATERIAL_HAS_EMISSIVE) highp vec4 emissive = material.emissive; highp float attenuation = mix(1.0, getExposure(), emissive.w); @@ -27,7 +27,7 @@ float computeMaskedAlpha(float a) { * This is mostly useful in AR to cast shadows on unlit transparent shadow * receiving planes. 
*/ -vec4 evaluateMaterial(MaterialInputs material) { +vec4 evaluateMaterial(const MaterialInputs material) { vec4 color = material.baseColor; #if defined(BLEND_MODE_MASKED) diff --git a/shaders/src/shadowing.fs b/shaders/src/shadowing.fs index 2bd209fa0d4..2f3a6cc0bb0 100644 --- a/shaders/src/shadowing.fs +++ b/shaders/src/shadowing.fs @@ -17,9 +17,9 @@ // PCF Shadow Sampling //------------------------------------------------------------------------------ -float sampleDepth(mediump sampler2DArrayShadow map, - highp vec4 scissorNormalized, - uint layer, highp vec2 uv, float depth) { +float sampleDepth(const mediump sampler2DArrayShadow map, + const highp vec4 scissorNormalized, + const uint layer, highp vec2 uv, float depth) { // clamp needed for directional lights and/or large kernels uv = clamp(uv, scissorNormalized.xy, scissorNormalized.zw); @@ -31,9 +31,9 @@ float sampleDepth(mediump sampler2DArrayShadow map, #if SHADOW_SAMPLING_METHOD == SHADOW_SAMPLING_PCF_HARD // use hardware assisted PCF -float ShadowSample_PCF_Hard(mediump sampler2DArrayShadow map, - highp vec4 scissorNormalized, - uint layer, highp vec4 shadowPosition) { +float ShadowSample_PCF_Hard(const mediump sampler2DArrayShadow map, + const highp vec4 scissorNormalized, + const uint layer, const highp vec4 shadowPosition) { highp vec3 position = shadowPosition.xyz * (1.0 / shadowPosition.w); // note: shadowPosition.z is in the [1, 0] range (reversed Z) return sampleDepth(map, scissorNormalized, layer, position.xy, position.z); @@ -42,9 +42,9 @@ float ShadowSample_PCF_Hard(mediump sampler2DArrayShadow map, #if SHADOW_SAMPLING_METHOD == SHADOW_SAMPLING_PCF_LOW // use hardware assisted PCF + 3x3 gaussian filter -float ShadowSample_PCF_Low(mediump sampler2DArrayShadow map, - highp vec4 scissorNormalized, - uint layer, highp vec4 shadowPosition) { +float ShadowSample_PCF_Low(const mediump sampler2DArrayShadow map, + const highp vec4 scissorNormalized, + const uint layer, const highp vec4 shadowPosition) { 
highp vec3 position = shadowPosition.xyz * (1.0 / shadowPosition.w); // note: shadowPosition.z is in the [1, 0] range (reversed Z) highp vec2 size = vec2(textureSize(map, 0)); @@ -80,9 +80,9 @@ float ShadowSample_PCF_Low(mediump sampler2DArrayShadow map, #endif // use manual PCF -float ShadowSample_PCF(mediump sampler2DArray map, - highp vec4 scissorNormalized, - uint layer, highp vec4 shadowPosition) { +float ShadowSample_PCF(const mediump sampler2DArray map, + const highp vec4 scissorNormalized, + const uint layer, const highp vec4 shadowPosition) { highp vec3 position = shadowPosition.xyz * (1.0 / shadowPosition.w); // note: shadowPosition.z is in the [1, 0] range (reversed Z) highp vec2 size = vec2(textureSize(map, 0)); @@ -147,7 +147,7 @@ float hardenedKernel(float x) { return 0.5 * x + 0.5; } -highp vec2 computeReceiverPlaneDepthBias(highp vec3 position) { +highp vec2 computeReceiverPlaneDepthBias(const highp vec3 position) { // see: GDC '06: Shadow Mapping: GPU-based Tips and Techniques // Chain rule to compute dz/du and dz/dv // |dz/du| |du/dx du/dy|^-T |dz/dx| @@ -167,7 +167,7 @@ mat2 getRandomRotationMatrix(highp vec2 fragCoord) { return R; } -float getPenumbraLs(bool DIRECTIONAL, int index, highp float zLight) { +float getPenumbraLs(const bool DIRECTIONAL, const int index, const highp float zLight) { float penumbra; // This conditional is resolved at compile time if (DIRECTIONAL) { @@ -179,7 +179,7 @@ float getPenumbraLs(bool DIRECTIONAL, int index, highp float zLight) { return penumbra; } -float getPenumbraRatio(bool DIRECTIONAL, int index, +float getPenumbraRatio(const bool DIRECTIONAL, const int index, float z_receiver, float z_blocker) { // z_receiver/z_blocker are not linear depths (i.e. 
they're not distances) // Penumbra ratio for PCSS is given by: pr = (d_receiver - d_blocker) / d_blocker @@ -202,10 +202,10 @@ float getPenumbraRatio(bool DIRECTIONAL, int index, } void blockerSearchAndFilter(out float occludedCount, out float z_occSum, - mediump sampler2DArray map, highp vec4 scissorNormalized, highp vec2 uv, - float z_rec, uint layer, - highp vec2 filterRadii, mat2 R, highp vec2 dz_duv, - uint tapCount) { + const mediump sampler2DArray map, const highp vec4 scissorNormalized, const highp vec2 uv, + const float z_rec, const uint layer, + const highp vec2 filterRadii, const mat2 R, const highp vec2 dz_duv, + const uint tapCount) { occludedCount = 0.0; z_occSum = 0.0; for (uint i = 0u; i < tapCount; i++) { @@ -230,12 +230,12 @@ void blockerSearchAndFilter(out float occludedCount, out float z_occSum, } } -float filterPCSS(mediump sampler2DArray map, - highp vec4 scissorNormalized, - highp vec2 size, - highp vec2 uv, float z_rec, uint layer, - highp vec2 filterRadii, mat2 R, highp vec2 dz_duv, - uint tapCount) { +float filterPCSS(const mediump sampler2DArray map, + const highp vec4 scissorNormalized, + const highp vec2 size, + const highp vec2 uv, const float z_rec, const uint layer, + const highp vec2 filterRadii, const mat2 R, const highp vec2 dz_duv, + const uint tapCount) { float occludedCount = 0.0; // must be highp to workaround a spirv-tools issue for (uint i = 0u; i < tapCount; i++) { @@ -270,11 +270,11 @@ float filterPCSS(mediump sampler2DArray map, * DPCF, PCF with contact hardenning simulation. 
* see "Shadow of Cold War", A scalable approach to shadowing -- by Kevin Myers */ -float ShadowSample_DPCF(bool DIRECTIONAL, - mediump sampler2DArray map, - highp vec4 scissorNormalized, - uint layer, int index, - highp vec4 shadowPosition, highp float zLight) { +float ShadowSample_DPCF(const bool DIRECTIONAL, + const mediump sampler2DArray map, + const highp vec4 scissorNormalized, + const uint layer, const int index, + const highp vec4 shadowPosition, const highp float zLight) { highp vec3 position = shadowPosition.xyz * (1.0 / shadowPosition.w); highp vec2 texelSize = vec2(1.0) / vec2(textureSize(map, 0)); @@ -317,11 +317,11 @@ float ShadowSample_DPCF(bool DIRECTIONAL, return 1.0 - percentageOccluded; } -float ShadowSample_PCSS(bool DIRECTIONAL, - mediump sampler2DArray map, - highp vec4 scissorNormalized, - uint layer, int index, - highp vec4 shadowPosition, highp float zLight) { +float ShadowSample_PCSS(const bool DIRECTIONAL, + const mediump sampler2DArray map, + const highp vec4 scissorNormalized, + const uint layer, const int index, + const highp vec4 shadowPosition, const highp float zLight) { highp vec2 size = vec2(textureSize(map, 0)); highp vec2 texelSize = vec2(1.0) / size; highp vec3 position = shadowPosition.xyz * (1.0 / shadowPosition.w); @@ -435,18 +435,18 @@ float screenSpaceContactShadow(vec3 lightDirection) { // VSM //------------------------------------------------------------------------------ -float linstep(float min, float max, float v) { +float linstep(const float min, const float max, const float v) { // we could use smoothstep() too return clamp((v - min) / (max - min), 0.0, 1.0); } -float reduceLightBleed(float pMax, float amount) { +float reduceLightBleed(const float pMax, const float amount) { // Remove the [0, amount] tail and linearly rescale (amount, 1]. 
return linstep(amount, 1.0, pMax); } -float chebyshevUpperBound(highp vec2 moments, highp float mean, - highp float minVariance, float lightBleedReduction) { +float chebyshevUpperBound(const highp vec2 moments, const highp float mean, + const highp float minVariance, const float lightBleedReduction) { // Donnelly and Lauritzen 2006, "Variance Shadow Maps" highp float variance = moments.y - (moments.x * moments.x); @@ -460,15 +460,15 @@ float chebyshevUpperBound(highp vec2 moments, highp float mean, return mean <= moments.x ? 1.0 : pMax; } -float evaluateShadowVSM(highp vec2 moments, highp float depth) { +float evaluateShadowVSM(const highp vec2 moments, const highp float depth) { highp float depthScale = frameUniforms.vsmDepthScale * depth; highp float minVariance = depthScale * depthScale; return chebyshevUpperBound(moments, depth, minVariance, frameUniforms.vsmLightBleedReduction); } -float ShadowSample_VSM(bool ELVSM, highp sampler2DArray shadowMap, - in highp vec4 scissorNormalized, - in uint layer, highp vec4 shadowPosition) { +float ShadowSample_VSM(const bool ELVSM, const highp sampler2DArray shadowMap, + const highp vec4 scissorNormalized, + const uint layer, const highp vec4 shadowPosition) { // note: shadowPosition.z is in linear light-space normalized to [0, 1] // see: ShadowMap::computeVsmLightSpaceMatrix() in ShadowMap.cpp @@ -507,18 +507,18 @@ float ShadowSample_VSM(bool ELVSM, highp sampler2DArray shadowMap, // get texture coordinate for directional and spot shadow maps #if defined(VARIANT_HAS_DIRECTIONAL_LIGHTING) -highp vec4 getShadowPosition(int cascade) { +highp vec4 getShadowPosition(const int cascade) { return getCascadeLightSpacePosition(cascade); } #endif #if defined(VARIANT_HAS_DYNAMIC_LIGHTING) -highp vec4 getShadowPosition(int index, highp vec3 dir, highp float zLight) { +highp vec4 getShadowPosition(const int index, const highp vec3 dir, const highp float zLight) { return getSpotLightSpacePosition(index, dir, zLight); } #endif -int 
getPointLightFace(highp vec3 r) { +int getPointLightFace(const highp vec3 r) { highp vec4 tc; highp float rx = abs(r.x); highp float ry = abs(r.y); @@ -534,9 +534,9 @@ int getPointLightFace(highp vec3 r) { } // PCF sampling -float shadow(bool DIRECTIONAL, - in mediump sampler2DArrayShadow shadowMap, - in int index, highp vec4 shadowPosition, highp float zLight) { +float shadow(const bool DIRECTIONAL, + const mediump sampler2DArrayShadow shadowMap, + const int index, highp vec4 shadowPosition, highp float zLight) { highp vec4 scissorNormalized = shadowUniforms.shadows[index].scissorNormalized; uint layer = shadowUniforms.shadows[index].layer; #if SHADOW_SAMPLING_METHOD == SHADOW_SAMPLING_PCF_HARD @@ -547,9 +547,9 @@ float shadow(bool DIRECTIONAL, } // Shadow requiring a sampler2D sampler (VSM, DPCF and PCSS) -float shadow(bool DIRECTIONAL, - in highp sampler2DArray shadowMap, - in int index, highp vec4 shadowPosition, highp float zLight) { +float shadow(const bool DIRECTIONAL, + const highp sampler2DArray shadowMap, + const int index, highp vec4 shadowPosition, highp float zLight) { highp vec4 scissorNormalized = shadowUniforms.shadows[index].scissorNormalized; uint layer = shadowUniforms.shadows[index].layer; // This conditional is resolved at compile time diff --git a/shaders/src/vignette.fs b/shaders/src/vignette.fs index e246673bf2e..a32c4899554 100644 --- a/shaders/src/vignette.fs +++ b/shaders/src/vignette.fs @@ -7,7 +7,7 @@ // uv: viewport coordinates // vignette: pre-computed parameters midPoint, radius, aspect and feather // vignetteColor: color of the vignette effect -vec3 vignette(vec3 color, highp vec2 uv, vec4 vignette, vec4 vignetteColor) { +vec3 vignette(const vec3 color, const highp vec2 uv, const vec4 vignette, const vec4 vignetteColor) { float midPoint = vignette.x; float radius = vignette.y; float aspect = vignette.z; From d01b29fa01cf55c6ae0d71e06098eb8ceccc0a56 Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Fri, 1 Sep 2023 10:59:30 -0700 
Subject: [PATCH 23/23] vulkan: properly set bool spec const (#7125) We wrote a bool directly into 4 bytes (as the first byte). This has two issues: - the other 3 bytes are not initialized - should be writing VK_TRUE/FALSE instead --- filament/backend/include/backend/Program.h | 3 +- filament/backend/src/vulkan/VulkanHandles.cpp | 33 +++++++++++-------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/filament/backend/include/backend/Program.h b/filament/backend/include/backend/Program.h index 2a491959e91..72bcf01e420 100644 --- a/filament/backend/include/backend/Program.h +++ b/filament/backend/include/backend/Program.h @@ -104,8 +104,9 @@ class Program { Sampler const* samplers, size_t count) noexcept; struct SpecializationConstant { + using Type = std::variant; uint32_t id; // id set in glsl - std::variant value; // value and type + Type value; // value and type }; Program& specializationConstants( diff --git a/filament/backend/src/vulkan/VulkanHandles.cpp b/filament/backend/src/vulkan/VulkanHandles.cpp index e3d4579cd37..29a9f023ba5 100644 --- a/filament/backend/src/vulkan/VulkanHandles.cpp +++ b/filament/backend/src/vulkan/VulkanHandles.cpp @@ -87,21 +87,26 @@ VulkanProgram::VulkanProgram(VkDevice device, const Program& builder) noexcept : }; for (size_t i = 0; i < specializationConstants.size(); i++) { - const uint32_t offset = uint32_t(i) * 4; - std::visit([&](auto&& arg) { - using T = std::decay_t; - pEntries[i] = { - .constantID = specializationConstants[i].id, - .offset = offset, - // Turns out vulkan expects the size of bool to be 4 (verified through - // validation layer). So all expected types are of 4 bytes. 
- .size = 4, - }; - T* const addr = (T*)((char*)pData + offset); - *addr = arg; - }, specializationConstants[i].value); + uint32_t const offset = uint32_t(i) * 4; + pEntries[i] = { + .constantID = specializationConstants[i].id, + .offset = offset, + // Note that bools are 4-bytes in Vulkan + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkBool32.html + .size = 4, + }; + + using SpecConstant = Program::SpecializationConstant::Type; + char const* addr = (char*)pData + offset; + SpecConstant const& arg = specializationConstants[i].value; + if (std::holds_alternative(arg)) { + *((VkBool32*)addr) = std::get(arg) ? VK_TRUE : VK_FALSE; + } else if (std::holds_alternative(arg)) { + *((float*)addr) = std::get(arg); + } else { + *((int32_t*)addr) = std::get(arg); + } } - bundle.specializationInfos = pInfo; }