diff --git a/README.md b/README.md
index 0ed084ef84b..63f3a490ac7 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ repositories {
 }
 
 dependencies {
-    implementation 'com.google.android.filament:filament-android:1.42.0'
+    implementation 'com.google.android.filament:filament-android:1.42.1'
 }
 ```
 
@@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`:
 iOS projects can use CocoaPods to install the latest release:
 
 ```
-pod 'Filament', '~> 1.42.0'
+pod 'Filament', '~> 1.42.1'
 ```
 
 ### Snapshots
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 0058fb82e43..cb018b907c3 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -7,6 +7,11 @@ A new header is inserted each time a *tag* is created.
 Instead, if you are authoring a PR for the main branch, add your release note to
 [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md).
 
+## v1.42.1
+
+- Fix potential `EXC_BAD_ACCESS` with Metal backend: b/297059776
+- `setFrameCompletedCallback` now takes a `backend::CallbackHandler`.
+
 ## v1.42.0
 
 - engine: add preliminary support for instanced stereoscopic rendering [⚠️ **Recompile materials**]
diff --git a/android/filament-android/src/main/cpp/SwapChain.cpp b/android/filament-android/src/main/cpp/SwapChain.cpp
index 3693803ce18..27e006ae87a 100644
--- a/android/filament-android/src/main/cpp/SwapChain.cpp
+++ b/android/filament-android/src/main/cpp/SwapChain.cpp
@@ -27,11 +27,10 @@ extern "C" JNIEXPORT void JNICALL
 Java_com_google_android_filament_SwapChain_nSetFrameCompletedCallback(JNIEnv* env, jclass,
         jlong nativeSwapChain, jobject handler, jobject runnable) {
     SwapChain* swapChain = (SwapChain*) nativeSwapChain;
-    auto *callback = JniCallback::make(env, handler, runnable);
-    swapChain->setFrameCompletedCallback([](void* user) {
-        JniCallback* callback = (JniCallback*)user;
+    auto* callback = JniCallback::make(env, handler, runnable);
+    swapChain->setFrameCompletedCallback(nullptr, [callback](SwapChain* swapChain) {
         JniCallback::postToJavaAndDestroy(callback);
-    }, callback);
+    });
 }
 
 extern "C" JNIEXPORT jboolean JNICALL
diff --git a/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java b/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java
index 6d621f02ffb..9c0867fee2d 100644
--- a/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java
@@ -137,10 +137,6 @@ public Object getNativeWindow() {
      * </p>
      *
      * <p>
-     * The FrameCompletedCallback is guaranteed to be called on the main Filament thread.
-     * </p>
-     *
-     * <p>
      * Warning: Only Filament's Metal backend supports frame callbacks. Other backends ignore the
      * callback (which will never be called) and proceed normally.
      * </p>
diff --git a/android/filament-android/src/main/java/com/google/android/filament/View.java b/android/filament-android/src/main/java/com/google/android/filament/View.java
index ad2abc138bc..1b1d70dac4f 100644
--- a/android/filament-android/src/main/java/com/google/android/filament/View.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/View.java
@@ -27,6 +27,8 @@
 import static com.google.android.filament.Asserts.assertFloat4In;
 import static com.google.android.filament.Colors.LinearColor;
 
+import com.google.android.filament.proguard.UsedByNative;
+
 /**
  * Encompasses all the state needed for rendering a {@link Scene}.
  *
@@ -1095,10 +1097,29 @@ public void pick(int x, int y,
         nPick(getNativeObject(), x, y, handler, internalCallback);
     }
 
+    @UsedByNative("View.cpp")
     private static class InternalOnPickCallback implements Runnable {
+        private final OnPickCallback mUserCallback;
+        private final PickingQueryResult mPickingQueryResult = new PickingQueryResult();
+
+        @UsedByNative("View.cpp")
+        @Entity
+        int mRenderable;
+
+        @UsedByNative("View.cpp")
+        float mDepth;
+
+        @UsedByNative("View.cpp")
+        float mFragCoordsX;
+        @UsedByNative("View.cpp")
+        float mFragCoordsY;
+        @UsedByNative("View.cpp")
+        float mFragCoordsZ;
+
         public InternalOnPickCallback(OnPickCallback mUserCallback) {
             this.mUserCallback = mUserCallback;
         }
+
         @Override
         public void run() {
             mPickingQueryResult.renderable = mRenderable;
@@ -1108,13 +1129,6 @@ public void run() {
             mPickingQueryResult.fragCoords[2] = mFragCoordsZ;
             mUserCallback.onPick(mPickingQueryResult);
         }
-        private final OnPickCallback mUserCallback;
-        private final PickingQueryResult mPickingQueryResult = new PickingQueryResult();
-        @Entity int mRenderable;
-        float mDepth;
-        float mFragCoordsX;
-        float mFragCoordsY;
-        float mFragCoordsZ;
     }
 
     /**
diff --git a/android/gradle.properties b/android/gradle.properties
index 6962d3e13ee..528d6fba23d 100644
--- a/android/gradle.properties
+++ b/android/gradle.properties
@@ -1,5 +1,5 @@
 GROUP=com.google.android.filament
-VERSION_NAME=1.42.0
+VERSION_NAME=1.42.1
 
 POM_DESCRIPTION=Real-time physically based rendering engine for Android.
 
diff --git a/android/samples/sample-image-based-lighting/src/main/java/com/google/android/filament/ibl/MainActivity.kt b/android/samples/sample-image-based-lighting/src/main/java/com/google/android/filament/ibl/MainActivity.kt
index 1554bbdbbe0..723a642307c 100644
--- a/android/samples/sample-image-based-lighting/src/main/java/com/google/android/filament/ibl/MainActivity.kt
+++ b/android/samples/sample-image-based-lighting/src/main/java/com/google/android/filament/ibl/MainActivity.kt
@@ -118,9 +118,10 @@ class MainActivity : Activity() {
     }
 
     private fun setupView() {
-        val ssaoOptions = view.ambientOcclusionOptions
-        ssaoOptions.enabled = true
-        view.ambientOcclusionOptions = ssaoOptions
+        // ambient occlusion is the cheapest effect that adds a lot of quality
+        view.ambientOcclusionOptions = view.ambientOcclusionOptions.apply {
+            enabled = true
+        }
 
         // NOTE: Try to disable post-processing (tone-mapping, etc.) to see the difference
         // view.isPostProcessingEnabled = false
diff --git a/filament/backend/CMakeLists.txt b/filament/backend/CMakeLists.txt
index 550f569f02d..7e21bf39199 100644
--- a/filament/backend/CMakeLists.txt
+++ b/filament/backend/CMakeLists.txt
@@ -27,7 +27,6 @@ set(SRCS
         src/BackendUtils.cpp
         src/BlobCacheKey.cpp
         src/Callable.cpp
-        src/CallbackHandler.cpp
         src/CircularBuffer.cpp
         src/CommandBufferQueue.cpp
         src/CommandStream.cpp
@@ -68,6 +67,8 @@ set(PRIVATE_HDRS
 if (FILAMENT_SUPPORTS_OPENGL AND NOT FILAMENT_USE_EXTERNAL_GLES3 AND NOT FILAMENT_USE_SWIFTSHADER)
     list(APPEND SRCS
             include/backend/platforms/OpenGLPlatform.h
+            src/opengl/CallbackManager.h
+            src/opengl/CallbackManager.cpp
             src/opengl/gl_headers.cpp
             src/opengl/gl_headers.h
             src/opengl/GLUtils.cpp
diff --git a/filament/backend/include/backend/CallbackHandler.h b/filament/backend/include/backend/CallbackHandler.h
index dee3aaa2515..3ffc707cdd1 100644
--- a/filament/backend/include/backend/CallbackHandler.h
+++ b/filament/backend/include/backend/CallbackHandler.h
@@ -66,7 +66,7 @@ class CallbackHandler {
     virtual void post(void* user, Callback callback) = 0;
 
 protected:
-    virtual ~CallbackHandler();
+    virtual ~CallbackHandler() = default;
 };
 
 } // namespace filament::backend
diff --git a/filament/backend/include/backend/DriverEnums.h b/filament/backend/include/backend/DriverEnums.h
index 0e492893fa9..a7ef823941b 100644
--- a/filament/backend/include/backend/DriverEnums.h
+++ b/filament/backend/include/backend/DriverEnums.h
@@ -1126,8 +1126,6 @@ static_assert(sizeof(StencilState) == 12u,
 
 using FrameScheduledCallback = void(*)(PresentCallable callable, void* user);
 
-using FrameCompletedCallback = void(*)(void* user);
-
 enum class Workaround : uint16_t {
     // The EASU pass must split because shader compiler flattens early-exit branch
     SPLIT_EASU,
diff --git a/filament/backend/include/backend/Program.h b/filament/backend/include/backend/Program.h
index 2a491959e91..72bcf01e420 100644
--- a/filament/backend/include/backend/Program.h
+++ b/filament/backend/include/backend/Program.h
@@ -104,8 +104,9 @@ class Program {
             Sampler const* samplers, size_t count) noexcept;
 
     struct SpecializationConstant {
+        using Type = std::variant<int32_t, float, bool>;
         uint32_t id;                                // id set in glsl
-        std::variant<int32_t, float, bool> value;   // value and type
+        Type value;                                 // value and type
     };
 
     Program& specializationConstants(
diff --git a/filament/backend/include/backend/platforms/OpenGLPlatform.h b/filament/backend/include/backend/platforms/OpenGLPlatform.h
index c41dce43602..3f4488c5f53 100644
--- a/filament/backend/include/backend/platforms/OpenGLPlatform.h
+++ b/filament/backend/include/backend/platforms/OpenGLPlatform.h
@@ -288,6 +288,12 @@ class OpenGLPlatform : public Platform {
      * @see terminate()
      */
     virtual void createContext(bool shared);
+
+    /**
+     * Detaches and destroys the current context, if any, and releases all resources associated
+     * with this thread.
+     */
+    virtual void releaseContext() noexcept;
 };
 
 } // namespace filament
diff --git a/filament/backend/include/backend/platforms/PlatformEGL.h b/filament/backend/include/backend/platforms/PlatformEGL.h
index 8902f14f767..79400540063 100644
--- a/filament/backend/include/backend/platforms/PlatformEGL.h
+++ b/filament/backend/include/backend/platforms/PlatformEGL.h
@@ -40,6 +40,7 @@ class PlatformEGL : public OpenGLPlatform {
     PlatformEGL() noexcept;
     bool isExtraContextSupported() const noexcept override;
     void createContext(bool shared) override;
+    void releaseContext() noexcept override;
 
 protected:
 
@@ -139,6 +140,7 @@ class PlatformEGL : public OpenGLPlatform {
             bool KHR_create_context = false;
             bool KHR_gl_colorspace = false;
             bool KHR_no_config_context = false;
+            bool KHR_surfaceless_context = false;
         } egl;
     } ext;
 
diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc
index 2cb16a60fed..37ddd4c6ba0 100644
--- a/filament/backend/include/private/backend/DriverAPI.inc
+++ b/filament/backend/include/private/backend/DriverAPI.inc
@@ -142,7 +142,8 @@ DECL_DRIVER_API_N(setFrameScheduledCallback,
 
 DECL_DRIVER_API_N(setFrameCompletedCallback,
         backend::SwapChainHandle, sch,
-        backend::FrameCompletedCallback, callback,
+        backend::CallbackHandler*, handler,
+        backend::CallbackHandler::Callback, callback,
         void*, user)
 
 DECL_DRIVER_API_N(setPresentationTime,
@@ -273,6 +274,7 @@ DECL_DRIVER_API_N(destroyRenderTarget,    backend::RenderTargetHandle, rth)
 DECL_DRIVER_API_N(destroySwapChain,       backend::SwapChainHandle, sch)
 DECL_DRIVER_API_N(destroyStream,          backend::StreamHandle, sh)
 DECL_DRIVER_API_N(destroyTimerQuery,      backend::TimerQueryHandle, sh)
+DECL_DRIVER_API_N(destroyFence,           backend::FenceHandle, fh)
 
 /*
  * Synchronous APIs
@@ -286,7 +288,6 @@ DECL_DRIVER_API_SYNCHRONOUS_N(void, setAcquiredImage, backend::StreamHandle, str
 DECL_DRIVER_API_SYNCHRONOUS_N(void, setStreamDimensions, backend::StreamHandle, stream, uint32_t, width, uint32_t, height)
 DECL_DRIVER_API_SYNCHRONOUS_N(int64_t, getStreamTimestamp, backend::StreamHandle, stream)
 DECL_DRIVER_API_SYNCHRONOUS_N(void, updateStreams, backend::DriverApi*, driver)
-DECL_DRIVER_API_SYNCHRONOUS_N(void, destroyFence, backend::FenceHandle, fh)
 DECL_DRIVER_API_SYNCHRONOUS_N(backend::FenceStatus, getFenceStatus, backend::FenceHandle, fh)
 DECL_DRIVER_API_SYNCHRONOUS_N(bool, isTextureFormatSupported, backend::TextureFormat, format)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isTextureSwizzleSupported)
@@ -297,6 +298,8 @@ DECL_DRIVER_API_SYNCHRONOUS_0(bool, isFrameBufferFetchMultiSampleSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isFrameTimeSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isAutoDepthResolveSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isSRGBSwapChainSupported)
+DECL_DRIVER_API_SYNCHRONOUS_0(bool, isStereoSupported)
+DECL_DRIVER_API_SYNCHRONOUS_0(bool, isParallelShaderCompileSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(uint8_t, getMaxDrawBuffers)
 DECL_DRIVER_API_SYNCHRONOUS_0(size_t, getMaxUniformBufferSize)
 DECL_DRIVER_API_SYNCHRONOUS_0(math::float2, getClipSpaceParams)
diff --git a/filament/backend/src/CallbackHandler.cpp b/filament/backend/src/CallbackHandler.cpp
deleted file mode 100644
index a1c067b6d26..00000000000
--- a/filament/backend/src/CallbackHandler.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2021 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <backend/CallbackHandler.h>
-
-namespace filament::backend {
-
-CallbackHandler::~CallbackHandler() = default;
-
-} // namespace filament::backend
diff --git a/filament/backend/src/CompilerThreadPool.cpp b/filament/backend/src/CompilerThreadPool.cpp
index 008b353aaa1..591ae063c31 100644
--- a/filament/backend/src/CompilerThreadPool.cpp
+++ b/filament/backend/src/CompilerThreadPool.cpp
@@ -16,6 +16,8 @@
 
 #include "CompilerThreadPool.h"
 
+#include <utils/Systrace.h>
+
 #include <memory>
 
 namespace filament::backend {
@@ -32,16 +34,14 @@ CompilerThreadPool::~CompilerThreadPool() noexcept {
     assert_invariant(mQueues[1].empty());
 }
 
-void CompilerThreadPool::init(uint32_t threadCount, JobSystem::Priority priority,
-        ThreadSetup&& threadSetup) noexcept {
+void CompilerThreadPool::init(uint32_t threadCount,
+        ThreadSetup&& threadSetup, ThreadCleanup&& threadCleanup) noexcept {
     auto setup = std::make_shared<ThreadSetup>(std::move(threadSetup));
+    auto cleanup = std::make_shared<ThreadCleanup>(std::move(threadCleanup));
 
     for (size_t i = 0; i < threadCount; i++) {
-        mCompilerThreads.emplace_back([this, priority, setup]() {
-            // give the thread a name
-            JobSystem::setThreadName("CompilerThreadPool");
-            // run at a slightly lower priority than other filament threads
-            JobSystem::setThreadPriority(priority);
+        mCompilerThreads.emplace_back([this, setup, cleanup]() {
+            SYSTRACE_CONTEXT();
 
             (*setup)();
 
@@ -53,7 +53,11 @@ void CompilerThreadPool::init(uint32_t threadCount, JobSystem::Priority priority
                             (!std::all_of( std::begin(mQueues), std::end(mQueues),
                                     [](auto&& q) { return q.empty(); }));
                 });
-                if (!mExitRequested) {
+
+                SYSTRACE_VALUE32("CompilerThreadPool Jobs",
+                        mQueues[0].size() + mQueues[1].size());
+
+                if (UTILS_LIKELY(!mExitRequested)) {
                     Job job;
                     // use the first queue that's not empty
                     auto& queue = [this]() -> auto& {
@@ -73,6 +77,8 @@ void CompilerThreadPool::init(uint32_t threadCount, JobSystem::Priority priority
                     job();
                 }
             }
+
+            (*cleanup)();
         });
 
     }
diff --git a/filament/backend/src/CompilerThreadPool.h b/filament/backend/src/CompilerThreadPool.h
index 78ce4c70796..fbdff68f695 100644
--- a/filament/backend/src/CompilerThreadPool.h
+++ b/filament/backend/src/CompilerThreadPool.h
@@ -20,12 +20,13 @@
 #include <backend/DriverEnums.h>
 
 #include <utils/Invocable.h>
-#include <utils/JobSystem.h>
+#include <utils/Mutex.h>
+#include <utils/Condition.h>
 
 #include <array>
-#include <atomic>
 #include <deque>
 #include <memory>
+#include <thread>
 #include <utility>
 #include <vector>
 
@@ -45,8 +46,9 @@ class CompilerThreadPool {
     ~CompilerThreadPool() noexcept;
     using Job = utils::Invocable<void()>;
     using ThreadSetup = utils::Invocable<void()>;
-    void init(uint32_t threadCount, utils::JobSystem::Priority priority,
-            ThreadSetup&& threadSetup) noexcept;
+    using ThreadCleanup = utils::Invocable<void()>;
+    void init(uint32_t threadCount,
+            ThreadSetup&& threadSetup, ThreadCleanup&& threadCleanup) noexcept;
     void terminate() noexcept;
     void queue(CompilerPriorityQueue priorityQueue, program_token_t const& token, Job&& job);
     Job dequeue(program_token_t const& token);
@@ -54,9 +56,9 @@ class CompilerThreadPool {
 private:
     using Queue = std::deque<std::pair<program_token_t, Job>>;
     std::vector<std::thread> mCompilerThreads;
-    std::atomic_bool mExitRequested{false};
-    std::mutex mQueueLock;
-    std::condition_variable mQueueCondition;
+    bool mExitRequested{ false };
+    utils::Mutex mQueueLock;
+    utils::Condition mQueueCondition;
     std::array<Queue, 2> mQueues;
     // lock must be held for methods below
     std::pair<Queue&, Queue::iterator> find(program_token_t const& token);
diff --git a/filament/backend/src/DriverBase.h b/filament/backend/src/DriverBase.h
index 3e7f2647d2f..abf68901164 100644
--- a/filament/backend/src/DriverBase.h
+++ b/filament/backend/src/DriverBase.h
@@ -165,13 +165,6 @@ class DriverBase : public Driver {
 
     void purge() noexcept final;
 
-    // --------------------------------------------------------------------------------------------
-    // Privates
-    // --------------------------------------------------------------------------------------------
-
-protected:
-    class CallbackDataDetails;
-
     // Helpers...
     struct CallbackData {
         CallbackData(CallbackData const &) = delete;
@@ -202,6 +195,13 @@ class DriverBase : public Driver {
 
     void scheduleCallback(CallbackHandler* handler, void* user, CallbackHandler::Callback callback);
 
+    // --------------------------------------------------------------------------------------------
+    // Privates
+    // --------------------------------------------------------------------------------------------
+
+protected:
+    class CallbackDataDetails;
+
     inline void scheduleDestroy(BufferDescriptor&& buffer) noexcept {
         if (buffer.hasCallback()) {
             scheduleDestroySlow(std::move(buffer));
diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm
index 4476da77371..1d036d90a05 100644
--- a/filament/backend/src/metal/MetalDriver.mm
+++ b/filament/backend/src/metal/MetalDriver.mm
@@ -176,9 +176,9 @@
 }
 
 void MetalDriver::setFrameCompletedCallback(Handle<HwSwapChain> sch,
-        FrameCompletedCallback callback, void* user) {
+        CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
     auto* swapChain = handle_cast<MetalSwapChain>(sch);
-    swapChain->setFrameCompletedCallback(callback, user);
+    swapChain->setFrameCompletedCallback(handler, callback, user);
 }
 
 void MetalDriver::execute(std::function<void(void)> const& fn) noexcept {
@@ -696,6 +696,14 @@
     return false;
 }
 
+bool MetalDriver::isStereoSupported() {
+    return true;
+}
+
+bool MetalDriver::isParallelShaderCompileSupported() {
+    return false;
+}
+
 bool MetalDriver::isWorkaroundNeeded(Workaround workaround) {
     switch (workaround) {
         case Workaround::SPLIT_EASU:
diff --git a/filament/backend/src/metal/MetalHandles.h b/filament/backend/src/metal/MetalHandles.h
index 9ffa1a0bda8..b129d478d73 100644
--- a/filament/backend/src/metal/MetalHandles.h
+++ b/filament/backend/src/metal/MetalHandles.h
@@ -70,7 +70,8 @@ class MetalSwapChain : public HwSwapChain {
     void releaseDrawable();
 
     void setFrameScheduledCallback(FrameScheduledCallback callback, void* user);
-    void setFrameCompletedCallback(FrameCompletedCallback callback, void* user);
+    void setFrameCompletedCallback(CallbackHandler* handler,
+            CallbackHandler::Callback callback, void* user);
 
     // For CAMetalLayer-backed SwapChains, presents the drawable or schedules a
     // FrameScheduledCallback.
@@ -112,8 +113,11 @@ class MetalSwapChain : public HwSwapChain {
     FrameScheduledCallback frameScheduledCallback = nullptr;
     void* frameScheduledUserData = nullptr;
 
-    FrameCompletedCallback frameCompletedCallback = nullptr;
-    void* frameCompletedUserData = nullptr;
+    struct {
+        CallbackHandler* handler = nullptr;
+        CallbackHandler::Callback callback = {};
+        void* user = nullptr;
+    } frameCompleted;
 };
 
 class MetalBufferObject : public HwBufferObject {
diff --git a/filament/backend/src/metal/MetalHandles.mm b/filament/backend/src/metal/MetalHandles.mm
index 99e8b36227d..0b4d0b3c4dd 100644
--- a/filament/backend/src/metal/MetalHandles.mm
+++ b/filament/backend/src/metal/MetalHandles.mm
@@ -194,13 +194,15 @@ static inline MTLTextureUsage getMetalTextureUsage(TextureUsage usage) {
     frameScheduledUserData = user;
 }
 
-void MetalSwapChain::setFrameCompletedCallback(FrameCompletedCallback callback, void* user) {
-    frameCompletedCallback = callback;
-    frameCompletedUserData = user;
+void MetalSwapChain::setFrameCompletedCallback(CallbackHandler* handler,
+        CallbackHandler::Callback callback, void* user) {
+    frameCompleted.handler = handler;
+    frameCompleted.callback = callback;
+    frameCompleted.user = user;
 }
 
 void MetalSwapChain::present() {
-    if (frameCompletedCallback) {
+    if (frameCompleted.callback) {
         scheduleFrameCompletedCallback();
     }
     if (drawable) {
@@ -244,30 +246,17 @@ void presentDrawable(bool presentFrame, void* user) {
 }
 
 void MetalSwapChain::scheduleFrameCompletedCallback() {
-    if (!frameCompletedCallback) {
+    if (!frameCompleted.callback) {
         return;
     }
 
-    FrameCompletedCallback callback = frameCompletedCallback;
-    void* userData = frameCompletedUserData;
+    CallbackHandler* handler = frameCompleted.handler;
+    void* user = frameCompleted.user;
+    CallbackHandler::Callback callback = frameCompleted.callback;
+
+    MetalDriver* driver = context.driver;
     [getPendingCommandBuffer(&context) addCompletedHandler:^(id<MTLCommandBuffer> cb) {
-        struct CallbackData {
-            void* userData;
-            FrameCompletedCallback callback;
-        };
-        CallbackData* data = new CallbackData();
-        data->userData = userData;
-        data->callback = callback;
-
-        // Instantiate a BufferDescriptor with a callback for the sole purpose of passing it to
-        // scheduleDestroy. This forces the BufferDescriptor callback (and thus the
-        // FrameCompletedCallback) to be called on the user thread.
-        BufferDescriptor b(nullptr, 0u, [](void* buffer, size_t size, void* user) {
-            CallbackData* data = (CallbackData*) user;
-            data->callback(data->userData);
-            free(data);
-        }, data);
-        context.driver->scheduleDestroy(std::move(b));
+        driver->scheduleCallback(handler, user, callback);
     }];
 }
 
diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp
index 19b1cb5380f..3d1a9cdc327 100644
--- a/filament/backend/src/noop/NoopDriver.cpp
+++ b/filament/backend/src/noop/NoopDriver.cpp
@@ -58,7 +58,7 @@ void NoopDriver::setFrameScheduledCallback(Handle<HwSwapChain> sch,
 }
 
 void NoopDriver::setFrameCompletedCallback(Handle<HwSwapChain> sch,
-        FrameCompletedCallback callback, void* user) {
+        CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
 
 }
 
@@ -174,6 +174,14 @@ bool NoopDriver::isSRGBSwapChainSupported() {
     return false;
 }
 
+bool NoopDriver::isStereoSupported() {
+    return false;
+}
+
+bool NoopDriver::isParallelShaderCompileSupported() {
+    return false;
+}
+
 bool NoopDriver::isWorkaroundNeeded(Workaround) {
     return false;
 }
diff --git a/filament/backend/src/opengl/CallbackManager.cpp b/filament/backend/src/opengl/CallbackManager.cpp
new file mode 100644
index 00000000000..8d85a9f4886
--- /dev/null
+++ b/filament/backend/src/opengl/CallbackManager.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CallbackManager.h"
+
+#include "DriverBase.h"
+
+namespace filament::backend {
+
+CallbackManager::CallbackManager(DriverBase& driver) noexcept
+    : mDriver(driver), mCallbacks(1) {
+}
+
+CallbackManager::~CallbackManager() noexcept = default;
+
+void CallbackManager::terminate() noexcept {
+    for (auto&& item: mCallbacks) {
+        if (item.func) {
+            mDriver.scheduleCallback(
+                    item.handler, item.user, item.func);
+        }
+    }
+}
+
+CallbackManager::Handle CallbackManager::get() const noexcept {
+    Container::const_iterator const curr = getCurrent();
+    curr->count.fetch_add(1);
+    return curr;
+}
+
+void CallbackManager::put(Handle& curr) noexcept {
+    if (curr->count.fetch_sub(1) == 1) {
+        if (curr->func) {
+            mDriver.scheduleCallback(
+                    curr->handler, curr->user, curr->func);
+            destroySlot(curr);
+        }
+    }
+    curr = {};
+}
+
+void CallbackManager::setCallback(
+        CallbackHandler* handler, CallbackHandler::Callback func, void* user) {
+    assert_invariant(func);
+    Container::iterator const curr = allocateNewSlot();
+    curr->handler = handler;
+    curr->func = func;
+    curr->user = user;
+    if (curr->count == 0) {
+        mDriver.scheduleCallback(
+                curr->handler, curr->user, curr->func);
+        destroySlot(curr);
+    }
+}
+
+} // namespace filament::backend
diff --git a/filament/backend/src/opengl/CallbackManager.h b/filament/backend/src/opengl/CallbackManager.h
new file mode 100644
index 00000000000..5349f201265
--- /dev/null
+++ b/filament/backend/src/opengl/CallbackManager.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_OPENGL_CALLBACKMANAGER_H
+#define TNT_FILAMENT_BACKEND_OPENGL_CALLBACKMANAGER_H
+
+#include <backend/CallbackHandler.h>
+
+#include <utils/Mutex.h>
+
+#include <atomic>
+#include <mutex>
+#include <list>
+
+namespace filament::backend {
+
+class DriverBase;
+class CallbackHandler;
+
+/*
+ * CallbackManager schedules user callbacks once all previous conditions are met.
+ * A "Condition" is created by calling "get" and is met by calling "put". These
+ * are typically called from different threads.
+ * The callback is specified with "setCallback", which atomically creates a new set of
+ * conditions to be met.
+ */
+class CallbackManager {
+    struct Callback {
+        mutable std::atomic_int count{};
+        CallbackHandler* handler = nullptr;
+        CallbackHandler::Callback func = {};
+        void* user = nullptr;
+    };
+
+    using Container = std::list<Callback>;
+
+public:
+    using Handle = Container::const_iterator;
+
+    explicit CallbackManager(DriverBase& driver) noexcept;
+
+    ~CallbackManager() noexcept;
+
+    // Schedules all the pending callbacks regardless of remaining conditions to be met. This
+    // avoids leaking resources, for instance. It also doesn't matter whether the conditions are
+    // met because we're shutting down.
+    void terminate() noexcept;
+
+    // Creates a condition and gets a handle for it.
+    Handle get() const noexcept;
+
+    // Announces the specified condition is met. If a callback was specified and all conditions
+    // prior to setting the callback are met, the callback is scheduled.
+    void put(Handle& curr) noexcept;
+
+    // Sets a callback to be called when all previously created (get) conditions are met (put).
+    // If there were no conditions created, or they're all already met, the callback is scheduled
+    // immediately.
+    void setCallback(CallbackHandler* handler, CallbackHandler::Callback func, void* user);
+
+private:
+    Container::const_iterator getCurrent() const noexcept {
+        std::lock_guard const lock(mLock);
+        return --mCallbacks.end();
+    }
+
+    Container::iterator allocateNewSlot() noexcept {
+        std::lock_guard const lock(mLock);
+        auto curr = --mCallbacks.end();
+        mCallbacks.emplace_back();
+        return curr;
+    }
+    void destroySlot(Container::const_iterator curr) noexcept {
+        std::lock_guard const lock(mLock);
+        mCallbacks.erase(curr);
+    }
+
+    DriverBase& mDriver;
+    mutable utils::Mutex mLock;
+    Container mCallbacks;
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_OPENGL_CALLBACKMANAGER_H
diff --git a/filament/backend/src/opengl/OpenGLContext.cpp b/filament/backend/src/opengl/OpenGLContext.cpp
index e7a88f6b96d..072096718b3 100644
--- a/filament/backend/src/opengl/OpenGLContext.cpp
+++ b/filament/backend/src/opengl/OpenGLContext.cpp
@@ -49,6 +49,7 @@ bool OpenGLContext::queryOpenGLVersion(GLint* major, GLint* minor) noexcept {
 }
 
 OpenGLContext::OpenGLContext() noexcept {
+
     state.vao.p = &mDefaultVAO;
 
     // These queries work with all GL/GLES versions!
@@ -61,265 +62,74 @@ OpenGLContext::OpenGLContext() noexcept {
               "[" << state.version << "], [" << state.shader << "]" << io::endl;
 
     /*
-     * Figure out GL / GLES version and available features
+     * Figure out GL / GLES version, extensions and capabilities we need to
+     * determine the feature level
      */
 
     queryOpenGLVersion(&state.major, &state.minor);
 
-    glGetIntegerv(GL_MAX_RENDERBUFFER_SIZE, &gets.max_renderbuffer_size);
-    glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &gets.max_texture_image_units);
-    glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &gets.max_combined_texture_image_units);
+    OpenGLContext::initExtensions(&ext, state.major, state.minor);
 
-    if (state.major > 2) { // this check works for both GL and GLES, but is intended for GLES
-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-        glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &gets.max_uniform_block_size);
-        glGetIntegerv(GL_MAX_UNIFORM_BUFFER_BINDINGS, &gets.max_uniform_buffer_bindings);
-        glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &gets.uniform_buffer_offset_alignment);
-        glGetIntegerv(GL_MAX_SAMPLES, &gets.max_samples);
-        glGetIntegerv(GL_MAX_DRAW_BUFFERS, &gets.max_draw_buffers);
-        glGetIntegerv(GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS,
-                &gets.max_transform_feedback_separate_attribs);
-#endif
-    } else {
-        gets.max_uniform_block_size = 0;
-        gets.max_uniform_buffer_bindings = 0;
-        gets.uniform_buffer_offset_alignment = 0;
-        gets.max_samples = 1;
-        gets.max_draw_buffers = 1;
-        gets.max_transform_feedback_separate_attribs = 0;
-    }
+    OpenGLContext::initProcs(&procs, ext, state.major, state.minor);
 
-    constexpr auto const caps3 = FEATURE_LEVEL_CAPS[+FeatureLevel::FEATURE_LEVEL_3];
-    constexpr GLint MAX_VERTEX_SAMPLER_COUNT = caps3.MAX_VERTEX_SAMPLER_COUNT;
-    constexpr GLint MAX_FRAGMENT_SAMPLER_COUNT = caps3.MAX_FRAGMENT_SAMPLER_COUNT;
+    OpenGLContext::initBugs(&bugs, ext, state.major, state.minor,
+            state.vendor, state.renderer, state.version, state.shader);
 
-    // default procs that can be overridden based on runtime version
-#ifdef BACKEND_OPENGL_LEVEL_GLES30
-    procs.genVertexArrays = glGenVertexArrays;
-    procs.bindVertexArray = glBindVertexArray;
-    procs.deleteVertexArrays = glDeleteVertexArrays;
+    glGetIntegerv(GL_MAX_RENDERBUFFER_SIZE,             &gets.max_renderbuffer_size);
+    glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS,           &gets.max_texture_image_units);
+    glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS,  &gets.max_combined_texture_image_units);
 
-    // these are core in GL and GLES 3.x
-    procs.genQueries = glGenQueries;
-    procs.deleteQueries = glDeleteQueries;
-    procs.beginQuery = glBeginQuery;
-    procs.endQuery = glEndQuery;
-    procs.getQueryObjectuiv = glGetQueryObjectuiv;
-#   ifdef BACKEND_OPENGL_VERSION_GL
-        procs.getQueryObjectui64v = glGetQueryObjectui64v; // only core in GL
-#   elif defined(GL_EXT_disjoint_timer_query)
-        procs.getQueryObjectui64v = glGetQueryObjectui64vEXT;
-#   endif // BACKEND_OPENGL_VERSION_GL
-
-     // core in ES 3.0 and GL 4.3
-    procs.invalidateFramebuffer = glInvalidateFramebuffer;
-#endif // BACKEND_OPENGL_LEVEL_GLES30
-
-    // no-op if not supported
-    procs.maxShaderCompilerThreadsKHR = +[](GLuint) {};
+    mFeatureLevel = OpenGLContext::resolveFeatureLevel(state.major, state.minor, ext, gets, bugs);
 
 #ifdef BACKEND_OPENGL_VERSION_GLES
-    initExtensionsGLES();
-    if (state.major == 3) {
-        // Runtime OpenGL version is ES 3.x
-        assert_invariant(gets.max_texture_image_units >= 16);
-        assert_invariant(gets.max_combined_texture_image_units >= 32);
-        if (state.minor >= 1) {
-            features.multisample_texture = true;
-            // figure out our feature level
-            if (ext.EXT_texture_cube_map_array) {
-                mFeatureLevel = FeatureLevel::FEATURE_LEVEL_2;
-                if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT &&
-                    gets.max_combined_texture_image_units >=
-                            (MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) {
-                    mFeatureLevel = FeatureLevel::FEATURE_LEVEL_3;
-                }
-            }
-        }
-    }
-#ifndef IOS // IOS is guaranteed to have ES3.x
-    else if (UTILS_UNLIKELY(state.major == 2)) {
-        // Runtime OpenGL version is ES 2.x
-
-#if defined(BACKEND_OPENGL_LEVEL_GLES30)
-        // mandatory extensions (all supported by Mali-400 and Adreno 304)
-        assert_invariant(ext.OES_depth_texture);
-        assert_invariant(ext.OES_depth24);
-        assert_invariant(ext.OES_packed_depth_stencil);
-        assert_invariant(ext.OES_rgb8_rgba8);
-        assert_invariant(ext.OES_standard_derivatives);
-        assert_invariant(ext.OES_texture_npot);
+    mShaderModel = ShaderModel::MOBILE;
+#else
+    mShaderModel = ShaderModel::DESKTOP;
 #endif
 
-        if (UTILS_LIKELY(ext.OES_vertex_array_object)) {
-            procs.genVertexArrays = glGenVertexArraysOES;
-            procs.bindVertexArray = glBindVertexArrayOES;
-            procs.deleteVertexArrays = glDeleteVertexArraysOES;
-        } else {
-            // if we don't have OES_vertex_array_object, just don't do anything with real VAOs,
-            // we'll just rebind everything each time. Most Mali-400 support this extension, but
-            // a few don't.
-            procs.genVertexArrays = +[](GLsizei, GLuint*) {};
-            procs.bindVertexArray = +[](GLuint) {};
-            procs.deleteVertexArrays = +[](GLsizei, GLuint const*) {};
-            // we activate this workaround path, which does the reset of array buffer
-            bugs.vao_doesnt_store_element_array_buffer_binding = true;
-        }
-
-        // EXT_disjoint_timer_query is optional -- pointers will be null if not available
-        procs.genQueries = glGenQueriesEXT;
-        procs.deleteQueries = glDeleteQueriesEXT;
-        procs.beginQuery = glBeginQueryEXT;
-        procs.endQuery = glEndQueryEXT;
-        procs.getQueryObjectuiv = glGetQueryObjectuivEXT;
-        procs.getQueryObjectui64v = glGetQueryObjectui64vEXT;
-
-        procs.invalidateFramebuffer = glDiscardFramebufferEXT;
-
-        procs.maxShaderCompilerThreadsKHR = glMaxShaderCompilerThreadsKHR;
-
-        mFeatureLevel = FeatureLevel::FEATURE_LEVEL_0;
+#ifdef BACKEND_OPENGL_VERSION_GLES
+    if (mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_2) {
+        features.multisample_texture = true;
     }
-#endif // IOS
 #else
-    initExtensionsGL();
-    if (state.major == 4) {
-        assert_invariant(state.minor >= 1);
-        mShaderModel = ShaderModel::DESKTOP;
-        if (state.minor >= 3) {
-            // cubemap arrays are available as of OpenGL 4.0
-            mFeatureLevel = FeatureLevel::FEATURE_LEVEL_2;
-            // figure out our feature level
-            if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT &&
-                gets.max_combined_texture_image_units >=
-                        (MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) {
-                mFeatureLevel = FeatureLevel::FEATURE_LEVEL_3;
-            }
-        }
+    if (mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1) {
         features.multisample_texture = true;
     }
-    // feedback loops are allowed on GL desktop as long as writes are disabled
-    bugs.allow_read_only_ancillary_feedback_loop = true;
-    assert_invariant(gets.max_texture_image_units >= 16);
-    assert_invariant(gets.max_combined_texture_image_units >= 32);
-
-    procs.maxShaderCompilerThreadsKHR = glMaxShaderCompilerThreadsARB;
 #endif
 
+    if (mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1) {
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+        glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE,
+                &gets.max_uniform_block_size);
+        glGetIntegerv(GL_MAX_UNIFORM_BUFFER_BINDINGS,
+                &gets.max_uniform_buffer_bindings);
+        glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT,
+                &gets.uniform_buffer_offset_alignment);
+        glGetIntegerv(GL_MAX_SAMPLES,
+                &gets.max_samples);
+        glGetIntegerv(GL_MAX_DRAW_BUFFERS,
+                &gets.max_draw_buffers);
+        glGetIntegerv(GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS,
+                &gets.max_transform_feedback_separate_attribs);
 #ifdef GL_EXT_texture_filter_anisotropic
-    if (ext.EXT_texture_filter_anisotropic) {
-        glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &gets.max_anisotropy);
+        if (ext.EXT_texture_filter_anisotropic) {
+            glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &gets.max_anisotropy);
+        }
+#endif
+#endif
+    }
+#ifdef BACKEND_OPENGL_VERSION_GLES
+    else {
+        gets.max_uniform_block_size = 0;
+        gets.max_uniform_buffer_bindings = 0;
+        gets.uniform_buffer_offset_alignment = 0;
+        gets.max_samples = 1;
+        gets.max_draw_buffers = 1;
+        gets.max_transform_feedback_separate_attribs = 0;
+        gets.max_anisotropy = 1;
     }
 #endif
 
-    /*
-     * Figure out which driver bugs we need to workaround
-     */
-
-    const bool isAngle = strstr(state.renderer, "ANGLE");
-    if (!isAngle) {
-        if (strstr(state.renderer, "Adreno")) {
-            // Qualcomm GPU
-            bugs.invalidate_end_only_if_invalidate_start = true;
-
-            // On Adreno (As of 3/20) timer query seem to return the CPU time, not the GPU time.
-            bugs.dont_use_timer_query = true;
-
-            // Blits to texture arrays are failing
-            //   This bug continues to reproduce, though at times we've seen it appear to "go away".
-            //   The standalone sample app that was written to show this problem still reproduces.
-            //   The working hypothesis is that some other state affects this behavior.
-            bugs.disable_blit_into_texture_array = true;
-
-            // early exit condition is flattened in EASU code
-            bugs.split_easu = true;
-
-            // initialize the non-used uniform array for Adreno drivers.
-            bugs.enable_initialize_non_used_uniform_array = true;
-
-            int maj, min, driverMajor, driverMinor;
-            int const c = sscanf(state.version, "OpenGL ES %d.%d V@%d.%d", // NOLINT(cert-err34-c)
-                    &maj, &min, &driverMajor, &driverMinor);
-            if (c == 4) {
-                // Workarounds based on version here.
-                // notes:
-                //  bugs.invalidate_end_only_if_invalidate_start
-                //  - appeared at least in
-                //      "OpenGL ES 3.2 V@0490.0 (GIT@85da404, I46ff5fc46f, 1606794520) (Date:11/30/20)"
-                //  - wasn't present in
-                //      "OpenGL ES 3.2 V@0490.0 (GIT@0905e9f, Ia11ce2d146, 1599072951) (Date:09/02/20)"
-                //  - has been confirmed fixed in V@570.1 by Qualcomm
-                if (driverMajor < 490 || driverMajor > 570 ||
-                    (driverMajor == 570 && driverMinor >= 1)) {
-                    bugs.invalidate_end_only_if_invalidate_start = false;
-                }
-            }
-
-            // qualcomm seems to have no problem with this (which is good for us)
-            bugs.allow_read_only_ancillary_feedback_loop = true;
-        } else if (strstr(state.renderer, "Mali")) {
-            // ARM GPU
-            bugs.vao_doesnt_store_element_array_buffer_binding = true;
-            if (strstr(state.renderer, "Mali-T")) {
-                bugs.disable_glFlush = true;
-                bugs.disable_shared_context_draws = true;
-                bugs.texture_external_needs_rebind = true;
-                // We have not verified that timer queries work on Mali-T, so we disable to be safe.
-                bugs.dont_use_timer_query = true;
-            }
-            if (strstr(state.renderer, "Mali-G")) {
-                // We have run into several problems with timer queries on Mali-Gxx:
-                // - timer queries seem to cause memory corruptions in some cases on some devices
-                //   (see b/233754398)
-                //          - appeared at least in: "OpenGL ES 3.2 v1.r26p0-01eac0"
-                //          - wasn't present in: "OpenGL ES 3.2 v1.r32p1-00pxl1"
-                // - timer queries sometime crash with an NPE (see b/273759031)
-                bugs.dont_use_timer_query = true;
-            }
-            // Mali seems to have no problem with this (which is good for us)
-            bugs.allow_read_only_ancillary_feedback_loop = true;
-        } else if (strstr(state.renderer, "Intel")) {
-            // Intel GPU
-            bugs.vao_doesnt_store_element_array_buffer_binding = true;
-        } else if (strstr(state.renderer, "PowerVR")) {
-            // PowerVR GPU
-            // On PowerVR (Rogue GE8320) glFlush doesn't seem to do anything, in particular,
-            // it doesn't kick the GPU earlier, so don't issue these calls as they seem to slow
-            // things down.
-            bugs.disable_glFlush = true;
-            // On PowerVR (Rogue GE8320) using gl_InstanceID too early in the shader doesn't work.
-            bugs.powervr_shader_workarounds = true;
-            // On PowerVR (Rogue GE8320) destroying a fbo after glBlitFramebuffer is effectively
-            // equivalent to glFinish.
-            bugs.delay_fbo_destruction = true;
-            // PowerVR seems to have no problem with this (which is good for us)
-            bugs.allow_read_only_ancillary_feedback_loop = true;
-            // PowerVR has a shader compiler thread pinned on the last core
-            bugs.disable_thread_affinity = true;
-        } else if (strstr(state.renderer, "Apple")) {
-            // Apple GPU
-        } else if (strstr(state.renderer, "Tegra") ||
-                   strstr(state.renderer, "GeForce") ||
-                   strstr(state.renderer, "NV")) {
-            // NVIDIA GPU
-        } else if (strstr(state.renderer, "Vivante")) {
-            // Vivante GPU
-        } else if (strstr(state.renderer, "AMD") ||
-                   strstr(state.renderer, "ATI")) {
-            // AMD/ATI GPU
-        } else if (strstr(state.renderer, "Mozilla")) {
-            bugs.disable_invalidate_framebuffer = true;
-        }
-    } else {
-        // When running under ANGLE, it's a different set of workaround that we need.
-        if (strstr(state.renderer, "Adreno")) {
-            // Qualcomm GPU
-            // early exit condition is flattened in EASU code
-            // (that should be regardless of ANGLE, but we should double-check)
-            bugs.split_easu = true;
-        }
-        // TODO: see if we could use `bugs.allow_read_only_ancillary_feedback_loop = true`
-    }
 
     slog.v << "Feature level: " << +mFeatureLevel << '\n';
     slog.v << "Active workarounds: " << '\n';
@@ -345,14 +155,14 @@ OpenGLContext::OpenGLContext() noexcept {
 #endif
 
 #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-    assert_invariant(state.major <= 2 || gets.max_draw_buffers >= 4); // minspec
+    assert_invariant(mFeatureLevel == FeatureLevel::FEATURE_LEVEL_0 || gets.max_draw_buffers >= 4); // minspec
 #endif
 
     setDefaultState();
 
 #ifdef GL_EXT_texture_filter_anisotropic
 #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-    if (state.major > 2 && ext.EXT_texture_filter_anisotropic) {
+    if (mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1 && ext.EXT_texture_filter_anisotropic) {
         // make sure we don't have any error flag
         while (glGetError() != GL_NO_ERROR) { }
 
@@ -458,9 +268,287 @@ void OpenGLContext::setDefaultState() noexcept {
     }
 }
 
+// Selects the GL entry points stored in `procs`: core functions, OES/EXT variants, or no-ops.
+void OpenGLContext::initProcs(Procs* procs,
+        Extensions const& ext, GLint major, GLint) noexcept {
+    (void)ext; // only read in the GLES, non-IOS section below
+    (void)major; // only read in the GLES, non-IOS section below
+
+    // default procs that can be overridden based on runtime version
+#ifdef BACKEND_OPENGL_LEVEL_GLES30
+    procs->genVertexArrays = glGenVertexArrays;
+    procs->bindVertexArray = glBindVertexArray;
+    procs->deleteVertexArrays = glDeleteVertexArrays;
+
+    // these are core in GL and GLES 3.x
+    procs->genQueries = glGenQueries;
+    procs->deleteQueries = glDeleteQueries;
+    procs->beginQuery = glBeginQuery;
+    procs->endQuery = glEndQuery;
+    procs->getQueryObjectuiv = glGetQueryObjectuiv;
+#   ifdef BACKEND_OPENGL_VERSION_GL
+    procs->getQueryObjectui64v = glGetQueryObjectui64v; // only core in GL
+#   elif defined(GL_EXT_disjoint_timer_query)
+    procs->getQueryObjectui64v = glGetQueryObjectui64vEXT;
+#   endif // BACKEND_OPENGL_VERSION_GL
+
+    // core in ES 3.0 and GL 4.3
+    procs->invalidateFramebuffer = glInvalidateFramebuffer;
+#endif // BACKEND_OPENGL_LEVEL_GLES30
+
+    // no-op by default; only replaced below for ES 2.x runtimes and for desktop GL builds
+    procs->maxShaderCompilerThreadsKHR = +[](GLuint) {};
+
+#ifdef BACKEND_OPENGL_VERSION_GLES
+#   ifndef IOS // IOS is guaranteed to have ES3.x
+    if (UTILS_UNLIKELY(major == 2)) {
+        // Runtime OpenGL version is ES 2.x
+        if (UTILS_LIKELY(ext.OES_vertex_array_object)) {
+            procs->genVertexArrays = glGenVertexArraysOES;
+            procs->bindVertexArray = glBindVertexArrayOES;
+            procs->deleteVertexArrays = glDeleteVertexArraysOES;
+        } else {
+            // if we don't have OES_vertex_array_object, just don't do anything with real VAOs,
+            // we'll just rebind everything each time. Most Mali-400 support this extension, but
+            // a few don't.
+            procs->genVertexArrays = +[](GLsizei, GLuint*) {};
+            procs->bindVertexArray = +[](GLuint) {};
+            procs->deleteVertexArrays = +[](GLsizei, GLuint const*) {};
+        }
+
+        // EXT_disjoint_timer_query is optional -- pointers will be null if not available
+        procs->genQueries = glGenQueriesEXT;
+        procs->deleteQueries = glDeleteQueriesEXT;
+        procs->beginQuery = glBeginQueryEXT;
+        procs->endQuery = glEndQueryEXT;
+        procs->getQueryObjectuiv = glGetQueryObjectuivEXT;
+        procs->getQueryObjectui64v = glGetQueryObjectui64vEXT;
+
+        procs->invalidateFramebuffer = glDiscardFramebufferEXT;
+
+        procs->maxShaderCompilerThreadsKHR = glMaxShaderCompilerThreadsKHR;
+    }
+#   endif // IOS
+#else
+    procs->maxShaderCompilerThreadsKHR = glMaxShaderCompilerThreadsARB;
+#endif
+}
+
+void OpenGLContext::initBugs(Bugs* bugs, Extensions const& exts,
+        GLint major, GLint minor,
+        char const* vendor,
+        char const* renderer,
+        char const* version,
+        char const* shader) {
+    // Sets workaround flags in `bugs` from the renderer and version strings, GL version and exts.
+    (void)major;
+    (void)minor;
+    (void)vendor;
+    (void)renderer;
+    (void)version;
+    (void)shader;
+
+    const bool isAngle = strstr(renderer, "ANGLE");
+    if (!isAngle) {
+        if (strstr(renderer, "Adreno")) {
+            // Qualcomm GPU
+            bugs->invalidate_end_only_if_invalidate_start = true;
+
+            // On Adreno (As of 3/20) timer queries seem to return the CPU time, not the GPU time.
+            bugs->dont_use_timer_query = true;
+
+            // Blits to texture arrays are failing
+            //   This bug continues to reproduce, though at times we've seen it appear to "go away".
+            //   The standalone sample app that was written to show this problem still reproduces.
+            //   The working hypothesis is that some other state affects this behavior.
+            bugs->disable_blit_into_texture_array = true;
+
+            // early exit condition is flattened in EASU code
+            bugs->split_easu = true;
+
+            // initialize the non-used uniform array for Adreno drivers.
+            bugs->enable_initialize_non_used_uniform_array = true;
+
+            int maj, min, driverMajor, driverMinor;
+            int const c = sscanf(version, "OpenGL ES %d.%d V@%d.%d", // NOLINT(cert-err34-c)
+                    &maj, &min, &driverMajor, &driverMinor);
+            if (c == 4) {
+                // Workarounds based on version here.
+                // Notes:
+                //  bugs.invalidate_end_only_if_invalidate_start
+                //  - appeared at least in
+                //      "OpenGL ES 3.2 V@0490.0 (GIT@85da404, I46ff5fc46f, 1606794520) (Date:11/30/20)"
+                //  - wasn't present in
+                //      "OpenGL ES 3.2 V@0490.0 (GIT@0905e9f, Ia11ce2d146, 1599072951) (Date:09/02/20)"
+                //  - has been confirmed fixed in V@570.1 by Qualcomm
+                if (driverMajor < 490 || driverMajor > 570 ||
+                    (driverMajor == 570 && driverMinor >= 1)) {
+                    bugs->invalidate_end_only_if_invalidate_start = false;
+                }
+            }
+
+            // qualcomm seems to have no problem with this (which is good for us)
+            bugs->allow_read_only_ancillary_feedback_loop = true;
+
+            // Older Adreno devices that support ES3.0 only tend to be extremely buggy, so we
+            // fall back to ES2.0.
+            if (major == 3 && minor == 0) {
+                bugs->force_feature_level0 = true;
+            }
+        } else if (strstr(renderer, "Mali")) {
+            // ARM GPU
+            bugs->vao_doesnt_store_element_array_buffer_binding = true;
+            if (strstr(renderer, "Mali-T")) {
+                bugs->disable_glFlush = true;
+                bugs->disable_shared_context_draws = true;
+                bugs->texture_external_needs_rebind = true;
+                // We have not verified that timer queries work on Mali-T, so we disable to be safe.
+                bugs->dont_use_timer_query = true;
+            }
+            if (strstr(renderer, "Mali-G")) {
+                // We have run into several problems with timer queries on Mali-Gxx:
+                // - timer queries seem to cause memory corruptions in some cases on some devices
+                //   (see b/233754398)
+                //          - appeared at least in: "OpenGL ES 3.2 v1.r26p0-01eac0"
+                //          - wasn't present in: "OpenGL ES 3.2 v1.r32p1-00pxl1"
+                // - timer queries sometimes crash with an NPE (see b/273759031)
+                bugs->dont_use_timer_query = true;
+            }
+            // Mali seems to have no problem with this (which is good for us)
+            bugs->allow_read_only_ancillary_feedback_loop = true;
+        } else if (strstr(renderer, "Intel")) {
+            // Intel GPU
+            bugs->vao_doesnt_store_element_array_buffer_binding = true;
+        } else if (strstr(renderer, "PowerVR")) {
+            // PowerVR GPU
+            // On PowerVR (Rogue GE8320) glFlush doesn't seem to do anything, in particular,
+            // it doesn't kick the GPU earlier, so don't issue these calls as they seem to slow
+            // things down.
+            bugs->disable_glFlush = true;
+            // On PowerVR (Rogue GE8320) using gl_InstanceID too early in the shader doesn't work.
+            bugs->powervr_shader_workarounds = true;
+            // On PowerVR (Rogue GE8320) destroying a fbo after glBlitFramebuffer is effectively
+            // equivalent to glFinish.
+            bugs->delay_fbo_destruction = true;
+            // PowerVR seems to have no problem with this (which is good for us)
+            bugs->allow_read_only_ancillary_feedback_loop = true;
+            // PowerVR has a shader compiler thread pinned on the last core
+            bugs->disable_thread_affinity = true;
+        } else if (strstr(renderer, "Apple")) {
+            // Apple GPU
+        } else if (strstr(renderer, "Tegra") ||
+                   strstr(renderer, "GeForce") ||
+                   strstr(renderer, "NV")) {
+            // NVIDIA GPU
+        } else if (strstr(renderer, "Vivante")) {
+            // Vivante GPU
+        } else if (strstr(renderer, "AMD") ||
+                   strstr(renderer, "ATI")) {
+            // AMD/ATI GPU
+        } else if (strstr(renderer, "Mozilla")) {
+            bugs->disable_invalidate_framebuffer = true;
+        }
+    } else {
+        // When running under ANGLE, it's a different set of workarounds that we need.
+        if (strstr(renderer, "Adreno")) {
+            // Qualcomm GPU
+            // early exit condition is flattened in EASU code
+            // (that should be regardless of ANGLE, but we should double-check)
+            bugs->split_easu = true;
+        }
+        // TODO: see if we could use `bugs.allow_read_only_ancillary_feedback_loop = true`
+    }
+
+#ifdef BACKEND_OPENGL_VERSION_GLES
+#   ifndef IOS // IOS is guaranteed to have ES3.x
+    if (UTILS_UNLIKELY(major == 2)) {
+        if (UTILS_UNLIKELY(!exts.OES_vertex_array_object)) {
+            // we activate this workaround path, which does the reset of array buffer
+            bugs->vao_doesnt_store_element_array_buffer_binding = true;
+        }
+    }
+#   endif // IOS
+#else
+    // feedback loops are allowed on GL desktop as long as writes are disabled
+    bugs->allow_read_only_ancillary_feedback_loop = true;
+#endif
+}
+
+FeatureLevel OpenGLContext::resolveFeatureLevel(GLint major, GLint minor,
+        Extensions const& exts,
+        Gets const& gets,
+        Bugs const& bugs) noexcept {
+    // Picks the feature level from the GL version, extensions and sampler limits; bugs can force 0.
+    constexpr auto const caps3 = FEATURE_LEVEL_CAPS[+FeatureLevel::FEATURE_LEVEL_3];
+    constexpr GLint MAX_VERTEX_SAMPLER_COUNT = caps3.MAX_VERTEX_SAMPLER_COUNT;
+    constexpr GLint MAX_FRAGMENT_SAMPLER_COUNT = caps3.MAX_FRAGMENT_SAMPLER_COUNT;
+
+    (void)exts;
+    (void)gets;
+    (void)bugs;
+
+    FeatureLevel featureLevel = FeatureLevel::FEATURE_LEVEL_1; // kept unless changed below
+
+#ifdef BACKEND_OPENGL_VERSION_GLES
+    if (major == 3) {
+        // Runtime OpenGL version is ES 3.x
+        assert_invariant(gets.max_texture_image_units >= 16);
+        assert_invariant(gets.max_combined_texture_image_units >= 32);
+        if (minor >= 1) {
+            // figure out our feature level
+            if (exts.EXT_texture_cube_map_array) {
+                featureLevel = FeatureLevel::FEATURE_LEVEL_2;
+                if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT &&
+                    gets.max_combined_texture_image_units >=
+                    (MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) {
+                    featureLevel = FeatureLevel::FEATURE_LEVEL_3;
+                }
+            }
+        }
+    }
+#   ifndef IOS // IOS is guaranteed to have ES3.x
+    else if (UTILS_UNLIKELY(major == 2)) {
+        // Runtime OpenGL version is ES 2.x
+#       if defined(BACKEND_OPENGL_LEVEL_GLES30)
+        // mandatory extensions (all supported by Mali-400 and Adreno 304)
+        assert_invariant(exts.OES_depth_texture);
+        assert_invariant(exts.OES_depth24);
+        assert_invariant(exts.OES_packed_depth_stencil);
+        assert_invariant(exts.OES_rgb8_rgba8);
+        assert_invariant(exts.OES_standard_derivatives);
+        assert_invariant(exts.OES_texture_npot);
+#       endif
+        featureLevel = FeatureLevel::FEATURE_LEVEL_0;
+    }
+#   endif // IOS
+#else
+    assert_invariant(gets.max_texture_image_units >= 16);
+    assert_invariant(gets.max_combined_texture_image_units >= 32);
+    if (major == 4) {
+        assert_invariant(minor >= 1);
+        if (minor >= 3) {
+            // cubemap arrays are available as of OpenGL 4.0
+            featureLevel = FeatureLevel::FEATURE_LEVEL_2;
+            // figure out our feature level
+            if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT &&
+                gets.max_combined_texture_image_units >=
+                (MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) {
+                featureLevel = FeatureLevel::FEATURE_LEVEL_3;
+            }
+        }
+    }
+#endif
+
+    if (bugs.force_feature_level0) { // the workaround overrides whatever was computed above
+        featureLevel = FeatureLevel::FEATURE_LEVEL_0;
+    }
+
+    return featureLevel;
+}
+
 #ifdef BACKEND_OPENGL_VERSION_GLES
 
-void OpenGLContext::initExtensionsGLES() noexcept {
+void OpenGLContext::initExtensionsGLES(Extensions* ext, GLint major, GLint minor) noexcept {
     const char * const extensions = (const char*)glGetString(GL_EXTENSIONS);
     GLUtils::unordered_string_set const exts = GLUtils::split(extensions);
     if constexpr (DEBUG_PRINT_EXTENSIONS) {
@@ -472,51 +560,50 @@ void OpenGLContext::initExtensionsGLES() noexcept {
 
     // figure out and initialize the extensions we need
     using namespace std::literals;
-    ext.APPLE_color_buffer_packed_float = exts.has("GL_APPLE_color_buffer_packed_float"sv);
-    ext.EXT_clip_control = exts.has("GL_EXT_clip_control"sv);
-    ext.EXT_clip_cull_distance = exts.has("GL_EXT_clip_cull_distance"sv);
-    ext.EXT_color_buffer_float = exts.has("GL_EXT_color_buffer_float"sv);
-    ext.EXT_color_buffer_half_float = exts.has("GL_EXT_color_buffer_half_float"sv);
-    ext.EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv);
-    ext.EXT_discard_framebuffer = exts.has("GL_EXT_discard_framebuffer"sv);
-    ext.EXT_disjoint_timer_query = exts.has("GL_EXT_disjoint_timer_query"sv);
-    ext.EXT_multisampled_render_to_texture = exts.has("GL_EXT_multisampled_render_to_texture"sv);
-    ext.EXT_multisampled_render_to_texture2 = exts.has("GL_EXT_multisampled_render_to_texture2"sv);
-    ext.EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv);
+    ext->APPLE_color_buffer_packed_float = exts.has("GL_APPLE_color_buffer_packed_float"sv);
+    ext->EXT_clip_control = exts.has("GL_EXT_clip_control"sv);
+    ext->EXT_clip_cull_distance = exts.has("GL_EXT_clip_cull_distance"sv);
+    ext->EXT_color_buffer_float = exts.has("GL_EXT_color_buffer_float"sv);
+    ext->EXT_color_buffer_half_float = exts.has("GL_EXT_color_buffer_half_float"sv);
+    ext->EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv);
+    ext->EXT_discard_framebuffer = exts.has("GL_EXT_discard_framebuffer"sv);
+    ext->EXT_disjoint_timer_query = exts.has("GL_EXT_disjoint_timer_query"sv);
+    ext->EXT_multisampled_render_to_texture = exts.has("GL_EXT_multisampled_render_to_texture"sv);
+    ext->EXT_multisampled_render_to_texture2 = exts.has("GL_EXT_multisampled_render_to_texture2"sv);
+    ext->EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv);
 #if !defined(__EMSCRIPTEN__)
-    ext.EXT_texture_compression_etc2 = true;
+    ext->EXT_texture_compression_etc2 = true;
 #endif
-    ext.EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv);
-    ext.EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv);
-    ext.EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv);
-    ext.EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv);
-    ext.EXT_texture_cube_map_array = exts.has("GL_EXT_texture_cube_map_array"sv) || exts.has("GL_OES_texture_cube_map_array"sv);
-    ext.GOOGLE_cpp_style_line_directive = exts.has("GL_GOOGLE_cpp_style_line_directive"sv);
-    ext.KHR_debug = exts.has("GL_KHR_debug"sv);
-    ext.KHR_parallel_shader_compile = exts.has("GL_KHR_parallel_shader_compile"sv);
-    ext.KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv);
-    ext.KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv);
-    ext.OES_depth_texture = exts.has("GL_OES_depth_texture"sv);
-    ext.OES_depth24 = exts.has("GL_OES_depth24"sv);
-    ext.OES_packed_depth_stencil = exts.has("GL_OES_packed_depth_stencil"sv);
-    ext.OES_EGL_image_external_essl3 = exts.has("GL_OES_EGL_image_external_essl3"sv);
-    ext.OES_rgb8_rgba8 = exts.has("GL_OES_rgb8_rgba8"sv);
-    ext.OES_standard_derivatives = exts.has("GL_OES_standard_derivatives"sv);
-    ext.OES_texture_npot = exts.has("GL_OES_texture_npot"sv);
-    ext.OES_vertex_array_object = exts.has("GL_OES_vertex_array_object"sv);
-    ext.WEBGL_compressed_texture_etc = exts.has("WEBGL_compressed_texture_etc"sv);
-    ext.WEBGL_compressed_texture_s3tc = exts.has("WEBGL_compressed_texture_s3tc"sv);
-    ext.WEBGL_compressed_texture_s3tc_srgb = exts.has("WEBGL_compressed_texture_s3tc_srgb"sv);
+    ext->EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv);
+    ext->EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv);
+    ext->EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv);
+    ext->EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv);
+    ext->EXT_texture_cube_map_array = exts.has("GL_EXT_texture_cube_map_array"sv) || exts.has("GL_OES_texture_cube_map_array"sv);
+    ext->GOOGLE_cpp_style_line_directive = exts.has("GL_GOOGLE_cpp_style_line_directive"sv);
+    ext->KHR_debug = exts.has("GL_KHR_debug"sv);
+    ext->KHR_parallel_shader_compile = exts.has("GL_KHR_parallel_shader_compile"sv);
+    ext->KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv);
+    ext->KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv);
+    ext->OES_depth_texture = exts.has("GL_OES_depth_texture"sv);
+    ext->OES_depth24 = exts.has("GL_OES_depth24"sv);
+    ext->OES_packed_depth_stencil = exts.has("GL_OES_packed_depth_stencil"sv);
+    ext->OES_EGL_image_external_essl3 = exts.has("GL_OES_EGL_image_external_essl3"sv);
+    ext->OES_rgb8_rgba8 = exts.has("GL_OES_rgb8_rgba8"sv);
+    ext->OES_standard_derivatives = exts.has("GL_OES_standard_derivatives"sv);
+    ext->OES_texture_npot = exts.has("GL_OES_texture_npot"sv);
+    ext->OES_vertex_array_object = exts.has("GL_OES_vertex_array_object"sv);
+    ext->WEBGL_compressed_texture_etc = exts.has("WEBGL_compressed_texture_etc"sv);
+    ext->WEBGL_compressed_texture_s3tc = exts.has("WEBGL_compressed_texture_s3tc"sv);
+    ext->WEBGL_compressed_texture_s3tc_srgb = exts.has("WEBGL_compressed_texture_s3tc_srgb"sv);
 
     // ES 3.2 implies EXT_color_buffer_float
-    if (state.major > 3 || (state.major == 3 && state.minor >= 2)) {
-        ext.EXT_color_buffer_float = true;
+    if (major > 3 || (major == 3 && minor >= 2)) {
+        ext->EXT_color_buffer_float = true;
     }
-
     // ES 3.x implies EXT_discard_framebuffer and OES_vertex_array_object
-    if (state.major >= 3) {
-        ext.EXT_discard_framebuffer = true;
-        ext.OES_vertex_array_object = true;
+    if (major >= 3) {
+        ext->EXT_discard_framebuffer = true;
+        ext->OES_vertex_array_object = true;
     }
 }
 
@@ -524,7 +611,7 @@ void OpenGLContext::initExtensionsGLES() noexcept {
 
 #ifdef BACKEND_OPENGL_VERSION_GL
 
-void OpenGLContext::initExtensionsGL() noexcept {
+void OpenGLContext::initExtensionsGL(Extensions* ext, GLint major, GLint minor) noexcept {
     GLUtils::unordered_string_set exts;
     GLint n = 0;
     glGetIntegerv(GL_NUM_EXTENSIONS, &n);
@@ -539,55 +626,52 @@ void OpenGLContext::initExtensionsGL() noexcept {
     }
 
     using namespace std::literals;
-    ext.APPLE_color_buffer_packed_float = true;  // Assumes core profile.
-    ext.ARB_shading_language_packing = exts.has("GL_ARB_shading_language_packing"sv);
-    ext.EXT_color_buffer_float = true;  // Assumes core profile.
-    ext.EXT_color_buffer_half_float = true;  // Assumes core profile.
-    ext.EXT_clip_cull_distance = true;
-    ext.EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv);
-    ext.EXT_discard_framebuffer = false;
-    ext.EXT_disjoint_timer_query = true;
-    ext.EXT_multisampled_render_to_texture = false;
-    ext.EXT_multisampled_render_to_texture2 = false;
-    ext.EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv);
-    ext.EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv);
-    ext.EXT_texture_compression_etc2 = exts.has("GL_ARB_ES3_compatibility"sv);
-    ext.EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv);
-    ext.EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv);
-    ext.EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv);
-    ext.EXT_texture_cube_map_array = true;
-    ext.EXT_texture_filter_anisotropic = exts.has("GL_EXT_texture_filter_anisotropic"sv);
-    ext.EXT_texture_sRGB = exts.has("GL_EXT_texture_sRGB"sv);
-    ext.GOOGLE_cpp_style_line_directive = exts.has("GL_GOOGLE_cpp_style_line_directive"sv);
-    ext.KHR_parallel_shader_compile = exts.has("GL_KHR_parallel_shader_compile"sv);
-    ext.KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv);
-    ext.KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv);
-    ext.OES_depth_texture = true;
-    ext.OES_depth24 = true;
-    ext.OES_EGL_image_external_essl3 = false;
-    ext.OES_rgb8_rgba8 = true;
-    ext.OES_standard_derivatives = true;
-    ext.OES_texture_npot = true;
-    ext.OES_vertex_array_object = true;
-    ext.WEBGL_compressed_texture_etc = false;
-    ext.WEBGL_compressed_texture_s3tc = false;
-    ext.WEBGL_compressed_texture_s3tc_srgb = false;
-
-    auto const major = state.major;
-    auto const minor = state.minor;
+    ext->APPLE_color_buffer_packed_float = true;  // Assumes core profile.
+    ext->ARB_shading_language_packing = exts.has("GL_ARB_shading_language_packing"sv);
+    ext->EXT_color_buffer_float = true;  // Assumes core profile.
+    ext->EXT_color_buffer_half_float = true;  // Assumes core profile.
+    ext->EXT_clip_cull_distance = true;
+    ext->EXT_debug_marker = exts.has("GL_EXT_debug_marker"sv);
+    ext->EXT_discard_framebuffer = false;
+    ext->EXT_disjoint_timer_query = true;
+    ext->EXT_multisampled_render_to_texture = false;
+    ext->EXT_multisampled_render_to_texture2 = false;
+    ext->EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv);
+    ext->EXT_texture_compression_bptc = exts.has("GL_EXT_texture_compression_bptc"sv);
+    ext->EXT_texture_compression_etc2 = exts.has("GL_ARB_ES3_compatibility"sv);
+    ext->EXT_texture_compression_rgtc = exts.has("GL_EXT_texture_compression_rgtc"sv);
+    ext->EXT_texture_compression_s3tc = exts.has("GL_EXT_texture_compression_s3tc"sv);
+    ext->EXT_texture_compression_s3tc_srgb = exts.has("GL_EXT_texture_compression_s3tc_srgb"sv);
+    ext->EXT_texture_cube_map_array = true;
+    ext->EXT_texture_filter_anisotropic = exts.has("GL_EXT_texture_filter_anisotropic"sv);
+    ext->EXT_texture_sRGB = exts.has("GL_EXT_texture_sRGB"sv);
+    ext->GOOGLE_cpp_style_line_directive = exts.has("GL_GOOGLE_cpp_style_line_directive"sv);
+    ext->KHR_parallel_shader_compile = exts.has("GL_KHR_parallel_shader_compile"sv);
+    ext->KHR_texture_compression_astc_hdr = exts.has("GL_KHR_texture_compression_astc_hdr"sv);
+    ext->KHR_texture_compression_astc_ldr = exts.has("GL_KHR_texture_compression_astc_ldr"sv);
+    ext->OES_depth_texture = true;
+    ext->OES_depth24 = true;
+    ext->OES_EGL_image_external_essl3 = false;
+    ext->OES_rgb8_rgba8 = true;
+    ext->OES_standard_derivatives = true;
+    ext->OES_texture_npot = true;
+    ext->OES_vertex_array_object = true;
+    ext->WEBGL_compressed_texture_etc = false;
+    ext->WEBGL_compressed_texture_s3tc = false;
+    ext->WEBGL_compressed_texture_s3tc_srgb = false;
 
     // OpenGL 4.2 implies ARB_shading_language_packing
     if (major > 4 || (major == 4 && minor >= 2)) {
-        ext.ARB_shading_language_packing = true;
+        ext->ARB_shading_language_packing = true;
     }
     // OpenGL 4.3 implies EXT_discard_framebuffer
     if (major > 4 || (major == 4 && minor >= 3)) {
-        ext.EXT_discard_framebuffer = true;
-        ext.KHR_debug = true;
+        ext->EXT_discard_framebuffer = true;
+        ext->KHR_debug = true;
     }
     // OpenGL 4.5 implies EXT_clip_control
     if (major > 4 || (major == 4 && minor >= 5)) {
-        ext.EXT_clip_control = true;
+        ext->EXT_clip_control = true;
     }
 }
 
@@ -683,7 +767,7 @@ void OpenGLContext::deleteBuffers(GLsizei n, const GLuint* buffers, GLenum targe
     }
 
 #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-    assert_invariant(state.major > 2 ||
+    assert_invariant(mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1 ||
             (target != GL_UNIFORM_BUFFER && target != GL_TRANSFORM_FEEDBACK_BUFFER));
 
     if (target == GL_UNIFORM_BUFFER || target == GL_TRANSFORM_FEEDBACK_BUFFER) {
diff --git a/filament/backend/src/opengl/OpenGLContext.h b/filament/backend/src/opengl/OpenGLContext.h
index 569d7749928..ff8b29cbd54 100644
--- a/filament/backend/src/opengl/OpenGLContext.h
+++ b/filament/backend/src/opengl/OpenGLContext.h
@@ -92,7 +92,7 @@ class OpenGLContext {
 #   ifndef BACKEND_OPENGL_LEVEL_GLES30
             return true;
 #   else
-            return state.major == 2;
+            return mFeatureLevel == FeatureLevel::FEATURE_LEVEL_0;
 #   endif
 #else
         return false;
@@ -151,7 +151,7 @@ class OpenGLContext {
     void deleteVertexArrays(GLsizei n, const GLuint* arrays) noexcept;
 
     // glGet*() values
-    struct {
+    struct Gets {
         GLfloat max_anisotropy;
         GLint max_draw_buffers;
         GLint max_renderbuffer_size;
@@ -170,7 +170,7 @@ class OpenGLContext {
     } features = {};
 
     // supported extensions detected at runtime
-    struct {
+    struct Extensions {
         bool APPLE_color_buffer_packed_float;
         bool ARB_shading_language_packing;
         bool EXT_clip_control;
@@ -209,7 +209,7 @@ class OpenGLContext {
         bool WEBGL_compressed_texture_s3tc_srgb;
     } ext = {};
 
-    struct {
+    struct Bugs {
         // Some drivers have issues with UBOs in the fragment shader when
         // glFlush() is called between draw calls.
         bool disable_glFlush;
@@ -275,6 +275,10 @@ class OpenGLContext {
         // performance more if we end-up pinned on the same one.
         bool disable_thread_affinity;
 
+        // Force feature level 0. Typically used for low-end ES3 devices with significant driver
+        // bugs or performance issues.
+        bool force_feature_level0;
+
     } bugs = {};
 
     // state getters -- as needed.
@@ -397,7 +401,7 @@ class OpenGLContext {
         } window;
     } state;
 
-    struct {
+    struct Procs {
         void (* bindVertexArray)(GLuint array);
         void (* deleteVertexArrays)(GLsizei n, const GLuint* arrays);
         void (* genVertexArrays)(GLsizei n, GLuint* arrays);
@@ -467,18 +471,46 @@ class OpenGLContext {
             {   bugs.disable_thread_affinity,
                     "disable_thread_affinity",
                     ""},
+            {   bugs.force_feature_level0,
+                    "force_feature_level0",
+                    ""},
     }};
 
     RenderPrimitive mDefaultVAO;
 
     // this is chosen to minimize code size
 #if defined(BACKEND_OPENGL_VERSION_GLES)
-    void initExtensionsGLES() noexcept;
+    static void initExtensionsGLES(Extensions* ext, GLint major, GLint minor) noexcept;
 #endif
 #if defined(BACKEND_OPENGL_VERSION_GL)
-    void initExtensionsGL() noexcept;
+    static void initExtensionsGL(Extensions* ext, GLint major, GLint minor) noexcept;
 #endif
 
+    static void initExtensions(Extensions* ext, GLint major, GLint minor) noexcept {
+#if defined(BACKEND_OPENGL_VERSION_GLES)
+        initExtensionsGLES(ext, major, minor);
+#endif
+#if defined(BACKEND_OPENGL_VERSION_GL)
+        initExtensionsGL(ext, major, minor);
+#endif
+    }
+
+    static void initBugs(Bugs* bugs, Extensions const& exts,
+            GLint major, GLint minor,
+            char const* vendor,
+            char const* renderer,
+            char const* version,
+            char const* shader
+    );
+
+    static void initProcs(Procs* procs,
+            Extensions const& exts, GLint major, GLint minor) noexcept;
+
+    static FeatureLevel resolveFeatureLevel(GLint major, GLint minor,
+            Extensions const& exts,
+            Gets const& gets,
+            Bugs const& bugs) noexcept;
+
     template <typename T, typename F>
     static inline void update_state(T& state, T const& expected, F functor, bool force = false) noexcept {
         if (UTILS_UNLIKELY(force || state != expected)) {
@@ -571,7 +603,7 @@ void OpenGLContext::activeTexture(GLuint unit) noexcept {
 
 void OpenGLContext::bindSampler(GLuint unit, GLuint sampler) noexcept {
     assert_invariant(unit < MAX_TEXTURE_UNIT_COUNT);
-    assert_invariant(state.major > 2);
+    assert_invariant(mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1);
 #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
     update_state(state.textures.units[unit].sampler, sampler, [&]() {
         glBindSampler(unit, sampler);
@@ -617,7 +649,7 @@ void OpenGLContext::bindVertexArray(RenderPrimitive const* p) noexcept {
 
 void OpenGLContext::bindBufferRange(GLenum target, GLuint index, GLuint buffer,
         GLintptr offset, GLsizeiptr size) noexcept {
-    assert_invariant(state.major > 2);
+    assert_invariant(mFeatureLevel >= FeatureLevel::FEATURE_LEVEL_1);
 
 #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
 #   ifdef BACKEND_OPENGL_LEVEL_GLES31
diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp
index 47cc50edfc2..e2172ae8add 100644
--- a/filament/backend/src/opengl/OpenGLDriver.cpp
+++ b/filament/backend/src/opengl/OpenGLDriver.cpp
@@ -1872,6 +1872,18 @@ bool OpenGLDriver::isSRGBSwapChainSupported() {
     return mPlatform.isSRGBSwapChainSupported();
 }
 
+bool OpenGLDriver::isStereoSupported() {
+    // Stereo requires instancing and EXT_clip_cull_distance.
+    if (UTILS_UNLIKELY(mContext.isES2())) {
+        return false;
+    }
+    return mContext.ext.EXT_clip_cull_distance;
+}
+
+bool OpenGLDriver::isParallelShaderCompileSupported() {
+    return mShaderCompilerService.isParallelShaderCompileSupported();
+}
+
 bool OpenGLDriver::isWorkaroundNeeded(Workaround workaround) {
     switch (workaround) {
         case Workaround::SPLIT_EASU:
@@ -2593,7 +2605,7 @@ bool OpenGLDriver::getTimerQueryValue(Handle<HwTimerQuery> tqh, uint64_t* elapse
 void OpenGLDriver::compilePrograms(CompilerPriorityQueue priority,
         CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
     if (callback) {
-        getShaderCompilerService().notifyWhenAllProgramsAreReady(priority, handler, callback, user);
+        getShaderCompilerService().notifyWhenAllProgramsAreReady(handler, callback, user);
     }
 }
 
@@ -3258,7 +3270,7 @@ void OpenGLDriver::setFrameScheduledCallback(Handle<HwSwapChain> sch,
 }
 
 void OpenGLDriver::setFrameCompletedCallback(Handle<HwSwapChain> sch,
-        FrameCompletedCallback callback, void* user) {
+        CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
     DEBUG_MARKER()
 }
 
diff --git a/filament/backend/src/opengl/OpenGLPlatform.cpp b/filament/backend/src/opengl/OpenGLPlatform.cpp
index 4297479d97f..837faecdc4c 100644
--- a/filament/backend/src/opengl/OpenGLPlatform.cpp
+++ b/filament/backend/src/opengl/OpenGLPlatform.cpp
@@ -116,4 +116,7 @@ bool OpenGLPlatform::isExtraContextSupported() const noexcept {
 void OpenGLPlatform::createContext(bool) {
 }
 
+void OpenGLPlatform::releaseContext() noexcept {
+}
+
 } // namespace filament::backend
diff --git a/filament/backend/src/opengl/ShaderCompilerService.cpp b/filament/backend/src/opengl/ShaderCompilerService.cpp
index 3dfdea5f478..d3d4cdd0658 100644
--- a/filament/backend/src/opengl/ShaderCompilerService.cpp
+++ b/filament/backend/src/opengl/ShaderCompilerService.cpp
@@ -64,17 +64,17 @@ static inline std::string to_string(float f) noexcept {
 // ------------------------------------------------------------------------------------------------
 
 struct ShaderCompilerService::OpenGLProgramToken : ProgramToken {
-    struct ProgramBinary {
-        GLenum format{};
+    struct ProgramData {
         GLuint program{};
         std::array<GLuint, Program::SHADER_TYPE_COUNT> shaders{};
-        std::vector<char> blob;
     };
 
-    ~OpenGLProgramToken();
+    ~OpenGLProgramToken() override;
+
     OpenGLProgramToken(ShaderCompilerService& compiler, utils::CString const& name) noexcept
             : compiler(compiler), name(name) {
     }
+
     ShaderCompilerService& compiler;
     utils::CString const& name;
     utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> attributes;
@@ -86,22 +86,21 @@ struct ShaderCompilerService::OpenGLProgramToken : ProgramToken {
     } gl; // 12 bytes
 
 
-    // Sets the programBinary, typically from the compiler thread, and signal the main thread.
+    // Sets the programData, typically from the compiler thread, and signals the main thread.
     // This is similar to std::promise::set_value.
-    void set(ProgramBinary programBinary) noexcept {
-        using std::swap;
+    void set(ProgramData const& data) noexcept {
         std::unique_lock const l(lock);
-        swap(binary, programBinary);
+        programData = data;
         signaled = true;
         cond.notify_one();
     }
 
     // Get the programBinary, wait if necessary.
     // This is similar to std::future::get
-    ProgramBinary const& get() const noexcept {
+    ProgramData const& get() const noexcept {
         std::unique_lock l(lock);
         cond.wait(l, [this](){ return signaled; });
-        return binary;
+        return programData;
     }
 
     // Checks if the programBinary is ready.
@@ -112,10 +111,11 @@ struct ShaderCompilerService::OpenGLProgramToken : ProgramToken {
         return cond.wait_for(l, 0s, [this](){ return signaled; });
     }
 
+    CallbackManager::Handle handle{};
     BlobCacheKey key;
     mutable utils::Mutex lock;
     mutable utils::Condition cond;
-    ProgramBinary binary;
+    ProgramData programData;
     bool signaled = false;
 
     bool canceled = false; // not part of the signaling
@@ -135,11 +135,16 @@ void* ShaderCompilerService::getUserData(const program_token_t& token) noexcept
 
 ShaderCompilerService::ShaderCompilerService(OpenGLDriver& driver)
         : mDriver(driver),
+          mCallbackManager(driver),
           KHR_parallel_shader_compile(driver.getContext().ext.KHR_parallel_shader_compile) {
 }
 
 ShaderCompilerService::~ShaderCompilerService() noexcept = default;
 
+bool ShaderCompilerService::isParallelShaderCompileSupported() const noexcept {
+    return KHR_parallel_shader_compile || mShaderCompilerThreadCount;
+}
+
 void ShaderCompilerService::init() noexcept {
     // If we have KHR_parallel_shader_compile, we always use it, it should be more resource
     // friendly.
@@ -170,32 +175,34 @@ void ShaderCompilerService::init() noexcept {
             }
 
             mShaderCompilerThreadCount = poolSize;
-            mCompilerThreadPool.init(mShaderCompilerThreadCount, priority,
-                    [platform = &mDriver.mPlatform, sharedContext = mUseSharedContext]() {
+            mCompilerThreadPool.init(mShaderCompilerThreadCount,
+                    [&platform = mDriver.mPlatform, priority]() {
+                        // give the thread a name
+                        JobSystem::setThreadName("CompilerThreadPool");
+                        // run at a slightly lower priority than other filament threads
+                        JobSystem::setThreadPriority(priority);
                         // create a gl context current to this thread
-                        platform->createContext(sharedContext);
+                        platform.createContext(true);
+                    },
+                    [&platform = mDriver.mPlatform]() {
+                        // release context and thread state
+                        platform.releaseContext();
                     });
         }
     }
 }
 
 void ShaderCompilerService::terminate() noexcept {
-    // We could have some pending callbacks here, we need to execute them.
-    // This is equivalent to calling cancelTickOp() on all active tokens.
-    for (auto&& op: mRunAtNextTickOps) {
-        auto const& [priority, token, job] = op;
-        if (!token && job.callback) {
-            // This is a little fragile here. We know by construction that jobs that have a
-            // null token are the ones that dispatch the user callbacks.
-            mDriver.scheduleCallback(job.handler, job.user, job.callback);
-        }
-    }
-    mRunAtNextTickOps.clear();
-
     // Finally stop the thread pool immediately. Pending jobs will be discarded. We guarantee by
     // construction that nobody is waiting on a token (because waiting is only done on the main
     // backend thread, and if we're here, we're on the backend main thread).
     mCompilerThreadPool.terminate();
+
+    mRunAtNextTickOps.clear();
+
+    // We could have some pending callbacks here, we need to execute them.
+    // This is equivalent to calling cancelTickOp() on all active tokens.
+    mCallbackManager.terminate();
 }
 
 ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
@@ -203,132 +210,104 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
     auto& gl = mDriver.getContext();
 
     auto token = std::make_shared<OpenGLProgramToken>(*this, name);
-
     if (UTILS_UNLIKELY(gl.isES2())) {
         token->attributes = std::move(program.getAttributes());
     }
 
     token->gl.program = OpenGLBlobCache::retrieve(&token->key, mDriver.mPlatform, program);
-    if (!token->gl.program) {
-        CompilerPriorityQueue const priorityQueue = program.getPriorityQueue();
-        if (mShaderCompilerThreadCount) {
-            // queue a compile job
-            mCompilerThreadPool.queue(priorityQueue, token,
-                    [this, &gl, program = std::move(program), token]() mutable {
-
-                        // compile the shaders
-                        std::array<GLuint, Program::SHADER_TYPE_COUNT> shaders{};
-                        std::array<utils::CString, Program::SHADER_TYPE_COUNT> shaderSourceCode;
-                        compileShaders(gl,
-                                std::move(program.getShadersSource()),
-                                program.getSpecializationConstants(),
-                                shaders,
-                                shaderSourceCode);
-
-                        // link the program
-                        GLuint const glProgram = linkProgram(gl, shaders, token->attributes);
-
-                        OpenGLProgramToken::ProgramBinary binary;
-                        binary.shaders = shaders;
-
-                        if (UTILS_LIKELY(mUseSharedContext)) {
-                            // We need to query the link status here to guarantee that the
-                            // program is compiled and linked now (we don't want this to be
-                            // deferred to later). We don't care about the result at this point.
-                            GLint status;
-                            glGetProgramiv(glProgram, GL_LINK_STATUS, &status);
-                            binary.program = glProgram;
-                            if (token->key) {
-                                // Attempt to cache. This calls glGetProgramBinary.
-                                OpenGLBlobCache::insert(mDriver.mPlatform, token->key, glProgram);
-                            }
-                        }
-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-                        else {
-                            // retrieve the program binary
-                            GLsizei programBinarySize = 0;
-                            glGetProgramiv(glProgram, GL_PROGRAM_BINARY_LENGTH, &programBinarySize);
-                            assert_invariant(programBinarySize);
-                            if (programBinarySize) {
-                                binary.blob.resize(programBinarySize);
-                                glGetProgramBinary(glProgram, programBinarySize,
-                                        &programBinarySize, &binary.format, binary.blob.data());
-                            }
-                            // and we can destroy the program
-                            glDeleteProgram(glProgram);
-                            if (token->key) {
-                                // attempt to cache
-                                OpenGLBlobCache::insert(mDriver.mPlatform, token->key,
-                                        binary.format,
-                                        binary.blob.data(), GLsizei(binary.blob.size()));
-                            }
-                        }
-#endif
-                        // we don't need to check for success here, it'll be done on the
-                        // main thread side.
-                        token->set(std::move(binary));
-                    });
-        } else
-        {
-            // this cannot fail because we check compilation status after linking the program
-            // shaders[] is filled with id of shader stages present.
-            compileShaders(gl,
-                    std::move(program.getShadersSource()),
-                    program.getSpecializationConstants(),
-                    token->gl.shaders,
-                    token->shaderSourceCode);
+    if (token->gl.program) {
+        return token;
+    }
 
-        }
+    token->handle = mCallbackManager.get();
+
+    CompilerPriorityQueue const priorityQueue = program.getPriorityQueue();
+    if (mShaderCompilerThreadCount) {
+        // queue a compile job
+        mCompilerThreadPool.queue(priorityQueue, token,
+                [this, &gl, program = std::move(program), token]() mutable {
+                    // compile the shaders
+                    std::array<GLuint, Program::SHADER_TYPE_COUNT> shaders{};
+                    std::array<utils::CString, Program::SHADER_TYPE_COUNT> shaderSourceCode;
+                    compileShaders(gl,
+                            std::move(program.getShadersSource()),
+                            program.getSpecializationConstants(),
+                            shaders,
+                            shaderSourceCode);
+
+                    // link the program
+                    GLuint const glProgram = linkProgram(gl, shaders, token->attributes);
+
+                    OpenGLProgramToken::ProgramData programData;
+                    programData.shaders = shaders;
+
+                    // We need to query the link status here to guarantee that the
+                    // program is compiled and linked now (we don't want this to be
+                    // deferred to later). We don't care about the result at this point.
+                    GLint status;
+                    glGetProgramiv(glProgram, GL_LINK_STATUS, &status);
+                    programData.program = glProgram;
+
+                    token->gl.program = programData.program;
+
+                    // we don't need to check for success here, it'll be done on the
+                    // main thread side.
+                    token->set(programData);
+
+                    mCallbackManager.put(token->handle);
+
+                    // caching must be the last thing we do
+                    if (token->key) {
+                        // Attempt to cache. This calls glGetProgramBinary.
+                        OpenGLBlobCache::insert(mDriver.mPlatform, token->key, glProgram);
+                    }
+                });
+
+    } else {
+        // this cannot fail because we check compilation status after linking the program
+        // shaders[] is filled with the ids of the shader stages present.
+        compileShaders(gl,
+                std::move(program.getShadersSource()),
+                program.getSpecializationConstants(),
+                token->gl.shaders,
+                token->shaderSourceCode);
 
         runAtNextTick(priorityQueue, token, [this, token](Job const&) {
-            if (mShaderCompilerThreadCount) {
-                if (!token->gl.program) {
-                    // TODO: see if we could completely eliminate this callback here
-                    //       and instead just rely on token->gl.program being atomically
-                    //       set by the compiler thread.
-                    // we're using the compiler thread, check if the program is ready, no-op if not.
-                    if (!token->isReady()) {
+            if (KHR_parallel_shader_compile) {
+                // don't attempt to link this program until all shaders are done compiling
+                GLint status;
+                if (token->gl.program) {
+                    glGetProgramiv(token->gl.program, GL_COMPLETION_STATUS, &status);
+                    if (status == GL_FALSE) {
                         return false;
                     }
-                    // program binary is ready, retrieve it without blocking
-                    ShaderCompilerService::getProgramFromCompilerPool(
-                            const_cast<program_token_t&>(token));
-                }
-            } else {
-                if (KHR_parallel_shader_compile) {
-                    // don't attempt to link this program if all shaders are not done compiling
-                    GLint status;
-                    if (token->gl.program) {
-                        glGetProgramiv(token->gl.program, GL_COMPLETION_STATUS, &status);
-                        if (status == GL_FALSE) {
-                            return false;
-                        }
-                    } else {
-                        for (auto shader: token->gl.shaders) {
-                            if (shader) {
-                                glGetShaderiv(shader, GL_COMPLETION_STATUS, &status);
-                                if (status == GL_FALSE) {
-                                    return false;
-                                }
+                } else {
+                    for (auto shader: token->gl.shaders) {
+                        if (shader) {
+                            glGetShaderiv(shader, GL_COMPLETION_STATUS, &status);
+                            if (status == GL_FALSE) {
+                                return false;
                             }
                         }
                     }
                 }
+            }
 
-                if (!token->gl.program) {
-                    // link the program, this also cannot fail because status is checked later.
-                    token->gl.program = linkProgram(mDriver.getContext(),
-                            token->gl.shaders, token->attributes);
-                    if (KHR_parallel_shader_compile) {
-                        // wait until the link finishes...
-                        return false;
-                    }
+            if (!token->gl.program) {
+                // link the program, this also cannot fail because status is checked later.
+                token->gl.program = linkProgram(mDriver.getContext(),
+                        token->gl.shaders, token->attributes);
+                if (KHR_parallel_shader_compile) {
+                    // wait until the link finishes...
+                    return false;
                 }
             }
 
             assert_invariant(token->gl.program);
 
-            if (token->key && !mShaderCompilerThreadCount) {
+            mCallbackManager.put(token->handle);
+
+            if (token->key) {
                 // TODO: technically we don't have to cache right now. Is it advantageous to
                 //       do this later, maybe depending on CPU usage?
                 // attempt to cache if we don't have a thread pool (otherwise it's done
@@ -343,31 +322,12 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
     return token;
 }
 
-bool ShaderCompilerService::isProgramReady(
-        const ShaderCompilerService::program_token_t& token) const noexcept {
-
-    assert_invariant(token);
-
-    if (!token->gl.program) {
-        return false;
-    }
-
-    if (KHR_parallel_shader_compile) {
-        GLint status = GL_FALSE;
-        glGetProgramiv(token->gl.program, GL_COMPLETION_STATUS, &status);
-        return (bool)status;
-    }
-
-    // If gl.program is set, this means the program was linked. Some drivers may defer the link
-    // in which case we might block in getProgram() when we check the program status.
-    // Unfortunately, this is nothing we can do about that.
-    return bool(token->gl.program);
-}
-
 GLuint ShaderCompilerService::getProgram(ShaderCompilerService::program_token_t& token) {
     GLuint const program = initialize(token);
     assert_invariant(token == nullptr);
+#ifndef FILAMENT_ENABLE_MATDBG
     assert_invariant(program);
+#endif
     return program;
 }
 
@@ -395,74 +355,26 @@ GLuint ShaderCompilerService::getProgram(ShaderCompilerService::program_token_t&
 }
 
 void ShaderCompilerService::tick() {
-    executeTickOps();
+    // we don't need to run executeTickOps() if we're using the thread-pool
+    if (UTILS_UNLIKELY(!mShaderCompilerThreadCount)) {
+        executeTickOps();
+    }
 }
 
-void ShaderCompilerService::notifyWhenAllProgramsAreReady(CompilerPriorityQueue priority,
+void ShaderCompilerService::notifyWhenAllProgramsAreReady(
         CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
-
-    if (KHR_parallel_shader_compile || mShaderCompilerThreadCount) {
-        // list all programs up to this point, both low and high priority
-
-        using TokenVector = utils::FixedCapacityVector<
-                program_token_t, std::allocator<program_token_t>, false>;
-        TokenVector tokens{ TokenVector::with_capacity(mRunAtNextTickOps.size()) };
-
-        for (auto& [itemPriority, token, job] : mRunAtNextTickOps) {
-            if (token && job.fn && itemPriority == priority) {
-                tokens.push_back(token);
-            }
-        }
-
-        runAtNextTick(priority, nullptr, {
-                [this, tokens = std::move(tokens)](Job const& job) {
-            for (auto const& token : tokens) {
-                assert_invariant(token);
-                if (!isProgramReady(token)) {
-                    // one of the program is not ready, try next time
-                    return false;
-                }
-            }
-            if (job.callback) {
-                // all programs are ready, we can call the callbacks
-                mDriver.scheduleCallback(job.handler, job.user, job.callback);
-            }
-            // and we're done
-            return true;
-        }, handler, user, callback });
-
-        return;
+    if (callback) {
+        mCallbackManager.setCallback(handler, callback, user);
     }
-
-    // we don't have KHR_parallel_shader_compile
-
-    runAtNextTick(priority, nullptr, {[this](Job const& job) {
-        mDriver.scheduleCallback(job.handler, job.user, job.callback);
-        return true;
-    }, handler, user, callback });
-
-    // TODO: we could spread the compiles over several frames, the tick() below then is not
-    //       needed here. We keep it for now as to not change the current behavior too much.
-    // this will block until all programs are linked
-    tick();
 }
 
 // ------------------------------------------------------------------------------------------------
 
 void ShaderCompilerService::getProgramFromCompilerPool(program_token_t& token) noexcept {
-    OpenGLProgramToken::ProgramBinary const& binary{ token->get() };
+    OpenGLProgramToken::ProgramData const& programData{ token->get() };
     if (!token->canceled) {
-        token->gl.shaders = binary.shaders;
-        if (UTILS_LIKELY(mUseSharedContext)) {
-            token->gl.program = binary.program;
-        }
-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-        else {
-            token->gl.program = glCreateProgram();
-            glProgramBinary(token->gl.program, binary.format,
-                    binary.blob.data(), GLsizei(binary.blob.size()));
-        }
-#endif
+        token->gl.shaders = programData.shaders;
+        token->gl.program = programData.program;
     }
 }
 
@@ -489,8 +401,17 @@ GLuint ShaderCompilerService::initialize(program_token_t& token) noexcept {
             // we force the program link -- which might stall, either here or below in
             // checkProgramStatus(), but we don't have a choice, we need to use the program now.
             token->compiler.cancelTickOp(token);
+
             token->gl.program = linkProgram(mDriver.getContext(),
                     token->gl.shaders, token->attributes);
+
+            assert_invariant(token->gl.program);
+
+            mCallbackManager.put(token->handle);
+
+            if (token->key) {
+                OpenGLBlobCache::insert(mDriver.mPlatform, token->key, token->gl.program);
+            }
         } else {
             // if we don't have a program yet, block until we get it.
             tick();
diff --git a/filament/backend/src/opengl/ShaderCompilerService.h b/filament/backend/src/opengl/ShaderCompilerService.h
index 668d4e31f24..0d8cb191929 100644
--- a/filament/backend/src/opengl/ShaderCompilerService.h
+++ b/filament/backend/src/opengl/ShaderCompilerService.h
@@ -19,6 +19,7 @@
 
 #include "gl_headers.h"
 
+#include "CallbackManager.h"
 #include "CompilerThreadPool.h"
 
 #include <backend/CallbackHandler.h>
@@ -65,16 +66,14 @@ class ShaderCompilerService {
 
     ~ShaderCompilerService() noexcept;
 
+    bool isParallelShaderCompileSupported() const noexcept;
+
     void init() noexcept;
     void terminate() noexcept;
 
     // creates a program (compile + link) asynchronously if supported
     program_token_t createProgram(utils::CString const& name, Program&& program);
 
-    // Returns true if the program is linked (successfully or not). Guarantees that
-    // getProgram() won't block. Does not block.
-    bool isProgramReady(const program_token_t& token) const noexcept;
-
     // Return the GL program, blocks if necessary. The Token is destroyed and becomes invalid.
     GLuint getProgram(program_token_t& token);
 
@@ -91,20 +90,17 @@ class ShaderCompilerService {
     static void* getUserData(const program_token_t& token) noexcept;
 
     // call the callback when all active programs are ready
-    void notifyWhenAllProgramsAreReady(CompilerPriorityQueue priority,
+    void notifyWhenAllProgramsAreReady(
             CallbackHandler* handler, CallbackHandler::Callback callback, void* user);
 
 private:
     OpenGLDriver& mDriver;
+    CallbackManager mCallbackManager;
     CompilerThreadPool mCompilerThreadPool;
 
     const bool KHR_parallel_shader_compile;
     uint32_t mShaderCompilerThreadCount = 0u;
 
-    // For now, we assume shared contexts are supported everywhere. If they are not,
-    // we don't use the shader compiler pool. However, the code supports it.
-    static constexpr bool mUseSharedContext = true;
-
     GLuint initialize(ShaderCompilerService::program_token_t& token) noexcept;
 
     static void getProgramFromCompilerPool(program_token_t& token) noexcept;
diff --git a/filament/backend/src/opengl/platforms/PlatformEGL.cpp b/filament/backend/src/opengl/platforms/PlatformEGL.cpp
index 08b20e0b42c..60652b54156 100644
--- a/filament/backend/src/opengl/platforms/PlatformEGL.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformEGL.cpp
@@ -115,9 +115,14 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon
 
     auto extensions = GLUtils::split(eglQueryString(mEGLDisplay, EGL_EXTENSIONS));
     ext.egl.ANDROID_recordable = extensions.has("EGL_ANDROID_recordable");
-    ext.egl.KHR_create_context = extensions.has("EGL_KHR_create_context");
     ext.egl.KHR_gl_colorspace = extensions.has("EGL_KHR_gl_colorspace");
+    ext.egl.KHR_create_context = extensions.has("EGL_KHR_create_context");
     ext.egl.KHR_no_config_context = extensions.has("EGL_KHR_no_config_context");
+    ext.egl.KHR_surfaceless_context = extensions.has("EGL_KHR_surfaceless_context");
+    if (ext.egl.KHR_create_context) {
+        // KHR_create_context implies KHR_surfaceless_context for ES3.x contexts, advertised or not
+        ext.egl.KHR_surfaceless_context = true;
+    }
 
     eglCreateSyncKHR = (PFNEGLCREATESYNCKHRPROC) eglGetProcAddress("eglCreateSyncKHR");
     eglDestroySyncKHR = (PFNEGLDESTROYSYNCKHRPROC) eglGetProcAddress("eglDestroySyncKHR");
@@ -181,13 +186,6 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon
         eglConfig = mEGLConfig;
     }
 
-    // create the dummy surface, just for being able to make the context current.
-    mEGLDummySurface = eglCreatePbufferSurface(mEGLDisplay, mEGLConfig, pbufferAttribs);
-    if (UTILS_UNLIKELY(mEGLDummySurface == EGL_NO_SURFACE)) {
-        logEglError("eglCreatePbufferSurface");
-        goto error;
-    }
-
     for (size_t tries = 0; tries < 3; tries++) {
         mEGLContext = eglCreateContext(mEGLDisplay, eglConfig,
                 (EGLContext)sharedContext, contextAttribs.data());
@@ -220,6 +218,26 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon
         goto error;
     }
 
+    if (ext.egl.KHR_surfaceless_context) {
+        // Adreno 306 driver advertises KHR_create_context but doesn't support passing
+        // EGL_NO_SURFACE to eglMakeCurrent with a 3.0 context.
+        if (UTILS_UNLIKELY(!eglMakeCurrent(mEGLDisplay,
+                EGL_NO_SURFACE, EGL_NO_SURFACE, mEGLContext))) {
+            if (eglGetError() == EGL_BAD_MATCH) {
+                ext.egl.KHR_surfaceless_context = false;
+            }
+        }
+    }
+
+    if (UTILS_UNLIKELY(!ext.egl.KHR_surfaceless_context)) {
+        // create the dummy surface, just for being able to make the context current.
+        mEGLDummySurface = eglCreatePbufferSurface(mEGLDisplay, mEGLConfig, pbufferAttribs);
+        if (UTILS_UNLIKELY(mEGLDummySurface == EGL_NO_SURFACE)) {
+            logEglError("eglCreatePbufferSurface");
+            goto error;
+        }
+    }
+
     if (UTILS_UNLIKELY(!makeCurrent(mEGLDummySurface, mEGLDummySurface))) {
         // eglMakeCurrent failed
         logEglError("eglMakeCurrent");
@@ -255,7 +273,7 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon
 }
 
 bool PlatformEGL::isExtraContextSupported() const noexcept {
-    return true;
+    return ext.egl.KHR_surfaceless_context;
 }
 
 void PlatformEGL::createContext(bool shared) {
@@ -276,6 +294,22 @@ void PlatformEGL::createContext(bool shared) {
     mAdditionalContexts.push_back(context);
 }
 
+void PlatformEGL::releaseContext() noexcept {
+    // Remember the calling thread's context before unbinding it, then destroy it.
+    EGLContext const current = eglGetCurrentContext();
+    eglMakeCurrent(mEGLDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+    if (current != EGL_NO_CONTEXT) {
+        eglDestroyContext(mEGLDisplay, current);
+    }
+
+    // Forget the context so that terminate() does not destroy it a second time.
+    auto& contexts = mAdditionalContexts;
+    contexts.erase(std::remove(contexts.begin(), contexts.end(), current), contexts.end());
+
+    // Let EGL release the per-thread state it keeps for the calling thread.
+    eglReleaseThread();
+}
+
 EGLBoolean PlatformEGL::makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) noexcept {
     if (UTILS_UNLIKELY((drawSurface != mCurrentDrawSurface || readSurface != mCurrentReadSurface))) {
         mCurrentDrawSurface = drawSurface;
@@ -286,8 +320,11 @@ EGLBoolean PlatformEGL::makeCurrent(EGLSurface drawSurface, EGLSurface readSurfa
 }
 
 void PlatformEGL::terminate() noexcept {
+    // it's always allowed to use EGL_NO_SURFACE, EGL_NO_CONTEXT
     eglMakeCurrent(mEGLDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
-    eglDestroySurface(mEGLDisplay, mEGLDummySurface);
+    if (mEGLDummySurface) {
+        eglDestroySurface(mEGLDisplay, mEGLDummySurface);
+    }
     eglDestroyContext(mEGLDisplay, mEGLContext);
     for (auto context : mAdditionalContexts) {
         eglDestroyContext(mEGLDisplay, context);
diff --git a/filament/backend/src/vulkan/VulkanContext.h b/filament/backend/src/vulkan/VulkanContext.h
index a22951deb61..2dba9d3bdeb 100644
--- a/filament/backend/src/vulkan/VulkanContext.h
+++ b/filament/backend/src/vulkan/VulkanContext.h
@@ -97,8 +97,7 @@ struct VulkanContext {
             }
             flags >>= 1;
         }
-        ASSERT_POSTCONDITION(false, "Unable to find a memory type that meets requirements.");
-        return (uint32_t) ~0ul;
+        return (uint32_t) VK_MAX_MEMORY_TYPES;
     }
 
     inline VkFormat getDepthFormat() const {
diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp
index 374f6ffe876..c1c35014e6b 100644
--- a/filament/backend/src/vulkan/VulkanDriver.cpp
+++ b/filament/backend/src/vulkan/VulkanDriver.cpp
@@ -276,7 +276,7 @@ void VulkanDriver::setFrameScheduledCallback(Handle<HwSwapChain> sch,
 }
 
 void VulkanDriver::setFrameCompletedCallback(Handle<HwSwapChain> sch,
-        FrameCompletedCallback callback, void* user) {
+        CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
 }
 
 void VulkanDriver::setPresentationTime(int64_t monotonic_clock_ns) {
@@ -755,6 +755,14 @@ bool VulkanDriver::isSRGBSwapChainSupported() {
     return mPlatform->isSRGBSwapChainSupported();
 }
 
+bool VulkanDriver::isStereoSupported() {
+    return true; // reported unconditionally; nothing is queried from the device here
+}
+
+bool VulkanDriver::isParallelShaderCompileSupported() {
+    return false; // FMaterial::compile() skips pre-compiling variants when the driver reports false
+}
+
 bool VulkanDriver::isWorkaroundNeeded(Workaround workaround) {
     switch (workaround) {
         case Workaround::SPLIT_EASU: {
diff --git a/filament/backend/src/vulkan/VulkanHandles.cpp b/filament/backend/src/vulkan/VulkanHandles.cpp
index e3d4579cd37..29a9f023ba5 100644
--- a/filament/backend/src/vulkan/VulkanHandles.cpp
+++ b/filament/backend/src/vulkan/VulkanHandles.cpp
@@ -87,21 +87,26 @@ VulkanProgram::VulkanProgram(VkDevice device, const Program& builder) noexcept :
         };
 
         for (size_t i = 0; i < specializationConstants.size(); i++) {
-            const uint32_t offset = uint32_t(i) * 4;
-            std::visit([&](auto&& arg) {
-                using T = std::decay_t<decltype(arg)>;
-                pEntries[i] = {
-                        .constantID = specializationConstants[i].id,
-                        .offset = offset,
-                        // Turns out vulkan expects the size of bool to be 4 (verified through
-                        // validation layer). So all expected types are of 4 bytes.
-                        .size = 4,
-                };
-                T* const addr = (T*)((char*)pData + offset);
-                *addr = arg;
-            }, specializationConstants[i].value);
+            uint32_t const offset = uint32_t(i) * 4;
+            pEntries[i] = {
+                .constantID = specializationConstants[i].id,
+                .offset = offset,
+                // Note that bools are 4-bytes in Vulkan
+                // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkBool32.html
+                .size = 4,
+            };
+
+            using SpecConstant = Program::SpecializationConstant::Type;
+            char const* addr = (char*)pData + offset;
+            SpecConstant const& arg = specializationConstants[i].value;
+            if (std::holds_alternative<bool>(arg)) {
+                *((VkBool32*)addr) = std::get<bool>(arg) ? VK_TRUE : VK_FALSE;
+            } else if (std::holds_alternative<float>(arg)) {
+                *((float*)addr) = std::get<float>(arg);
+            } else {
+                *((int32_t*)addr) = std::get<int32_t>(arg);
+            }
         }
-
         bundle.specializationInfos = pInfo;
     }
 
diff --git a/filament/backend/src/vulkan/VulkanReadPixels.cpp b/filament/backend/src/vulkan/VulkanReadPixels.cpp
index e299d206597..fd51344b830 100644
--- a/filament/backend/src/vulkan/VulkanReadPixels.cpp
+++ b/filament/backend/src/vulkan/VulkanReadPixels.cpp
@@ -176,12 +176,28 @@ void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x
     VkMemoryRequirements memReqs;
     VkDeviceMemory stagingMemory;
     vkGetImageMemoryRequirements(device, stagingImage, &memReqs);
+
+    uint32_t memoryTypeIndex = selectMemoryFunc(memReqs.memoryTypeBits,
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+                    | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+
+    // If VK_MEMORY_PROPERTY_HOST_CACHED_BIT is not supported, we try only
+    // HOST_VISIBLE+HOST_COHERENT.  HOST_CACHED helps a lot with readpixels performance.
+    if (memoryTypeIndex >= VK_MAX_MEMORY_TYPES) {
+        memoryTypeIndex = selectMemoryFunc(memReqs.memoryTypeBits,
+                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+        utils::slog.w
+                << "readPixels is slow because VK_MEMORY_PROPERTY_HOST_CACHED_BIT is not available"
+                << utils::io::endl;
+    }
+
+    ASSERT_POSTCONDITION(memoryTypeIndex < VK_MAX_MEMORY_TYPES,
+            "VulkanReadPixels: unable to find a memory type that meets requirements.");
+
     VkMemoryAllocateInfo const allocInfo = {
             .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
             .allocationSize = memReqs.size,
-            .memoryTypeIndex = selectMemoryFunc(memReqs.memoryTypeBits,
-                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
-                            | VK_MEMORY_PROPERTY_HOST_CACHED_BIT),
+            .memoryTypeIndex = memoryTypeIndex,
     };
 
     vkAllocateMemory(device, &allocInfo, VKALLOC, &stagingMemory);
diff --git a/filament/backend/src/vulkan/VulkanTexture.cpp b/filament/backend/src/vulkan/VulkanTexture.cpp
index 6b86def4e8a..8df63626a3d 100644
--- a/filament/backend/src/vulkan/VulkanTexture.cpp
+++ b/filament/backend/src/vulkan/VulkanTexture.cpp
@@ -167,11 +167,17 @@ VulkanTexture::VulkanTexture(VkDevice device, VkPhysicalDevice physicalDevice,
     // Allocate memory for the VkImage and bind it.
     VkMemoryRequirements memReqs = {};
     vkGetImageMemoryRequirements(mDevice, mTextureImage, &memReqs);
+
+    uint32_t memoryTypeIndex
+            = context.selectMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+
+    ASSERT_POSTCONDITION(memoryTypeIndex < VK_MAX_MEMORY_TYPES,
+            "VulkanTexture: unable to find a memory type that meets requirements.");
+
     VkMemoryAllocateInfo allocInfo = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
         .allocationSize = memReqs.size,
-        .memoryTypeIndex = context.selectMemoryType(memReqs.memoryTypeBits,
-                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
+        .memoryTypeIndex = memoryTypeIndex,
     };
     error = vkAllocateMemory(mDevice, &allocInfo, nullptr, &mTextureImageMemory);
     ASSERT_POSTCONDITION(!error, "Unable to allocate image memory.");
diff --git a/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp b/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp
index 41a4f891115..f83e8be93b9 100644
--- a/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp
+++ b/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp
@@ -57,11 +57,17 @@ std::tuple<VkImage, VkDeviceMemory> createImageAndMemory(VulkanContext const& co
     VkDeviceMemory imageMemory;
     VkMemoryRequirements memReqs;
     vkGetImageMemoryRequirements(device, image, &memReqs);
+
+    uint32_t memoryTypeIndex
+            = context.selectMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+
+    ASSERT_POSTCONDITION(memoryTypeIndex < VK_MAX_MEMORY_TYPES,
+            "VulkanPlatformSwapChainImpl: unable to find a memory type that meets requirements.");
+
     VkMemoryAllocateInfo allocInfo = {
             .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
             .allocationSize = memReqs.size,
-            .memoryTypeIndex
-            = context.selectMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+            .memoryTypeIndex = memoryTypeIndex,
     };
     result = vkAllocateMemory(device, &allocInfo, nullptr, &imageMemory);
     ASSERT_POSTCONDITION(result == VK_SUCCESS, "Unable to allocate image memory.");
diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h
index e4d601b3cde..f4173f26144 100644
--- a/filament/include/filament/Engine.h
+++ b/filament/include/filament/Engine.h
@@ -513,6 +513,14 @@ class UTILS_PUBLIC Engine {
      */
     size_t getMaxAutomaticInstances() const noexcept;
 
+    /**
+     * Queries the device and platform for instanced stereo rendering support.
+     *
+     * @return true if stereo rendering is supported, false otherwise
+     * @see View::setStereoscopicOptions
+     */
+    bool isStereoSupported() const noexcept;
+
     /**
      * @return EntityManager used by filament
      */
diff --git a/filament/include/filament/SwapChain.h b/filament/include/filament/SwapChain.h
index 9f7a328199e..29413275a42 100644
--- a/filament/include/filament/SwapChain.h
+++ b/filament/include/filament/SwapChain.h
@@ -18,10 +18,13 @@
 #define TNT_FILAMENT_SWAPCHAIN_H
 
 #include <filament/FilamentAPI.h>
+
+#include <backend/CallbackHandler.h>
 #include <backend/DriverEnums.h>
 #include <backend/PresentCallable.h>
 
 #include <utils/compiler.h>
+#include <utils/Invocable.h>
 
 namespace filament {
 
@@ -148,7 +151,7 @@ class Engine;
 class UTILS_PUBLIC SwapChain : public FilamentAPI {
 public:
     using FrameScheduledCallback = backend::FrameScheduledCallback;
-    using FrameCompletedCallback = backend::FrameCompletedCallback;
+    using FrameCompletedCallback = utils::Invocable<void(SwapChain*)>;
 
     /**
      * Requests a SwapChain with an alpha channel.
@@ -241,17 +244,23 @@ class UTILS_PUBLIC SwapChain : public FilamentAPI {
      * contents have completed rendering on the GPU.
      *
      * Use SwapChain::setFrameCompletedCallback to set a callback on an individual SwapChain. Each
-     * time a frame completes GPU rendering, the callback will be called with optional user data.
+     * time a frame completes GPU rendering, the callback will be called.
      *
-     * The FrameCompletedCallback is guaranteed to be called on the main Filament thread.
+     * If handler is nullptr, the callback is guaranteed to be called on the main Filament thread.
      *
-     * @param callback    A callback, or nullptr to unset.
-     * @param user        An optional pointer to user data passed to the callback function.
+     * Use \c setFrameCompletedCallback() (with default arguments) to unset the callback.
+     *
+     * @param handler     Handler to dispatch the callback or nullptr for the default handler.
+     * @param callback    Callback called when each frame completes.
      *
      * @remark Only Filament's Metal backend supports frame callbacks. Other backends ignore the
      * callback (which will never be called) and proceed normally.
+     *
+     * @see CallbackHandler
      */
-    void setFrameCompletedCallback(FrameCompletedCallback callback, void* user = nullptr);
+    void setFrameCompletedCallback(backend::CallbackHandler* handler = nullptr,
+            FrameCompletedCallback&& callback = {}) noexcept;
+
 };
 
 } // namespace filament
diff --git a/filament/include/filament/View.h b/filament/include/filament/View.h
index b19ea3bfe07..7c0f1683e42 100644
--- a/filament/include/filament/View.h
+++ b/filament/include/filament/View.h
@@ -689,9 +689,12 @@ class UTILS_PUBLIC View : public FilamentAPI {
      * - shadowing
      * - punctual lights
      *
+     * Stereo rendering depends on device and platform support. To check if stereo rendering is
+     * supported, use Engine::isStereoSupported().
+     *
      * @param options The stereoscopic options to use on this view
      */
-    void setStereoscopicOptions(StereoscopicOptions const& options) noexcept;
+    void setStereoscopicOptions(StereoscopicOptions const& options);
 
     /**
      * Returns the stereoscopic options associated with this View.
diff --git a/filament/src/Engine.cpp b/filament/src/Engine.cpp
index ecedc590efb..01b956484c4 100644
--- a/filament/src/Engine.cpp
+++ b/filament/src/Engine.cpp
@@ -327,6 +327,10 @@ size_t Engine::getMaxAutomaticInstances() const noexcept {
     return downcast(this)->getMaxAutomaticInstances();
 }
 
+bool Engine::isStereoSupported() const noexcept {
+    return downcast(this)->isStereoSupported(); // FEngine forwards the query to its driver
+}
+
 #if defined(__EMSCRIPTEN__)
 void Engine::resetBackendState() noexcept {
     downcast(this)->resetBackendState();
diff --git a/filament/src/Froxelizer.cpp b/filament/src/Froxelizer.cpp
index 995fc76af24..52e0d71a845 100644
--- a/filament/src/Froxelizer.cpp
+++ b/filament/src/Froxelizer.cpp
@@ -81,6 +81,21 @@ struct Froxelizer::FroxelThreadData :
         public std::array<LightGroupType, FROXEL_BUFFER_MAX_ENTRY_COUNT> {
 };
 
+
+// Bitwise comparison of two matrices: true only when every 32-bit word is identical. Matrices
+// differing only by +0 vs -0 compare as different. NaNs are compared by bit pattern, not value.
+static bool fuzzyEqual(mat4f const& UTILS_RESTRICT l, mat4f const& UTILS_RESTRICT r) noexcept {
+    constexpr size_t WORD_COUNT = sizeof(mat4f) / sizeof(uint32_t);
+    auto const lhs = reinterpret_cast<uint32_t const*>( reinterpret_cast<char const*>(&l) );
+    auto const rhs = reinterpret_cast<uint32_t const*>( reinterpret_cast<char const*>(&r) );
+    uint32_t mismatch = 0;
+    for (size_t word = 0; word < WORD_COUNT; word++) {
+        mismatch |= lhs[word] ^ rhs[word]; // branch-free accumulation; clang fully vectorizes this
+    }
+    return mismatch == 0;
+}
+
+
 Froxelizer::Froxelizer(FEngine& engine)
         : mArena("froxel", PER_FROXELDATA_ARENA_SIZE),
           mZLightNear(FROXEL_FIRST_SLICE_DEPTH),
@@ -144,9 +159,8 @@ void Froxelizer::setViewport(filament::Viewport const& viewport) noexcept {
 }
 
 void Froxelizer::setProjection(const mat4f& projection,
-        float near,
-        UTILS_UNUSED float far) noexcept {
-    if (UTILS_UNLIKELY(mat4f::fuzzyEqual(mProjection, projection))) {
+        float near, UTILS_UNUSED float far) noexcept {
+    if (UTILS_UNLIKELY(!fuzzyEqual(mProjection, projection))) {
         mProjection = projection;
         mNear = near;
         mDirtyFlags |= PROJECTION_CHANGED;
diff --git a/filament/src/SwapChain.cpp b/filament/src/SwapChain.cpp
index ae1498cc916..c30bce69416 100644
--- a/filament/src/SwapChain.cpp
+++ b/filament/src/SwapChain.cpp
@@ -28,8 +28,9 @@ void SwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void*
     return downcast(this)->setFrameScheduledCallback(callback, user);
 }
 
-void SwapChain::setFrameCompletedCallback(FrameCompletedCallback callback, void* user) {
-    return downcast(this)->setFrameCompletedCallback(callback, user);
+void SwapChain::setFrameCompletedCallback(backend::CallbackHandler* handler,
+            utils::Invocable<void(SwapChain*)>&& callback) noexcept {
+    return downcast(this)->setFrameCompletedCallback(handler, std::move(callback));
 }
 
 bool SwapChain::isSRGBSwapChainSupported(Engine& engine) noexcept {
diff --git a/filament/src/View.cpp b/filament/src/View.cpp
index bc5da818290..dd8e9380a75 100644
--- a/filament/src/View.cpp
+++ b/filament/src/View.cpp
@@ -283,7 +283,7 @@ bool View::isStencilBufferEnabled() const noexcept {
     return downcast(this)->isStencilBufferEnabled();
 }
 
-void View::setStereoscopicOptions(const StereoscopicOptions& options) noexcept {
+void View::setStereoscopicOptions(const StereoscopicOptions& options) {
     return downcast(this)->setStereoscopicOptions(options);
 }
 
diff --git a/filament/src/details/Engine.h b/filament/src/details/Engine.h
index 134cbe5067c..d9a7caeb917 100644
--- a/filament/src/details/Engine.h
+++ b/filament/src/details/Engine.h
@@ -182,6 +182,8 @@ class FEngine : public Engine {
         return CONFIG_MAX_INSTANCES;
     }
 
+    bool isStereoSupported() const noexcept { return getDriver().isStereoSupported(); }
+
     PostProcessManager const& getPostProcessManager() const noexcept {
         return mPostProcessManager;
     }
diff --git a/filament/src/details/Material.cpp b/filament/src/details/Material.cpp
index 9b3f8341572..f9cf7405cdd 100644
--- a/filament/src/details/Material.cpp
+++ b/filament/src/details/Material.cpp
@@ -478,15 +478,22 @@ void FMaterial::compile(CompilerPriorityQueue priority,
         backend::CallbackHandler* handler,
         utils::Invocable<void(Material*)>&& callback) noexcept {
 
+    // Turn off the STE variant if stereo is not supported.
+    if (!mEngine.getDriverApi().isStereoSupported()) {
+        variantSpec &= ~UserVariantFilterMask(UserVariantFilterBit::STE);
+    }
+
     UserVariantFilterMask const variantFilter =
             ~variantSpec & UserVariantFilterMask(UserVariantFilterBit::ALL);
 
-    auto const& variants = isVariantLit() ?
-            VariantUtils::getLitVariants() : VariantUtils::getUnlitVariants();
-    for (auto const variant : variants) {
-        if (!variantFilter || variant == Variant::filterUserVariant(variant, variantFilter)) {
-            if (hasVariant(variant)) {
-                prepareProgram(variant, priority);
+    if (UTILS_LIKELY(mEngine.getDriverApi().isParallelShaderCompileSupported())) {
+        auto const& variants = isVariantLit() ?
+                VariantUtils::getLitVariants() : VariantUtils::getUnlitVariants();
+        for (auto const variant: variants) {
+            if (!variantFilter || variant == Variant::filterUserVariant(variant, variantFilter)) {
+                if (hasVariant(variant)) {
+                    prepareProgram(variant, priority);
+                }
             }
         }
     }
diff --git a/filament/src/details/Material.h b/filament/src/details/Material.h
index a5372249063..e4b6f975c93 100644
--- a/filament/src/details/Material.h
+++ b/filament/src/details/Material.h
@@ -101,7 +101,7 @@ class FMaterial : public Material {
     // Must be called after prepareProgram().
     [[nodiscard]] backend::Handle<backend::HwProgram> getProgram(Variant variant) const noexcept {
 #if FILAMENT_ENABLE_MATDBG
-        assert_invariant(variant.key < VARIANT_COUNT);
+        assert_invariant((size_t)variant.key < VARIANT_COUNT);
         std::unique_lock<utils::Mutex> lock(mActiveProgramsLock);
         mActivePrograms.set(variant.key);
         lock.unlock();
diff --git a/filament/src/details/SwapChain.cpp b/filament/src/details/SwapChain.cpp
index ba13be2e2d3..d9cb80911d9 100644
--- a/filament/src/details/SwapChain.cpp
+++ b/filament/src/details/SwapChain.cpp
@@ -38,8 +38,24 @@ void FSwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void
     mEngine.getDriverApi().setFrameScheduledCallback(mSwapChain, callback, user);
 }
 
-void FSwapChain::setFrameCompletedCallback(FrameCompletedCallback callback, void* user) {
-    mEngine.getDriverApi().setFrameCompletedCallback(mSwapChain, callback, user);
+// Sets the driver's frame-completed callback; an empty callback or a failed allocation unsets it.
+void FSwapChain::setFrameCompletedCallback(backend::CallbackHandler* handler,
+                utils::Invocable<void(SwapChain*)>&& callback) noexcept {
+    struct Callback {
+        utils::Invocable<void(SwapChain*)> f;
+        SwapChain* s;
+        static void func(void* user) {
+            auto* const c = reinterpret_cast<Callback*>(user);
+            c->f(c->s);
+            delete c; // NOTE(review): assumes the driver calls func() at most once -- confirm
+        }
+    };
+    auto* const cb = callback ? new(std::nothrow) Callback{ std::move(callback), this } : nullptr;
+    if (cb) {
+        mEngine.getDriverApi().setFrameCompletedCallback(mSwapChain, handler, &Callback::func, cb);
+    } else {
+        mEngine.getDriverApi().setFrameCompletedCallback(mSwapChain, nullptr, nullptr, nullptr);
+    }
 }
 
 bool FSwapChain::isSRGBSwapChainSupported(FEngine& engine) noexcept {
diff --git a/filament/src/details/SwapChain.h b/filament/src/details/SwapChain.h
index c1a3f436d2c..032b5e3f914 100644
--- a/filament/src/details/SwapChain.h
+++ b/filament/src/details/SwapChain.h
@@ -23,6 +23,9 @@
 
 #include <filament/SwapChain.h>
 
+#include <backend/CallbackHandler.h>
+
+#include <utils/Invocable.h>
 #include <utils/compiler.h>
 
 namespace filament {
@@ -61,7 +64,8 @@ class FSwapChain : public SwapChain {
 
     void setFrameScheduledCallback(FrameScheduledCallback callback, void* user);
 
-    void setFrameCompletedCallback(FrameCompletedCallback callback, void* user);
+    void setFrameCompletedCallback(backend::CallbackHandler* handler,
+                utils::Invocable<void(SwapChain*)>&& callback) noexcept;
 
     static bool isSRGBSwapChainSupported(FEngine& engine) noexcept;
 
diff --git a/filament/src/details/View.cpp b/filament/src/details/View.cpp
index bb85c059bf8..5b932a42b7f 100644
--- a/filament/src/details/View.cpp
+++ b/filament/src/details/View.cpp
@@ -58,6 +58,7 @@ static constexpr float PID_CONTROLLER_Kd = 0.0f;
 FView::FView(FEngine& engine)
         : mFroxelizer(engine),
           mFogEntity(engine.getEntityManager().create()),
+          mIsStereoSupported(engine.getDriverApi().isStereoSupported()),
           mPerViewUniforms(engine),
           mShadowMapManager(engine) {
     DriverApi& driver = engine.getDriverApi();
@@ -1117,7 +1118,9 @@ View::PickingQuery& FView::pick(uint32_t x, uint32_t y, backend::CallbackHandler
     return *pQuery;
 }
 
-void FView::setStereoscopicOptions(const StereoscopicOptions& options) noexcept {
+void FView::setStereoscopicOptions(const StereoscopicOptions& options) {
+    ASSERT_PRECONDITION(!options.enabled || mIsStereoSupported,
+            "Stereo rendering is not supported.");
     mStereoscopicOptions = options;
 }
 
diff --git a/filament/src/details/View.h b/filament/src/details/View.h
index 60122aec4fd..f452ce22f18 100644
--- a/filament/src/details/View.h
+++ b/filament/src/details/View.h
@@ -193,7 +193,7 @@ class FView : public View {
 
     bool isStencilBufferEnabled() const noexcept { return mStencilBufferEnabled; }
 
-    void setStereoscopicOptions(StereoscopicOptions const& options) noexcept;
+    void setStereoscopicOptions(StereoscopicOptions const& options);
 
     FCamera const* getDirectionalLightCamera() const noexcept {
         return &mShadowMapManager.getShadowMap(0)->getDebugCamera();
@@ -524,6 +524,7 @@ class FView : public View {
     const FColorGrading* mColorGrading = nullptr;
     const FColorGrading* mDefaultColorGrading = nullptr;
     utils::Entity mFogEntity{};
+    bool mIsStereoSupported : 1;
 
     PIDController mPidController;
     DynamicResolutionOptions mDynamicResolution;
diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec
index 33db11f276e..8c09b6521b7 100644
--- a/ios/CocoaPods/Filament.podspec
+++ b/ios/CocoaPods/Filament.podspec
@@ -1,12 +1,12 @@
 Pod::Spec.new do |spec|
   spec.name = "Filament"
-  spec.version = "1.42.0"
+  spec.version = "1.42.1"
   spec.license = { :type => "Apache 2.0", :file => "LICENSE" }
   spec.homepage = "https://google.github.io/filament"
   spec.authors = "Google LLC."
   spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL."
   spec.platform = :ios, "11.0"
-  spec.source = { :http => "https://github.com/google/filament/releases/download/v1.42.0/filament-v1.42.0-ios.tgz" }
+  spec.source = { :http => "https://github.com/google/filament/releases/download/v1.42.1/filament-v1.42.1-ios.tgz" }
 
   # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon.
   spec.pod_target_xcconfig = {
diff --git a/libs/filamat/src/shaders/CodeGenerator.cpp b/libs/filamat/src/shaders/CodeGenerator.cpp
index 90a20f321a3..b854ff3fe32 100644
--- a/libs/filamat/src/shaders/CodeGenerator.cpp
+++ b/libs/filamat/src/shaders/CodeGenerator.cpp
@@ -239,7 +239,10 @@ utils::io::sstream& CodeGenerator::generateProlog(utils::io::sstream& out, Shade
     generateSpecializationConstant(out, "CONFIG_POWER_VR_SHADER_WORKAROUNDS",
             +ReservedSpecializationConstants::CONFIG_POWER_VR_SHADER_WORKAROUNDS, false);
 
-    out << "const int CONFIG_STEREOSCOPIC_EYES = " << (int)CONFIG_STEREOSCOPIC_EYES << ";\n";
+    // CONFIG_STEREOSCOPIC_EYES is used to size arrays. On Adreno GPUs with Vulkan, it must be
+    // a preprocessor constant (i.e. a #define); declaring it as a `const int`, for example,
+    // causes a GPU crash.
+    out << "#define CONFIG_STEREOSCOPIC_EYES " << (int) CONFIG_STEREOSCOPIC_EYES << "\n";
 
     if (material.featureLevel == 0) {
         // On ES2 since we don't have post-processing, we need to emulate EGL_GL_COLORSPACE_KHR,
diff --git a/libs/math/include/math/mat2.h b/libs/math/include/math/mat2.h
index 551fe4451ed..dba9ca47230 100644
--- a/libs/math/include/math/mat2.h
+++ b/libs/math/include/math/mat2.h
@@ -235,23 +235,6 @@ class MATH_EMPTY_BASES TMat22 :
         return r;
     }
 
-    // returns false if the two matrices are different. May return false if they're the
-    // same, with some elements only differing by +0 or -0. Behaviour is undefined with NaNs.
-    static constexpr bool fuzzyEqual(TMat22 l, TMat22 r) noexcept {
-        uint64_t const* const li = reinterpret_cast<uint64_t const*>(&l);
-        uint64_t const* const ri = reinterpret_cast<uint64_t const*>(&r);
-        uint64_t result = 0;
-        // For some reason clang is not able to vectoize this loop when the number of iteration
-        // is known and constant (!?!?!). Still this is better than operator==.
-#if defined(__clang__)
-#pragma clang loop vectorize_width(2)
-#endif
-        for (size_t i = 0; i < sizeof(TMat22) / sizeof(uint64_t); i++) {
-            result |= li[i] ^ ri[i];
-        }
-        return result != 0;
-    }
-
     template<typename A>
     static constexpr TMat22 translation(const TVec2<A>& t) noexcept {
         TMat22 r;
diff --git a/libs/math/include/math/mat4.h b/libs/math/include/math/mat4.h
index fa5301adfaa..d44081b2648 100644
--- a/libs/math/include/math/mat4.h
+++ b/libs/math/include/math/mat4.h
@@ -272,24 +272,6 @@ class MATH_EMPTY_BASES TMat44 :
     template<typename U, typename V>
     constexpr TMat44(const TMat33<U>& matrix, const TVec4<V>& column3) noexcept;
 
-    /*
-     *  helpers
-     */
-
-    // returns false if the two matrices are different. May return false if they're the
-    // same, with some elements only differing by +0 or -0. Behaviour is undefined with NaNs.
-    static constexpr bool fuzzyEqual(TMat44 const& l, TMat44 const& r) noexcept {
-        uint64_t const* const li = reinterpret_cast<uint64_t const*>(&l);
-        uint64_t const* const ri = reinterpret_cast<uint64_t const*>(&r);
-        uint64_t result = 0;
-        // For some reason clang is not able to vectorize this loop when the number of iteration
-        // is known and constant (!?!?!). Still this is better than operator==.
-        for (size_t i = 0; i < sizeof(TMat44) / sizeof(uint64_t); i++) {
-            result |= li[i] ^ ri[i];
-        }
-        return result != 0;
-    }
-
     static constexpr TMat44 ortho(T left, T right, T bottom, T top, T near, T far) noexcept;
 
     static constexpr TMat44 frustum(T left, T right, T bottom, T top, T near, T far) noexcept;
diff --git a/libs/math/tests/test_mat.cpp b/libs/math/tests/test_mat.cpp
index 6afe9c81fc3..d53e73a9168 100644
--- a/libs/math/tests/test_mat.cpp
+++ b/libs/math/tests/test_mat.cpp
@@ -573,6 +573,18 @@ do {                                                            \
     }                                                           \
 } while(0)
 
+//------------------------------------------------------------------------------
+// A macro to check that two vectors are element-wise equal within a tolerance of eps.
+#define EXPECT_VEC_NEAR(VEC1, VEC2, eps)                        \
+do {                                                            \
+    const decltype(VEC1) v1 = VEC1;                             \
+    const decltype(VEC2) v2 = VEC2;                             \
+    for (decltype(v1.size()) i = 0; i < v1.size(); ++i) {       \
+        EXPECT_NEAR(v1[i], v2[i], eps);                         \
+    }                                                           \
+} while(0)
+
+
 //------------------------------------------------------------------------------
 // A macro to help with type comparisons within floating point range.
 #define ASSERT_TYPE_EQ(T1, T2)                                  \
@@ -834,9 +846,10 @@ TYPED_TEST(MatTestT, cofactor) {
         M33T r = M33T::eulerZYX(rand_gen(), rand_gen(), rand_gen());
         M33T c0 = details::matrix::cofactor(r);
         M33T c1 = details::matrix::fastCofactor3(r);
-        EXPECT_VEC_EQ(c0[0], c1[0]);
-        EXPECT_VEC_EQ(c0[1], c1[1]);
-        EXPECT_VEC_EQ(c0[2], c1[2]);
+
+        EXPECT_VEC_NEAR(c0[0], c1[0], value_eps);
+        EXPECT_VEC_NEAR(c0[1], c1[1], value_eps);
+        EXPECT_VEC_NEAR(c0[2], c1[2], value_eps);
     }
 }
 
diff --git a/web/filament-js/package.json b/web/filament-js/package.json
index 9dfa743cf1c..3bb5eaa82c1 100644
--- a/web/filament-js/package.json
+++ b/web/filament-js/package.json
@@ -1,6 +1,6 @@
 {
   "name": "filament",
-  "version": "1.42.0",
+  "version": "1.42.1",
   "description": "Real-time physically based rendering engine",
   "main": "filament.js",
   "module": "filament.js",