From 701b50d4a299e24daf5d945cde28e8df079c6937 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Fri, 14 Jun 2024 22:29:56 -0400 Subject: [PATCH] Refactor CI matrix. (#1844) * Remove the os-map from the matrix. We no longer need this to lookup images. * Migrate `cxx`. * Migrate devcontainer ver and all stds list. * Migrate ctk version info. * Migrate device compiler. * Refactor cxx handling to reuse string parser. * Migrate GPU tables. * Migrate project tables. * Migrate job tables. * Migrate tag tables. * Fix exclusion handling. * Update devcontainers to remove OS info. * Bump devcontainer version to access os-less images. * It's that time again. New version of gcc struggles with replace.cu. * Fix typo. * Resolve merge issue in matrix.yaml * Fix MSVC version for cudax * Remove duplicate build job from cudax matrix. * Fix bad syntax in asynchronous_operations.rst. * Update devcontainers for cudax. --- .devcontainer/cuda11.1-gcc6/devcontainer.json | 2 +- .devcontainer/cuda11.1-gcc7/devcontainer.json | 2 +- .devcontainer/cuda11.1-gcc8/devcontainer.json | 2 +- .devcontainer/cuda11.1-gcc9/devcontainer.json | 2 +- .../cuda11.1-llvm9/devcontainer.json | 2 +- .../cuda11.8-gcc11/devcontainer.json | 2 +- .../cuda12.0-gcc10/devcontainer.json | 2 +- .../cuda12.0-gcc11/devcontainer.json | 2 +- .../cuda12.0-gcc12/devcontainer.json | 2 +- .devcontainer/cuda12.0-gcc9/devcontainer.json | 2 +- .../cuda12.0-llvm10/devcontainer.json | 2 +- .../cuda12.0-llvm11/devcontainer.json | 2 +- .../cuda12.0-llvm12/devcontainer.json | 2 +- .../cuda12.0-llvm13/devcontainer.json | 2 +- .../cuda12.0-llvm14/devcontainer.json | 2 +- .../cuda12.0-llvm9/devcontainer.json | 2 +- .../cuda12.4-gcc10/devcontainer.json | 2 +- .../cuda12.4-gcc11/devcontainer.json | 2 +- .../cuda12.4-gcc12/devcontainer.json | 2 +- .../cuda12.4-gcc13/devcontainer.json | 2 +- .devcontainer/cuda12.4-gcc7/devcontainer.json | 2 +- .devcontainer/cuda12.4-gcc8/devcontainer.json | 2 +- .devcontainer/cuda12.4-gcc9/devcontainer.json | 
2 +- .../cuda12.4-llvm10/devcontainer.json | 2 +- .../cuda12.4-llvm11/devcontainer.json | 2 +- .../cuda12.4-llvm12/devcontainer.json | 2 +- .../cuda12.4-llvm13/devcontainer.json | 2 +- .../cuda12.4-llvm14/devcontainer.json | 2 +- .../cuda12.4-llvm15/devcontainer.json | 2 +- .../cuda12.4-llvm16/devcontainer.json | 2 +- .../cuda12.4-llvm17/devcontainer.json | 2 +- .../cuda12.4-llvm18/devcontainer.json | 51 -- .../cuda12.4-llvm9/devcontainer.json | 2 +- .../cuda12.4-oneapi2023.2.0/devcontainer.json | 2 +- .devcontainer/devcontainer.json | 2 +- .devcontainer/make_devcontainers.sh | 13 +- .../actions/workflow-build/build-workflow.py | 577 +++++++++++------ ci/matrix.yaml | 584 ++++++++---------- .../extended_api/asynchronous_operations.rst | 2 +- thrust/testing/replace.cu | 10 +- 40 files changed, 666 insertions(+), 639 deletions(-) delete mode 100644 .devcontainer/cuda12.4-llvm18/devcontainer.json diff --git a/.devcontainer/cuda11.1-gcc6/devcontainer.json b/.devcontainer/cuda11.1-gcc6/devcontainer.json index da0e45b468..2114e5fd8f 100644 --- a/.devcontainer/cuda11.1-gcc6/devcontainer.json +++ b/.devcontainer/cuda11.1-gcc6/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc6-cuda11.1-ubuntu18.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc6-cuda11.1", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda11.1-gcc7/devcontainer.json b/.devcontainer/cuda11.1-gcc7/devcontainer.json index 004f540ce4..7a9a07355f 100644 --- a/.devcontainer/cuda11.1-gcc7/devcontainer.json +++ b/.devcontainer/cuda11.1-gcc7/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc7-cuda11.1-ubuntu18.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc7-cuda11.1", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda11.1-gcc8/devcontainer.json b/.devcontainer/cuda11.1-gcc8/devcontainer.json index af0b552843..50c1bdca6b 
100644 --- a/.devcontainer/cuda11.1-gcc8/devcontainer.json +++ b/.devcontainer/cuda11.1-gcc8/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc8-cuda11.1-ubuntu18.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc8-cuda11.1", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda11.1-gcc9/devcontainer.json b/.devcontainer/cuda11.1-gcc9/devcontainer.json index 5e8d314cfc..f069ed0a11 100644 --- a/.devcontainer/cuda11.1-gcc9/devcontainer.json +++ b/.devcontainer/cuda11.1-gcc9/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc9-cuda11.1-ubuntu18.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc9-cuda11.1", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda11.1-llvm9/devcontainer.json b/.devcontainer/cuda11.1-llvm9/devcontainer.json index d2121d3cdb..0b95a93677 100644 --- a/.devcontainer/cuda11.1-llvm9/devcontainer.json +++ b/.devcontainer/cuda11.1-llvm9/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm9-cuda11.1-ubuntu18.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm9-cuda11.1", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda11.8-gcc11/devcontainer.json b/.devcontainer/cuda11.8-gcc11/devcontainer.json index 65334bb6b2..20b430c4c1 100644 --- a/.devcontainer/cuda11.8-gcc11/devcontainer.json +++ b/.devcontainer/cuda11.8-gcc11/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda11.8-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc11-cuda11.8", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-gcc10/devcontainer.json b/.devcontainer/cuda12.0-gcc10/devcontainer.json index 7d809860d4..659f5a0320 100644 --- a/.devcontainer/cuda12.0-gcc10/devcontainer.json +++ 
b/.devcontainer/cuda12.0-gcc10/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc10-cuda12.0-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc10-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-gcc11/devcontainer.json b/.devcontainer/cuda12.0-gcc11/devcontainer.json index cc9b417f32..62a89b837d 100644 --- a/.devcontainer/cuda12.0-gcc11/devcontainer.json +++ b/.devcontainer/cuda12.0-gcc11/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda12.0-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc11-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-gcc12/devcontainer.json b/.devcontainer/cuda12.0-gcc12/devcontainer.json index 2df8bc6c17..1eb084299d 100644 --- a/.devcontainer/cuda12.0-gcc12/devcontainer.json +++ b/.devcontainer/cuda12.0-gcc12/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc12-cuda12.0-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc12-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-gcc9/devcontainer.json b/.devcontainer/cuda12.0-gcc9/devcontainer.json index b9c0e03c88..daa1ba6a92 100644 --- a/.devcontainer/cuda12.0-gcc9/devcontainer.json +++ b/.devcontainer/cuda12.0-gcc9/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc9-cuda12.0-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc9-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-llvm10/devcontainer.json b/.devcontainer/cuda12.0-llvm10/devcontainer.json index a64e56c1a4..8bb371e013 100644 --- a/.devcontainer/cuda12.0-llvm10/devcontainer.json +++ b/.devcontainer/cuda12.0-llvm10/devcontainer.json @@ -1,6 +1,6 @@ { 
"shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm10-cuda12.0-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm10-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-llvm11/devcontainer.json b/.devcontainer/cuda12.0-llvm11/devcontainer.json index 26bb43d1f9..ff1f07c59b 100644 --- a/.devcontainer/cuda12.0-llvm11/devcontainer.json +++ b/.devcontainer/cuda12.0-llvm11/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm11-cuda12.0-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm11-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-llvm12/devcontainer.json b/.devcontainer/cuda12.0-llvm12/devcontainer.json index 16f8856796..3053ac9c8b 100644 --- a/.devcontainer/cuda12.0-llvm12/devcontainer.json +++ b/.devcontainer/cuda12.0-llvm12/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm12-cuda12.0-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm12-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-llvm13/devcontainer.json b/.devcontainer/cuda12.0-llvm13/devcontainer.json index c839e60c97..0e73694058 100644 --- a/.devcontainer/cuda12.0-llvm13/devcontainer.json +++ b/.devcontainer/cuda12.0-llvm13/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm13-cuda12.0-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm13-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-llvm14/devcontainer.json b/.devcontainer/cuda12.0-llvm14/devcontainer.json index 45eba1c0b0..63a6eff170 100644 --- a/.devcontainer/cuda12.0-llvm14/devcontainer.json +++ b/.devcontainer/cuda12.0-llvm14/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - 
"image": "rapidsai/devcontainers:24.06-cpp-llvm14-cuda12.0-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm14-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.0-llvm9/devcontainer.json b/.devcontainer/cuda12.0-llvm9/devcontainer.json index c4a000198b..f4eb0a86f5 100644 --- a/.devcontainer/cuda12.0-llvm9/devcontainer.json +++ b/.devcontainer/cuda12.0-llvm9/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm9-cuda12.0-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm9-cuda12.0", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-gcc10/devcontainer.json b/.devcontainer/cuda12.4-gcc10/devcontainer.json index 9d2572708d..bff42729b8 100644 --- a/.devcontainer/cuda12.4-gcc10/devcontainer.json +++ b/.devcontainer/cuda12.4-gcc10/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc10-cuda12.4-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc10-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-gcc11/devcontainer.json b/.devcontainer/cuda12.4-gcc11/devcontainer.json index 2facb1b267..0287909693 100644 --- a/.devcontainer/cuda12.4-gcc11/devcontainer.json +++ b/.devcontainer/cuda12.4-gcc11/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda12.4-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc11-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-gcc12/devcontainer.json b/.devcontainer/cuda12.4-gcc12/devcontainer.json index e5646487c3..c59d0cbb00 100644 --- a/.devcontainer/cuda12.4-gcc12/devcontainer.json +++ b/.devcontainer/cuda12.4-gcc12/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": 
"rapidsai/devcontainers:24.06-cpp-gcc12-cuda12.4-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc12-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-gcc13/devcontainer.json b/.devcontainer/cuda12.4-gcc13/devcontainer.json index 74801ad712..0c272c4e00 100644 --- a/.devcontainer/cuda12.4-gcc13/devcontainer.json +++ b/.devcontainer/cuda12.4-gcc13/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc13-cuda12.4-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc13-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-gcc7/devcontainer.json b/.devcontainer/cuda12.4-gcc7/devcontainer.json index 1b96e30958..7e7f2b4a3b 100644 --- a/.devcontainer/cuda12.4-gcc7/devcontainer.json +++ b/.devcontainer/cuda12.4-gcc7/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc7-cuda12.4-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc7-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-gcc8/devcontainer.json b/.devcontainer/cuda12.4-gcc8/devcontainer.json index 29341f5f00..b7dcc5697f 100644 --- a/.devcontainer/cuda12.4-gcc8/devcontainer.json +++ b/.devcontainer/cuda12.4-gcc8/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc8-cuda12.4-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc8-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-gcc9/devcontainer.json b/.devcontainer/cuda12.4-gcc9/devcontainer.json index 11d7d3c596..57da5c3831 100644 --- a/.devcontainer/cuda12.4-gcc9/devcontainer.json +++ b/.devcontainer/cuda12.4-gcc9/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc9-cuda12.4-ubuntu20.04", + "image": 
"rapidsai/devcontainers:24.08-cpp-gcc9-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm10/devcontainer.json b/.devcontainer/cuda12.4-llvm10/devcontainer.json index 87f350840b..e2ea5c05a8 100644 --- a/.devcontainer/cuda12.4-llvm10/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm10/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm10-cuda12.4-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm10-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm11/devcontainer.json b/.devcontainer/cuda12.4-llvm11/devcontainer.json index 12ffbf5dd0..e891c92821 100644 --- a/.devcontainer/cuda12.4-llvm11/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm11/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm11-cuda12.4-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm11-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm12/devcontainer.json b/.devcontainer/cuda12.4-llvm12/devcontainer.json index 6b8417ecda..a5470dc80e 100644 --- a/.devcontainer/cuda12.4-llvm12/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm12/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm12-cuda12.4-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm12-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm13/devcontainer.json b/.devcontainer/cuda12.4-llvm13/devcontainer.json index c4c92bc5bf..92dc210e70 100644 --- a/.devcontainer/cuda12.4-llvm13/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm13/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm13-cuda12.4-ubuntu20.04", + "image": 
"rapidsai/devcontainers:24.08-cpp-llvm13-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm14/devcontainer.json b/.devcontainer/cuda12.4-llvm14/devcontainer.json index 10eb1a5569..9e6c87b19d 100644 --- a/.devcontainer/cuda12.4-llvm14/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm14/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm14-cuda12.4-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm14-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm15/devcontainer.json b/.devcontainer/cuda12.4-llvm15/devcontainer.json index 8a64eb314f..6ef6623a8c 100644 --- a/.devcontainer/cuda12.4-llvm15/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm15/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm15-cuda12.4-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm15-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm16/devcontainer.json b/.devcontainer/cuda12.4-llvm16/devcontainer.json index 8a61b9d648..4cf67e59e2 100644 --- a/.devcontainer/cuda12.4-llvm16/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm16/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm16-cuda12.4-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm16-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm17/devcontainer.json b/.devcontainer/cuda12.4-llvm17/devcontainer.json index 39ef9d3888..e4f3cfa04a 100644 --- a/.devcontainer/cuda12.4-llvm17/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm17/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm17-cuda12.4-ubuntu22.04", + "image": 
"rapidsai/devcontainers:24.08-cpp-llvm17-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-llvm18/devcontainer.json b/.devcontainer/cuda12.4-llvm18/devcontainer.json deleted file mode 100644 index 55f8c409b8..0000000000 --- a/.devcontainer/cuda12.4-llvm18/devcontainer.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm18-cuda12.4-ubuntu22.04", - "hostRequirements": { - "gpu": "optional" - }, - "initializeCommand": [ - "/bin/bash", - "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}" - ], - "containerEnv": { - "SCCACHE_REGION": "us-east-2", - "SCCACHE_BUCKET": "rapids-sccache-devs", - "VAULT_HOST": "https://vault.ops.k8s.rapids.ai", - "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history", - "DEVCONTAINER_NAME": "cuda12.4-llvm18", - "CCCL_CUDA_VERSION": "12.4", - "CCCL_HOST_COMPILER": "llvm", - "CCCL_HOST_COMPILER_VERSION": "18", - "CCCL_BUILD_INFIX": "cuda12.4-llvm18" - }, - "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}", - "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent", - "mounts": [ - "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent" - ], - "customizations": { - "vscode": { - "extensions": [ - "llvm-vs-code-extensions.vscode-clangd", - "xaver.clang-format", - "nvidia.nsight-vscode-edition", - "ms-vscode.cmake-tools" - ], - "settings": { - "editor.defaultFormatter": "xaver.clang-format", - "editor.formatOnSave": true, - "clang-format.executable": "/usr/local/bin/clang-format", - "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}" - ], - "files.eol": "\n", - 
"files.trimTrailingWhitespace": true - } - } - }, - "name": "cuda12.4-llvm18" -} diff --git a/.devcontainer/cuda12.4-llvm9/devcontainer.json b/.devcontainer/cuda12.4-llvm9/devcontainer.json index 0f9f9f0502..96a6d1d002 100644 --- a/.devcontainer/cuda12.4-llvm9/devcontainer.json +++ b/.devcontainer/cuda12.4-llvm9/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-llvm9-cuda12.4-ubuntu20.04", + "image": "rapidsai/devcontainers:24.08-cpp-llvm9-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/cuda12.4-oneapi2023.2.0/devcontainer.json b/.devcontainer/cuda12.4-oneapi2023.2.0/devcontainer.json index 9fb6ffe7dd..26f70e496f 100644 --- a/.devcontainer/cuda12.4-oneapi2023.2.0/devcontainer.json +++ b/.devcontainer/cuda12.4-oneapi2023.2.0/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-oneapi2023.2.0-cuda12.4-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-oneapi2023.2.0-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 74801ad712..0c272c4e00 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,6 +1,6 @@ { "shutdownAction": "stopContainer", - "image": "rapidsai/devcontainers:24.06-cpp-gcc13-cuda12.4-ubuntu22.04", + "image": "rapidsai/devcontainers:24.08-cpp-gcc13-cuda12.4", "hostRequirements": { "gpu": "optional" }, diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh index 18f9c51eca..05af7e2ac6 100755 --- a/.devcontainer/make_devcontainers.sh +++ b/.devcontainer/make_devcontainers.sh @@ -28,15 +28,14 @@ update_devcontainer() { local compiler_name="$5" local compiler_exe="$6" local compiler_version="$7" - local os="$8" - local devcontainer_version="$9" + local devcontainer_version="$8" local 
IMAGE_ROOT="rapidsai/devcontainers:${devcontainer_version}-cpp-" - local image="${IMAGE_ROOT}${compiler_name}${compiler_version}-cuda${cuda_version}-${os}" + local image="${IMAGE_ROOT}${compiler_name}${compiler_version}-cuda${cuda_version}" jq --arg image "$image" --arg name "$name" \ --arg cuda_version "$cuda_version" --arg compiler_name "$compiler_name" \ - --arg compiler_exe "$compiler_exe" --arg compiler_version "$compiler_version" --arg os "$os" \ + --arg compiler_exe "$compiler_exe" --arg compiler_version "$compiler_version" \ '.image = $image | .name = $name | .containerEnv.DEVCONTAINER_NAME = $name | .containerEnv.CCCL_BUILD_INFIX = $name | .containerEnv.CCCL_CUDA_VERSION = $cuda_version | .containerEnv.CCCL_HOST_COMPILER = $compiler_name | @@ -103,10 +102,9 @@ readonly DEFAULT_CUDA=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.cuda') readonly DEFAULT_COMPILER_NAME=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_name') readonly DEFAULT_COMPILER_EXE=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_exe') readonly DEFAULT_COMPILER_VERSION=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_version') -readonly DEFAULT_OS=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.os') readonly DEFAULT_NAME=$(make_name "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_VERSION") -update_devcontainer ${base_devcontainer_file} "./temp_devcontainer.json" "$DEFAULT_NAME" "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_EXE" "$DEFAULT_COMPILER_VERSION" "$DEFAULT_OS" "$DEVCONTAINER_VERSION" +update_devcontainer ${base_devcontainer_file} "./temp_devcontainer.json" "$DEFAULT_NAME" "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_EXE" "$DEFAULT_COMPILER_VERSION" "$DEVCONTAINER_VERSION" mv "./temp_devcontainer.json" ${base_devcontainer_file} # Create an array to keep track of valid subdirectory names @@ -121,13 +119,12 @@ for combination in $combinations; do compiler_name=$(echo "$combination" | jq -r '.compiler_name') compiler_exe=$(echo "$combination" | jq 
-r '.compiler_exe') compiler_version=$(echo "$combination" | jq -r '.compiler_version') - os=$(echo "$combination" | jq -r '.os') name=$(make_name "$cuda_version" "$compiler_name" "$compiler_version") mkdir -p "$name" new_devcontainer_file="$name/devcontainer.json" - update_devcontainer "$base_devcontainer_file" "$new_devcontainer_file" "$name" "$cuda_version" "$compiler_name" "$compiler_exe" "$compiler_version" "$os" "$DEVCONTAINER_VERSION" + update_devcontainer "$base_devcontainer_file" "$new_devcontainer_file" "$name" "$cuda_version" "$compiler_name" "$compiler_exe" "$compiler_version" "$DEVCONTAINER_VERSION" echo "Created $new_devcontainer_file" # Add the subdirectory name to the valid_subdirs array diff --git a/.github/actions/workflow-build/build-workflow.py b/.github/actions/workflow-build/build-workflow.py index 75b7292aaa..a3b216e3fd 100755 --- a/.github/actions/workflow-build/build-workflow.py +++ b/.github/actions/workflow-build/build-workflow.py @@ -8,23 +8,18 @@ "jobs": [ "test" ], - "ctk": "11.1", - "gpu": "t4", - "sm": "75-real", - "cxx": { - "name": "llvm", - "version": "9", - "exe": "clang++" - }, - "std": [ - 17 - ], "project": [ "libcudacxx", "cub", "thrust" ], - "os": "ubuntu18.04" + "ctk": "11.1", + "cudacxx": 'nvcc', + "cxx": 'gcc10', + "sm": "75-real", + "std": 17, + "cpu": "amd64", + "gpu": "t4", } Matrix jobs are read from the matrix.yaml file and converted into a JSON object and passed to matrix_job_to_dispatch_group, where @@ -115,19 +110,208 @@ def error_message_with_matrix_job(matrix_job, message): return f"{matrix_job['origin']['workflow_location']}: {message}\n Input: {matrix_job['origin']['original_matrix_job']}" +@memoize_result +def canonicalize_ctk_version(ctk_string): + if ctk_string in matrix_yaml['ctk_versions']: + return ctk_string + + # Check for aka's: + for ctk_key, ctk_value in matrix_yaml['ctk_versions'].items(): + if 'aka' in ctk_value and ctk_string == ctk_value['aka']: + return ctk_key + + raise Exception(f"Unknown CTK 
version '{ctk_string}'") + + +def get_ctk(ctk_string): + result = matrix_yaml['ctk_versions'][ctk_string] + result["version"] = ctk_string + return result + + +@memoize_result +def parse_cxx_string(cxx_string): + "Returns (id, version) tuple. Version may be None if not present." + return re.match(r'^([a-z]+)-?([\d\.]+)?$', cxx_string).groups() + + +@memoize_result +def canonicalize_host_compiler_name(cxx_string): + """ + Canonicalize the host compiler cxx_string. + + Valid input formats: 'gcc', 'gcc10', or 'gcc-12'. + Output format: 'gcc12'. + + If no version is specified, the latest version is used. + """ + id, version = parse_cxx_string(cxx_string) + + if not id in matrix_yaml['host_compilers']: + raise Exception( + f"Unknown host compiler '{id}'. Valid options are: {', '.join(matrix_yaml['host_compilers'].keys())}") + + hc_def = matrix_yaml['host_compilers'][id] + hc_versions = hc_def['versions'] + + if not version: + version = max(hc_def['versions'].keys(), key=lambda x: tuple(map(int, x.split('.')))) + + # Check for aka's: + if not version in hc_def['versions']: + for version_key, version_data in hc_def['versions'].items(): + if 'aka' in version_data and version == version_data['aka']: + version = version_key + + if not version in hc_def['versions']: + raise Exception( + f"Unknown version '{version}' for host compiler '{id}'.") + + cxx_string = f"{id}{version}" + + return cxx_string + + +@memoize_result +def get_host_compiler(cxx_string): + "Expects a canonicalized cxx_string." + id, version = parse_cxx_string(cxx_string) + + if not id in matrix_yaml['host_compilers']: + raise Exception( + f"Unknown host compiler '{id}'. Valid options are: {', '.join(matrix_yaml['host_compilers'].keys())}") + + hc_def = matrix_yaml['host_compilers'][id] + + if not version in hc_def['versions']: + raise Exception( + f"Unknown version '{version}' for host compiler '{id}'. 
Valid options are: {', '.join(hc_def['versions'].keys())}") + + version_def = hc_def['versions'][version] + + result = {'id': id, + 'name': hc_def['name'], + 'version': version, + 'container_tag': hc_def['container_tag'], + 'exe': hc_def['exe']} + + for key, value in version_def.items(): + result[key] = value + + return result + + +def get_device_compiler(matrix_job): + id = matrix_job['cudacxx'] + if not id in matrix_yaml['device_compilers'].keys(): + raise Exception( + f"Unknown device compiler '{id}'. Valid options are: {', '.join(matrix_yaml['device_compilers'].keys())}") + result = matrix_yaml['device_compilers'][id] + result['id'] = id + + if id == 'nvcc': + ctk = get_ctk(matrix_job['ctk']) + result['version'] = ctk['version'] + result['stds'] = ctk['stds'] + elif id == 'clang': + host_compiler = get_host_compiler(matrix_job['cxx']) + result['version'] = host_compiler['version'] + result['stds'] = host_compiler['stds'] + else: + raise Exception(f"Cannot determine version/std info for device compiler '{id}'") + + return result + + +@memoize_result +def get_gpu(gpu_string): + if not gpu_string in matrix_yaml['gpus']: + raise Exception( + f"Unknown gpu '{gpu_string}'. Valid options are: {', '.join(matrix_yaml['gpus'].keys())}") + + result = matrix_yaml['gpus'][gpu_string] + result['id'] = gpu_string + + if not 'testing' in result: + result['testing'] = False + + return result + + +@memoize_result +def get_project(project): + if not project in matrix_yaml['projects'].keys(): + raise Exception( + f"Unknown project '{project}'. Valid options are: {', '.join(matrix_yaml['projects'].keys())}") + + result = matrix_yaml['projects'][project] + result['id'] = project + + if not 'name' in result: + result['name'] = project + + if not 'job_map' in result: + result['job_map'] = {} + + return result + + +@memoize_result +def get_job_type_info(job): + if not job in matrix_yaml['jobs'].keys(): + raise Exception( + f"Unknown job '{job}'. 
Valid options are: {', '.join(matrix_yaml['jobs'].keys())}") + + result = matrix_yaml['jobs'][job] + result['id'] = job + + if not 'name' in result: + result['name'] = job.capitalize() + if not 'gpu' in result: + result['gpu'] = False + if not 'needs' in result: + result['needs'] = None + if not 'invoke' in result: + result['invoke'] = {} + if not 'prefix' in result['invoke']: + result['invoke']['prefix'] = job + if not 'args' in result['invoke']: + result['invoke']['args'] = "" + + return result + + +@memoize_result +def get_tag_info(tag): + if not tag in matrix_yaml['tags'].keys(): + raise Exception( + f"Unknown tag '{tag}'. Valid options are: {', '.join(matrix_yaml['tags'].keys())}") + + result = matrix_yaml['tags'][tag] + result['id'] = tag + + if 'required' not in result: + result['required'] = False + + if 'default' in result: + result['required'] = False + else: + result['default'] = None + + + return result + + @static_result def get_all_matrix_job_tags_sorted(): - required_tags = set(matrix_yaml['required_tags']) - defaulted_tags = set(matrix_yaml['defaulted_tags']) - optional_tags = set(matrix_yaml['optional_tags']) - all_tags = required_tags | defaulted_tags | optional_tags + all_tags = set(matrix_yaml['tags'].keys()) # Sorted using a highly subjective opinion on importance: # Always first, information dense: sorted_important_tags = ['project', 'jobs', 'cudacxx', 'cxx', 'ctk', 'gpu', 'std', 'sm', 'cpu'] # Always last, derived: - sorted_noise_tags = ['os', 'origin'] + sorted_noise_tags = ['origin'] # In between? 
sorted_tags = set(sorted_important_tags + sorted_noise_tags) @@ -136,148 +320,112 @@ def get_all_matrix_job_tags_sorted(): return sorted_important_tags + sorted_meh_tags + sorted_noise_tags -def lookup_os(ctk, host_compiler): - key = f'ctk{ctk}-{host_compiler["name"]}{host_compiler["version"]}' - if not key in matrix_yaml['default_os_lookup']: - raise Exception(f"Missing matrix.yaml `default_os_lookup` entry for key `{key}`") - return matrix_yaml['default_os_lookup'][key] - - -def lookup_supported_stds(device_compiler=None, host_compiler=None, project=None): +def lookup_supported_stds(matrix_job): stds = set(matrix_yaml['all_stds']) - if device_compiler: - key = f"{device_compiler['name']}{device_compiler['version']}" - if not key in matrix_yaml['lookup_cudacxx_supported_stds']: - raise Exception(f"Missing matrix.yaml 'lookup_cudacxx_supported_stds' entry for key '{key}'") - stds = stds & set(matrix_yaml['lookup_cudacxx_supported_stds'][key]) - if host_compiler: - key = f"{host_compiler['name']}{host_compiler['version']}" - if not key in matrix_yaml['lookup_cxx_supported_stds']: - raise Exception(f"Missing matrix.yaml 'lookup_cxx_supported_stds' entry for key '{key}'") - stds = stds & set(matrix_yaml['lookup_cxx_supported_stds'][key]) - if project: - key = project - if not key in matrix_yaml['lookup_project_supported_stds']: - raise Exception(f"Missing matrix.yaml 'lookup_project_supported_stds' entry for key '{key}'") - stds = stds & set(matrix_yaml['lookup_project_supported_stds'][key]) + if 'ctk' in matrix_job: + ctk = get_ctk(matrix_job['ctk']) + stds = stds & set(ctk['stds']) + if 'cxx' in matrix_job: + host_compiler = get_host_compiler(matrix_job['cxx']) + stds = stds & set(host_compiler['stds']) + if 'cudacxx' in matrix_job: + device_compiler = get_device_compiler(matrix_job) + stds = stds & set(device_compiler['stds']) + if 'project' in matrix_job: + project = get_project(matrix_job['project']) + stds = stds & set(project['stds']) return sorted(list(stds)) 
-@memoize_result -def lookup_job_invoke_spec(job_type): - if job_type in matrix_yaml['job_invoke']: - return matrix_yaml['job_invoke'][job_type] - return {'prefix': job_type} - - -def get_formatted_project_name(project_name): - if project_name in matrix_yaml['formatted_project_names']: - return matrix_yaml['formatted_project_names'][project_name] - return project_name - - -def get_formatted_host_compiler_name(host_compiler): - config_name = host_compiler['name'] - if config_name in matrix_yaml['formatted_cxx_names']: - return matrix_yaml['formatted_cxx_names'][config_name] - return config_name - - -def get_formatted_job_type(job_type): - if job_type in matrix_yaml['formatted_jobs']: - return matrix_yaml['formatted_jobs'][job_type] - # Return with first letter capitalized: - return job_type.capitalize() - - def is_windows(matrix_job): - return matrix_job['os'].startswith('windows') + host_compiler = get_host_compiler(matrix_job['cxx']) + return host_compiler['container_tag'] == 'cl' def generate_dispatch_group_name(matrix_job): - project_name = get_formatted_project_name(matrix_job['project']) + project = get_project(matrix_job['project']) ctk = matrix_job['ctk'] - device_compiler = matrix_job['cudacxx'] - host_compiler_name = get_formatted_host_compiler_name(matrix_job['cxx']) + device_compiler = get_device_compiler(matrix_job) + host_compiler = get_host_compiler(matrix_job['cxx']) compiler_info = "" - if device_compiler['name'] == 'nvcc': - compiler_info = f"nvcc {host_compiler_name}" - elif device_compiler['name'] == 'llvm': - compiler_info = f"clang-cuda" + if device_compiler['id'] == 'nvcc': + compiler_info = f"{device_compiler['name']} {host_compiler['name']}" + elif device_compiler['id'] == 'clang': + compiler_info = f"{device_compiler['name']}" else: - compiler_info = f"{device_compiler['name']}-{device_compiler['version']} {host_compiler_name}" + compiler_info = f"{device_compiler['name']}-{device_compiler['version']} {host_compiler['name']}" - return 
f"{project_name} {compiler_info} CTK{ctk}" + return f"{project['name']} CTK{ctk} {compiler_info}" def generate_dispatch_job_name(matrix_job, job_type): + job_info = get_job_type_info(job_type) std_str = ("C++" + str(matrix_job['std']) + " ") if 'std' in matrix_job else '' cpu_str = matrix_job['cpu'] - gpu_str = (', ' + matrix_job['gpu'].upper()) if job_type in matrix_yaml['gpu_required_jobs'] else "" - cuda_compile_arch = (" sm{" + matrix_job['sm'] + "}") if 'sm' in matrix_job else "" + gpu_str = (', ' + matrix_job['gpu'].upper()) if job_info['gpu'] else "" + cuda_compile_arch = (" sm{" + str(matrix_job['sm']) + "}") if 'sm' in matrix_job else "" cmake_options = (' ' + matrix_job['cmake_options']) if 'cmake_options' in matrix_job else "" - host_compiler_name = get_formatted_host_compiler_name(matrix_job['cxx']) - host_compiler_info = f"{host_compiler_name}{matrix_job['cxx']['version']}" + host_compiler = get_host_compiler(matrix_job['cxx']) - config_tag = f"{std_str}{host_compiler_info}" - - formatted_job_type = get_formatted_job_type(job_type) + config_tag = f"{std_str}{host_compiler['name']}{host_compiler['version']}" extra_info = f":{cuda_compile_arch}{cmake_options}" if cuda_compile_arch or cmake_options else "" - return f"[{config_tag}] {formatted_job_type}({cpu_str}{gpu_str}){extra_info}" + return f"[{config_tag}] {job_info['name']}({cpu_str}{gpu_str}){extra_info}" def generate_dispatch_job_runner(matrix_job, job_type): runner_os = "windows" if is_windows(matrix_job) else "linux" cpu = matrix_job['cpu'] - if not job_type in matrix_yaml['gpu_required_jobs']: + job_info = get_job_type_info(job_type) + if not job_info['gpu']: return f"{runner_os}-{cpu}-cpu16" - gpu = matrix_job['gpu'] - suffix = "-testing" if gpu in matrix_yaml['testing_pool_gpus'] else "" + gpu = get_gpu(matrix_job['gpu']) + suffix = "-testing" if gpu['testing'] else "" - return f"{runner_os}-{cpu}-gpu-{gpu}-latest-1{suffix}" + return f"{runner_os}-{cpu}-gpu-{gpu['id']}-latest-1{suffix}" def 
generate_dispatch_job_ctk_version(matrix_job, job_type): + ".devcontainers/launch.sh --cuda option:" return matrix_job['ctk'] def generate_dispatch_job_host_compiler(matrix_job, job_type): - return matrix_job['cxx']['name'] + matrix_job['cxx']['version'] + ".devcontainers/launch.sh --host option:" + host_compiler = get_host_compiler(matrix_job['cxx']) + return host_compiler['container_tag'] + host_compiler['version'] def generate_dispatch_job_image(matrix_job, job_type): devcontainer_version = matrix_yaml['devcontainer_version'] ctk = matrix_job['ctk'] - image_os = matrix_job['os'] - host_compiler = matrix_job['cxx']['name'] + matrix_job['cxx']['version'] + host_compiler = generate_dispatch_job_host_compiler(matrix_job, job_type) if is_windows(matrix_job): - return f"rapidsai/devcontainers:{devcontainer_version}-cuda{ctk}-{host_compiler}-{image_os}" + return f"rapidsai/devcontainers:{devcontainer_version}-cuda{ctk}-{host_compiler}" - return f"rapidsai/devcontainers:{devcontainer_version}-cpp-{host_compiler}-cuda{ctk}-{image_os}" + return f"rapidsai/devcontainers:{devcontainer_version}-cpp-{host_compiler}-cuda{ctk}" def generate_dispatch_job_command(matrix_job, job_type): script_path = "./ci/windows" if is_windows(matrix_job) else "./ci" script_ext = ".ps1" if is_windows(matrix_job) else ".sh" - job_invoke_spec = lookup_job_invoke_spec(job_type) - job_prefix = job_invoke_spec['prefix'] - job_args = job_invoke_spec['args'] if 'args' in job_invoke_spec else "" + job_info = get_job_type_info(job_type) + job_prefix = job_info['invoke']['prefix'] + job_args = job_info['invoke']['args'] - project = matrix_job['project'] - script_name = f"{script_path}/{job_prefix}_{project}{script_ext}" + project = get_project(matrix_job['project']) + script_name = f"{script_path}/{job_prefix}_{project['id']}{script_ext}" std_str = str(matrix_job['std']) if 'std' in matrix_job else '' - device_compiler_name = matrix_job['cudacxx']['name'] - device_compiler_exe = 
matrix_job['cudacxx']['exe'] + device_compiler = get_device_compiler(matrix_job) cuda_compile_arch = matrix_job['sm'] if 'sm' in matrix_job else '' cmake_options = matrix_job['cmake_options'] if 'cmake_options' in matrix_job else '' @@ -289,8 +437,8 @@ def generate_dispatch_job_command(matrix_job, job_type): command += f" -std \"{std_str}\"" if cuda_compile_arch: command += f" -arch \"{cuda_compile_arch}\"" - if device_compiler_name != 'nvcc': - command += f" -cuda \"{device_compiler_exe}\"" + if device_compiler['id'] != 'nvcc': + command += f" -cuda \"{device_compiler['exe']}\"" if cmake_options: command += f" -cmake-options \"{cmake_options}\"" @@ -298,28 +446,34 @@ def generate_dispatch_job_origin(matrix_job, job_type): + # Already has filename, line number, etc: origin = matrix_job['origin'].copy() - matrix_job = matrix_job.copy() - del matrix_job['origin'] + origin_job = matrix_job.copy() + del origin_job['origin'] - matrix_job['jobs'] = get_formatted_job_type(job_type) + job_info = get_job_type_info(job_type) - if 'cxx' in matrix_job: - host_compiler = matrix_job['cxx'] - formatted_name = get_formatted_host_compiler_name(host_compiler) - matrix_job['cxx_name'] = formatted_name - matrix_job['cxx_full'] = formatted_name + host_compiler['version'] - del matrix_job['cxx'] + # The origin tags are used to build the execution summary for the CI PR comment. 
+ # Use the human readable job label for the execution summary: + origin_job['jobs'] = job_info['name'] - if 'cudacxx' in matrix_job: - device_compiler = matrix_job['cudacxx'] - formatted_name = 'clang-cuda' if device_compiler['name'] == 'llvm' else device_compiler['name'] - matrix_job['cudacxx_name'] = formatted_name - matrix_job['cudacxx_full'] = formatted_name + device_compiler['version'] - del matrix_job['cudacxx'] + # Replace some of the clunkier tags with a summary-friendly version: + if 'cxx' in origin_job: + host_compiler = get_host_compiler(matrix_job['cxx']) + del origin_job['cxx'] + + origin_job['cxx'] = host_compiler['name'] + host_compiler['version'] + origin_job['cxx_family'] = host_compiler['name'] + + if 'cudacxx' in origin_job: + device_compiler = get_device_compiler(matrix_job) + del origin_job['cudacxx'] + + origin_job['cudacxx'] = device_compiler['name'] + device_compiler['version'] + origin_job['cudacxx_family'] = device_compiler['name'] - origin['matrix_job'] = matrix_job + origin['matrix_job'] = origin_job return origin @@ -337,16 +491,16 @@ def generate_dispatch_job_json(matrix_job, job_type): # Create a single build producer, and a separate consumer for each test_job_type: -def generate_dispatch_build_and_test_json(matrix_job, build_job_type, test_job_types): - build_json = generate_dispatch_job_json(matrix_job, build_job_type) +def generate_dispatch_two_stage_json(matrix_job, producer_job_type, consumer_job_types): + producer_json = generate_dispatch_job_json(matrix_job, producer_job_type) - test_json = [] - for test_job_type in test_job_types: - test_json.append(generate_dispatch_job_json(matrix_job, test_job_type)) + consumers_json = [] + for consumer_job_type in consumer_job_types: + consumers_json.append(generate_dispatch_job_json(matrix_job, consumer_job_type)) return { - "producers": [build_json], - "consumers": test_json + "producers": [producer_json], + "consumers": consumers_json } @@ -359,21 +513,27 @@ def 
generate_dispatch_group_jobs(matrix_job): # The jobs tag is left unexploded to optimize scheduling here. job_types = set(matrix_job['jobs']) - # Identify jobs that require a build job to run first: - build_required = set(matrix_yaml['build_required_jobs']) & job_types + # Partition job types into standalone jobs and two-stage producer/consumer groups: + standalone = set([]) + two_stage = {} # {producer: set([consumer, ...])} + for job_type in job_types: + job_info = get_job_type_info(job_type) + dep = job_info['needs'] + if dep: + if dep in two_stage: + two_stage[dep].add(job_type) + else: + two_stage[dep] = set([job_type]) + else: + standalone.add(job_type) - if build_required and not 'build' in job_types: - raise Exception(error_message_with_matrix_job( - matrix_job, f"Internal error: Missing 'build' job type required by other jobs ({build_required}).")) + standalone.difference_update(two_stage.keys()) - if build_required: + for producer, consumers in two_stage.items(): dispatch_group_jobs['two_stage'].append( - generate_dispatch_build_and_test_json(matrix_job, "build", list(build_required))) - job_types -= {'build'} - job_types -= build_required - # Remaining jobs are assumed to be standalone (e.g. 
nvrtc): - for job_type in job_types: + for job_type in standalone: dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, job_type)) return dispatch_group_jobs @@ -585,22 +745,6 @@ def get_matrix_job_origin(matrix_job, workflow_name, workflow_location): } -def remove_skip_test_jobs(matrix_jobs): - '''Remove jobs defined in `matrix_file.skip_test_jobs`.''' - new_matrix_jobs = [] - for matrix_job in matrix_jobs: - jobs = matrix_job['jobs'] - new_jobs = set() - for job in jobs: - if not job in matrix_yaml['skip_test_jobs']: - new_jobs.add(job) - if new_jobs: - new_matrix_job = copy.deepcopy(matrix_job) - new_matrix_job['jobs'] = list(new_jobs) - new_matrix_jobs.append(new_matrix_job) - return new_matrix_jobs - - @static_result def get_excluded_matrix_jobs(): return parse_workflow_matrix_jobs(None, 'exclude') @@ -615,8 +759,6 @@ def apply_matrix_job_exclusion(matrix_job, exclusion): if not tag in matrix_job: return matrix_job - # print(f"tag: {tag}, excluded_values: {excluded_values}") - # Some tags are left unexploded (e.g. 'jobs') to optimize scheduling, # so the values can be either a list or a single value. 
# Standardize to a list for comparison: @@ -666,64 +808,77 @@ def remove_excluded_jobs(matrix_jobs): return filtered_matrix_jobs -def validate_required_tags(matrix_job): - for tag in matrix_yaml['required_tags']: - if tag not in matrix_job: - raise Exception(error_message_with_matrix_job(matrix_job, f"Missing required tag '{tag}'")) +def validate_tags(matrix_job, ignore_required=False): + all_tags = matrix_yaml['tags'].keys() + + if not ignore_required: + for tag in all_tags: + tag_info = get_tag_info(tag) + if tag not in matrix_job: + if tag_info['required']: + raise Exception(error_message_with_matrix_job(matrix_job, f"Missing required tag '{tag}'")) + if 'cudacxx' in matrix_job: + if matrix_job['cudacxx'] == 'clang' and ('cxx' not in matrix_job or 'clang' not in matrix_job['cxx']): + raise Exception(error_message_with_matrix_job(matrix_job, f"cudacxx=clang requires cxx=clang.")) - all_tags = get_all_matrix_job_tags_sorted() for tag in matrix_job: + if tag == 'origin': + continue if tag not in all_tags: raise Exception(error_message_with_matrix_job(matrix_job, f"Unknown tag '{tag}'")) - if 'gpu' in matrix_job and matrix_job['gpu'] not in matrix_yaml['gpus']: + if 'gpu' in matrix_job and matrix_job['gpu'] not in matrix_yaml['gpus'].keys(): raise Exception(error_message_with_matrix_job(matrix_job, f"Unknown gpu '{matrix_job['gpu']}'")) def set_default_tags(matrix_job): - generic_defaults = set(matrix_yaml['defaulted_tags']) - generic_defaults -= set(['os']) # handled specially. 
+ all_tags = matrix_yaml['tags'].keys() + for tag in all_tags: + if tag in matrix_job: + continue - for tag in generic_defaults: - if tag not in matrix_job: - matrix_job[tag] = matrix_yaml['default_'+tag] + tag_info = get_tag_info(tag) + if tag_info['default']: + matrix_job[tag] = tag_info['default'] -def set_derived_tags(matrix_job): - if 'os' not in matrix_job: - matrix_job['os'] = lookup_os(matrix_job['ctk'], matrix_job['cxx']) +def canonicalize_tags(matrix_job): + if 'ctk' in matrix_job: + matrix_job['ctk'] = canonicalize_ctk_version(matrix_job['ctk']) + if 'cxx' in matrix_job: + matrix_job['cxx'] = canonicalize_host_compiler_name(matrix_job['cxx']) - # Expand nvcc device compiler shortcut: - if matrix_job['cudacxx'] == 'nvcc': - matrix_job['cudacxx'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'} +def set_derived_tags(matrix_job): if 'sm' in matrix_job and matrix_job['sm'] == 'gpu': if not 'gpu' in matrix_job: raise Exception(error_message_with_matrix_job(matrix_job, f"\"sm: 'gpu'\" requires tag 'gpu'.")) - if not matrix_job['gpu'] in matrix_yaml['gpu_sm']: - raise Exception(error_message_with_matrix_job(matrix_job, - f"Missing matrix.yaml 'gpu_sm' entry for gpu '{matrix_job['gpu']}'")) - matrix_job['sm'] = matrix_yaml['gpu_sm'][matrix_job['gpu']] + gpu = get_gpu(matrix_job['gpu']) + matrix_job['sm'] = gpu['sm'] if 'std' in matrix_job and matrix_job['std'] == 'all': - host_compiler = matrix_job['cxx'] if 'cxx' in matrix_job else None - device_compiler = matrix_job['cudacxx'] if 'cudacxx' in matrix_job else None - project = matrix_job['project'] if 'project' in matrix_job else None + matrix_job['std'] = lookup_supported_stds(matrix_job) - matrix_job['std'] = lookup_supported_stds(device_compiler, host_compiler, project) + # Add all deps before applying project job maps: + for job in matrix_job['jobs']: + job_info = get_job_type_info(job) + dep = job_info['needs'] + if dep and dep not in matrix_job['jobs']: + matrix_job['jobs'].append(dep) - if 
matrix_job['project'] in matrix_yaml['project_expanded_tests'] and 'test' in matrix_job['jobs']: - matrix_job['jobs'].remove('test') - matrix_job['jobs'] += matrix_yaml['project_expanded_tests'][matrix_job['project']] - - if (not 'build' in matrix_job['jobs'] and - any([job in matrix_job['jobs'] for job in matrix_yaml['build_required_jobs']])): - matrix_job['jobs'].append('build') + # Apply project job map: + project = get_project(matrix_job['project']) + for original_job, expanded_jobs in project['job_map'].items(): + if original_job in matrix_job['jobs']: + matrix_job['jobs'].remove(original_job) + matrix_job['jobs'] += expanded_jobs def next_explode_tag(matrix_job): + non_exploded_tags = ['jobs'] + for tag in matrix_job: - if not tag in matrix_yaml['non_exploded_tags'] and isinstance(matrix_job[tag], list): + if not tag in non_exploded_tags and isinstance(matrix_job[tag], list): return tag return None @@ -744,16 +899,20 @@ def explode_tags(matrix_job, explode_tag=None): return result -def preprocess_matrix_jobs(matrix_jobs, explode_only=False): +def preprocess_matrix_jobs(matrix_jobs, is_exclusion_matrix=False): result = [] - if explode_only: + if is_exclusion_matrix: for matrix_job in matrix_jobs: - result.extend(explode_tags(matrix_job)) + validate_tags(matrix_job, ignore_required=True) + for job in explode_tags(matrix_job): + canonicalize_tags(job) + result.append(job) else: for matrix_job in matrix_jobs: - validate_required_tags(matrix_job) + validate_tags(matrix_job) set_default_tags(matrix_job) for job in explode_tags(matrix_job): + canonicalize_tags(job) set_derived_tags(job) # The derived tags may need to be exploded again: result.extend(explode_tags(job)) @@ -783,11 +942,9 @@ def parse_workflow_matrix_jobs(args, workflow_name): matrix_job['origin'] = get_matrix_job_origin(matrix_job, workflow_name, workflow_location) # Fill in default values, explode lists. 
- matrix_jobs = preprocess_matrix_jobs(matrix_jobs, explode_only=is_exclusion_matrix) + matrix_jobs = preprocess_matrix_jobs(matrix_jobs, is_exclusion_matrix) if args: - if args.skip_tests: - matrix_jobs = remove_skip_test_jobs(matrix_jobs) if args.dirty_projects != None: # Explicitly check for None, as an empty list is valid: matrix_jobs = [job for job in matrix_jobs if job['project'] in args.dirty_projects] @@ -861,7 +1018,6 @@ def process_job_array(group_name, array_name, parent_json): os.makedirs("workflow", exist_ok=True) write_json_file("workflow/workflow.json", final_workflow) - write_json_file("workflow/workflow_keys.json", list(final_workflow.keys())) write_json_file("workflow/job_ids.json", id_to_full_job_name) write_text_file("workflow/job_list.txt", "\n".join(job_list)) write_json_file("workflow/runner_summary.json", runner_json) @@ -903,7 +1059,7 @@ def print_devcontainer_info(args): matrix_jobs.extend(parse_workflow_matrix_jobs(args, workflow_name)) # Remove all but the following keys from the matrix jobs: - keep_keys = ['ctk', 'cxx', 'os'] + keep_keys = ['ctk', 'cxx'] combinations = [{key: job[key] for key in keep_keys} for job in matrix_jobs] # Remove duplicates and filter out windows jobs: @@ -913,10 +1069,11 @@ def print_devcontainer_info(args): unique_combinations.append(combo) for combo in unique_combinations: - combo['compiler_name'] = combo['cxx']['name'] - combo['compiler_version'] = combo['cxx']['version'] - combo['compiler_exe'] = combo['cxx']['exe'] + host_compiler = get_host_compiler(combo['cxx']) del combo['cxx'] + combo['compiler_name'] = host_compiler['container_tag'] + combo['compiler_version'] = host_compiler['version'] + combo['compiler_exe'] = host_compiler['exe'] combo['cuda'] = combo['ctk'] del combo['ctk'] @@ -927,6 +1084,23 @@ def print_devcontainer_info(args): print(json.dumps(devcontainer_json, indent=2)) +def preprocess_matrix_yaml(matrix): + # Make all CTK version keys into strings: + new_ctk = {} + for version, attrs in 
matrix['ctk_versions'].items(): + new_ctk[str(version)] = attrs + matrix['ctk_versions'] = new_ctk + + # Make all compiler version keys into strings: + for id, hc_def in matrix['host_compilers'].items(): + new_versions = {} + for version, attrs in hc_def['versions'].items(): + new_versions[str(version)] = attrs + hc_def['versions'] = new_versions + + return matrix + + def main(): parser = argparse.ArgumentParser(description='Compute matrix for workflow') parser.add_argument('matrix_file', help='Path to the matrix YAML file') @@ -937,8 +1111,6 @@ def main(): parser_mode.add_argument('--devcontainer-info', action='store_true', help='Print devcontainer info instead of GHA workflows.') parser.add_argument('--dirty-projects', nargs='*', help='Filter jobs to only these projects') - parser.add_argument('--skip-tests', action='store_true', - help='Remove jobs defined in `matrix_file.skip_test_jobs`.') parser.add_argument('--allow-override', action='store_true', help='If a non-empty "override" workflow exists, it will be used instead of those in --workflows.') args = parser.parse_args() @@ -951,6 +1123,7 @@ def main(): with open(args.matrix_file, 'r') as f: global matrix_yaml matrix_yaml = yaml.safe_load(f) + matrix_yaml = preprocess_matrix_yaml(matrix_yaml) matrix_yaml['filename'] = args.matrix_file if args.workflows: diff --git a/ci/matrix.yaml b/ci/matrix.yaml index dea971cb28..21e8d7cc94 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,46 +1,3 @@ -ctk_11_1: &ctk_11_1 '11.1' -ctk_11_8: &ctk_11_8 '11.8' -ctk_12_0: &ctk_12_0 '12.0' -ctk_curr: &ctk_curr '12.4' - -# The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers -devcontainer_version: '24.06' - -# gcc compiler configurations -gcc6: &gcc6 { name: 'gcc', version: '6', exe: 'g++' } -gcc7: &gcc7 { name: 'gcc', version: '7', exe: 'g++' } -gcc8: &gcc8 { name: 'gcc', version: '8', exe: 'g++' } -gcc9: &gcc9 { name: 'gcc', version: '9', exe: 'g++' } -gcc10: &gcc10 { name: 
'gcc', version: '10', exe: 'g++' } -gcc11: &gcc11 { name: 'gcc', version: '11', exe: 'g++' } -gcc12: &gcc12 { name: 'gcc', version: '12', exe: 'g++' } -gcc13: &gcc13 { name: 'gcc', version: '13', exe: 'g++' } -gcc-oldest: &gcc-oldest { name: 'gcc', version: '6', exe: 'g++' } -gcc-newest: &gcc-newest { name: 'gcc', version: '13', exe: 'g++' } - -# LLVM Compiler configurations -llvm9: &llvm9 { name: 'llvm', version: '9', exe: 'clang++' } -llvm10: &llvm10 { name: 'llvm', version: '10', exe: 'clang++' } -llvm11: &llvm11 { name: 'llvm', version: '11', exe: 'clang++' } -llvm12: &llvm12 { name: 'llvm', version: '12', exe: 'clang++' } -llvm13: &llvm13 { name: 'llvm', version: '13', exe: 'clang++' } -llvm14: &llvm14 { name: 'llvm', version: '14', exe: 'clang++' } -llvm15: &llvm15 { name: 'llvm', version: '15', exe: 'clang++' } -llvm16: &llvm16 { name: 'llvm', version: '16', exe: 'clang++' } -llvm17: &llvm17 { name: 'llvm', version: '17', exe: 'clang++' } -llvm-oldest: &llvm-oldest { name: 'llvm', version: '9', exe: 'clang++' } -llvm-newest: &llvm-newest { name: 'llvm', version: '17', exe: 'clang++' } - -# MSVC configs -msvc2017: &msvc2017 { name: 'cl', version: '14.16', exe: 'cl++' } -msvc2019: &msvc2019 { name: 'cl', version: '14.29', exe: 'cl++' } -msvc2022_1436: &msvc2022_1436 { name: 'cl', version: '14.36', exe: 'cl++' } -msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } - -# oneAPI configs -oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' } - -# GHA Workflow job matrices: workflows: # If any jobs appear here, they will be executed instead of `pull_request' for PRs. # This is useful for limiting resource usage when a full matrix is not needed. 
@@ -48,337 +5,280 @@ workflows: # # Example: # override: - # - {jobs: ['build'], project: 'thrust', std: 17, ctk: *ctk_curr, cxx: [*gcc12, *llvm16]} + # - {jobs: ['test'], project: 'thrust', std: 17, ctk: 'curr', cxx: ['gcc12', 'llvm16']} # override: pull_request: # Old CTK - - {jobs: ['build'], std: 'all', ctk: *ctk_11_1, cxx: [*gcc6, *gcc7, *gcc8, *gcc9, *llvm9, *msvc2017]} - - {jobs: ['build'], std: 'all', ctk: *ctk_11_8, cxx: [*gcc11], sm: '60;70;80;90'} + - {jobs: ['build'], std: 'all', ctk: '11.1', cxx: ['gcc6', 'gcc7', 'gcc8', 'gcc9', 'clang9', 'msvc2017']} + - {jobs: ['build'], std: 'all', ctk: '11.8', cxx: ['gcc11'], sm: '60;70;80;90'} # Current CTK - - {jobs: ['build'], std: 'all', cxx: [*gcc7, *gcc8, *gcc9, *gcc10, *gcc11, *gcc12]} - - {jobs: ['build'], std: 'all', cxx: [*llvm9, *llvm10, *llvm11, *llvm12, *llvm13, *llvm14, *llvm15, *llvm16]} - - {jobs: ['build'], std: 'all', cxx: [*oneapi, *msvc2019]} - - {jobs: ['test'], std: 'all', cxx: [*gcc13, *llvm17, *msvc2022]} + - {jobs: ['build'], std: 'all', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']} + - {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16']} + - {jobs: ['build'], std: 'all', cxx: ['intel', 'msvc2019']} + - {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang17', 'msvc2022']} # Modded builds: - - {jobs: ['build'], std: 'all', cxx: [*gcc-newest, *llvm-newest], cpu: 'arm64'} - - {jobs: ['build'], std: 'all', cxx: [*gcc-newest], sm: '90a'} + - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} + - {jobs: ['build'], std: 'all', cxx: ['gcc'], sm: '90a'} # default_projects: clang-cuda - - {jobs: ['build'], std: [17, 20], cudacxx: *llvm-newest, cxx: *llvm-newest} + - {jobs: ['build'], std: [17, 20], cudacxx: 'clang', cxx: 'clang'} # nvrtc: - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'} # verify-codegen: - {jobs: ['verify_codegen'], project: 'libcudacxx'} # cudax has different CTK reqs: - - {jobs: 
['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc9, *gcc10, *gcc11]} - - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*llvm9, *llvm10, *llvm11, *llvm12, *llvm13, *llvm14]} - - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*llvm15]} - - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, ], std: 20, cxx: [*msvc2022_1436]} - - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 20, cxx: [*msvc2022]} - - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0 ], std: 17, cxx: [*gcc12], sm: "90"} - - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 17, cxx: [*gcc12], sm: "90a"} - - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*gcc12, *llvm16], cpu: 'arm64'} - - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 17, cxx: [*oneapi]} - - {jobs: ['test'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc12]} - - {jobs: ['test'], project: 'cudax', ctk: [*ctk_12_0 ], std: 'all', cxx: [*llvm14]} - - {jobs: ['test'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*llvm16]} + - {jobs: ['build'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc9', 'gcc10', 'gcc11']} + - {jobs: ['build'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13']} + - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang14', 'clang15']} + - {jobs: ['build'], project: 'cudax', ctk: ['12.0', ], std: 20, cxx: ['msvc14.36']} + - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 20, cxx: ['msvc2022']} + - {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 17, cxx: ['gcc12'], sm: "90"} + - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 17, cxx: ['gcc12'], sm: "90a"} + - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc12', 'clang16'], cpu: 'arm64'} + - {jobs: ['build'], project: 'cudax', 
ctk: [ 'curr'], std: 17, cxx: ['intel']} + - {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']} + - {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']} + - {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang16']} # cccl-infra: - - {jobs: ['infra'], project: 'cccl', ctk: *ctk_11_1, cxx: [*gcc-oldest, *llvm-oldest]} - - {jobs: ['infra'], project: 'cccl', ctk: *ctk_curr, cxx: [*gcc-newest, *llvm-newest]} + - {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9']} + - {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']} nightly: # libcudacxx build fails, CUB tests fail: - - {jobs: ['build'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11], project: ['cub']} - - {jobs: ['test'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11], project: ['thrust']} - # - {jobs: ['test'], ctk: *ctk_11_1, gpu: 'v100', sm: 'gpu', cxx: *gcc6, std: [11] } + - {jobs: ['build'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11], project: ['cub']} + - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11], project: ['thrust']} + # - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11] } # libcudacxx build fails, CUB tests fail: - - {jobs: ['build'], ctk: *ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17], project: ['cub']} - - {jobs: ['test'], ctk: *ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17], project: ['thrust']} - # - {jobs: ['test'], ctk: *ctk_11_1, gpu: 't4', sm: 'gpu', cxx: *llvm9, std: [17] } + - {jobs: ['build'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17], project: ['cub']} + - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17], project: ['thrust']} + # - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17] } # CUB + libcudacxx tests fails: - - {jobs: ['build'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', 
cxx: *gcc11, std: [17], project: ['libcudacxx', 'cub']} - - {jobs: ['test'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', cxx: *gcc11, std: [17], project: ['thrust']} - # - {jobs: ['test'], ctk: *ctk_11_8, gpu: 'rtx2080', sm: 'gpu', cxx: *gcc11, std: [17] } + - {jobs: ['build'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17], project: ['libcudacxx', 'cub']} + - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17], project: ['thrust']} + # - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17] } # libcudacxx tests fail: - - {jobs: ['build'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14], project: ['libcudacxx']} - - {jobs: ['build'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['libcudacxx']} - - {jobs: ['build'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: [11], project: ['libcudacxx']} - - {jobs: ['build'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['libcudacxx']} - - {jobs: ['build'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17], project: ['libcudacxx']} - - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14], project: ['cub', 'thrust']} - - {jobs: ['test'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['cub', 'thrust']} - - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: [11], project: ['cub', 'thrust']} - - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['cub', 'thrust']} - - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17], project: ['cub', 'thrust']} - # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc7, std: [14] } - # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all' } - # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', cxx: *llvm9, std: 
[11] } - # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20] } - # - {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *llvm16, std: [17] } + - {jobs: ['build'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14], project: ['libcudacxx']} + - {jobs: ['build'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc12', std: 'all', project: ['libcudacxx']} + - {jobs: ['build'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11], project: ['libcudacxx']} + - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20], project: ['libcudacxx']} + - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang16', std: [17], project: ['libcudacxx']} + - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc12', std: 'all', project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20], project: ['cub', 'thrust']} + - {jobs: ['test'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang16', std: [17], project: ['cub', 'thrust']} + # - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14] } + # - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc12', std: 'all' } + # - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11] } + # - {jobs: ['test'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20] } + # - {jobs: ['test'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang16', std: [17] } # nvrtc: - - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', sm: 'gpu', cxx: *gcc12, std: [20], project: ['libcudacxx']} - - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', cxx: *gcc12, std: [20], project: ['libcudacxx']} - - {jobs: 
['nvrtc'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', cxx: *gcc12, std: 'all', project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc12', std: [20], project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc12', std: [20], project: ['libcudacxx']} + - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc12', std: 'all', project: ['libcudacxx']} # Fails on h100: - # - {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', cxx: *gcc12, std: [11, 20], project: ['libcudacxx']} + # - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20], project: ['libcudacxx']} # Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows. exclude: + # GPU runners are not available on Windows. + - {jobs: ['test', 'test_gpu', 'test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'], cxx: ['msvc2017', 'msvc2019', 'msvc14.36', 'msvc2022']} # Ubuntu 18.04 is EOL and we only use it to get access to CTK 11.1 containers for CUDA testing. # Disable non-CUDA tests on this platform. - - {jobs: ['test_cpu'], os: 'ubuntu18.04'} - # GPU runners are not available on Windows. - - {jobs: ['test', 'test_gpu', 'test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'], os: 'windows2022'} - - -# -# Resources for compute_matrix.py. These can be modified to add new jobs, etc. -# -# Jobs are executed by running scripts: -# - Linux: 'ci/_.sh` -# - Windows: `ci/windows/_.ps1` - -# A matrix entry must have the following tag. -required_tags: - - 'jobs' # A list of job types to run (e.g. 'build', 'test', 'nvrtc', 'infra', 'verify_codegen', ...) for - # the specified configuration(s). - -# If a matrix entry omits these tags, a default value (defined later in `default_`) is used. -defaulted_tags: - - 'ctk' # CUDA ToolKit version. Will be exploded if a list. - - 'cpu' # CPU architecture. Will be exploded if a list. - - 'gpu' # GPU model. Will be exploded if a list. 
- - 'cxx' # Host compiler {name, version, exe}. Will be exploded if a list. - - 'cudacxx' # Device compiler as {name, version, exe} or 'nvcc' to use nvcc from the specified `ctk`. - # Will be exploded if a list. - - 'project' # Project name (e.g. libcudacxx, cub, thrust, cccl). Will be exploded if a list. - - 'os' # Operating system. Will be exploded if a list. - -# These tags will only exist if needed: -optional_tags: - - 'std' # C++ standard. Passed to script with `-std `. Will be exploded if a list. - # If set to 'all', all stds supported by the host/device compiler are used. - - 'sm' # `CMAKE_CUDA_ARCHITECTURES` Passed to script with `-arch `. - # Defaults to use the settings in the CMakePresets.json file. - # Set to 'gpu' to only target the GPU in the `gpu` tag. - # Can pass multiple architectures via "60;70-real;80-virtual" - # Will be exploded if a list (e.g. `sm: ['60;70;80;90', '90a']` creates two jobs) - - 'cmake_options' # Additional CMake options to pass to the build. Passed to script with `-cmake_options ""`. - # Will be exploded if a list. + - {jobs: ['test_cpu'], ctk: '11.1'} -# `default_`: Used when the tag is omitted. 
-default_ctk: *ctk_curr -default_cudacxx: 'nvcc' -default_cxx: *gcc-newest -default_cpu: 'amd64' -default_gpu: 'v100' -default_project: - - 'libcudacxx' - - 'cub' - - 'thrust' -# Special handling: lookup os from ctk/cxx info -# See `matrix.yml` at https://github.com/rapidsai/devcontainers -default_os_lookup: - 'ctk11.1-gcc6': 'ubuntu18.04' - 'ctk11.1-gcc7': 'ubuntu18.04' - 'ctk11.1-gcc8': 'ubuntu18.04' - 'ctk11.1-gcc9': 'ubuntu18.04' - 'ctk11.1-llvm9': 'ubuntu18.04' - 'ctk11.1-cl14.16': 'windows2022' - 'ctk11.8-gcc11': 'ubuntu22.04' - 'ctk12.0-gcc7': 'ubuntu20.04' - 'ctk12.0-gcc8': 'ubuntu20.04' - 'ctk12.0-gcc9': 'ubuntu20.04' - 'ctk12.0-gcc10': 'ubuntu20.04' - 'ctk12.0-gcc11': 'ubuntu22.04' - 'ctk12.0-gcc12': 'ubuntu22.04' - 'ctk12.0-llvm9': 'ubuntu20.04' - 'ctk12.0-llvm10': 'ubuntu20.04' - 'ctk12.0-llvm11': 'ubuntu20.04' - 'ctk12.0-llvm12': 'ubuntu20.04' - 'ctk12.0-llvm13': 'ubuntu20.04' - 'ctk12.0-llvm14': 'ubuntu20.04' - 'ctk12.0-llvm15': 'ubuntu22.04' - 'ctk12.0-llvm16': 'ubuntu22.04' - 'ctk12.0-cl14.29': 'windows2022' - 'ctk12.0-cl14.36': 'windows2022' - 'ctk12.0-cl14.39': 'windows2022' - 'ctk12.0-oneapi2023.2.0': 'ubuntu22.04' - 'ctk12.4-gcc7': 'ubuntu20.04' - 'ctk12.4-gcc8': 'ubuntu20.04' - 'ctk12.4-gcc9': 'ubuntu20.04' - 'ctk12.4-gcc10': 'ubuntu20.04' - 'ctk12.4-gcc11': 'ubuntu22.04' - 'ctk12.4-gcc12': 'ubuntu22.04' - 'ctk12.4-gcc13': 'ubuntu22.04' - 'ctk12.4-llvm9': 'ubuntu20.04' - 'ctk12.4-llvm10': 'ubuntu20.04' - 'ctk12.4-llvm11': 'ubuntu20.04' - 'ctk12.4-llvm12': 'ubuntu20.04' - 'ctk12.4-llvm13': 'ubuntu20.04' - 'ctk12.4-llvm14': 'ubuntu20.04' - 'ctk12.4-llvm15': 'ubuntu22.04' - 'ctk12.4-llvm16': 'ubuntu22.04' - 'ctk12.4-llvm17': 'ubuntu22.04' - 'ctk12.4-cl14.29': 'windows2022' - 'ctk12.4-cl14.36': 'windows2022' - 'ctk12.4-cl14.39': 'windows2022' - 'ctk12.4-oneapi2023.2.0': 'ubuntu22.04' -# Lookup supported C++ standards for a given compiler when `std: 'all'`. 
-all_stds: [11, 14, 17, 20] -lookup_cxx_supported_stds: - 'gcc6': [11, 14 ] - 'gcc7': [11, 14, 17 ] - 'gcc8': [11, 14, 17 ] - 'gcc9': [11, 14, 17 ] - 'gcc10': [11, 14, 17, 20] - 'gcc11': [11, 14, 17, 20] - 'gcc12': [11, 14, 17, 20] - 'gcc13': [11, 14, 17, 20] - 'llvm9': [11, 14, 17 ] - 'llvm10': [11, 14, 17 ] - 'llvm11': [11, 14, 17, 20] - 'llvm12': [11, 14, 17, 20] - 'llvm13': [11, 14, 17, 20] - 'llvm14': [11, 14, 17, 20] - 'llvm15': [11, 14, 17, 20] - 'llvm16': [11, 14, 17, 20] - 'llvm17': [11, 14, 17, 20] - 'cl14.16': [ 14 ] - 'cl14.29': [ 14, 17 ] - 'cl14.36': [ 14, 17, 20] - 'cl14.39': [ 14, 17, 20] - 'oneapi2023.2.0': [11, 14, 17 ] -lookup_cudacxx_supported_stds: - 'nvcc11.1': [11, 14, 17 ] - 'nvcc11.8': [11, 14, 17 ] - 'nvcc12.0': [11, 14, 17, 20] - 'nvcc12.4': [11, 14, 17, 20] - 'llvm16': [11, 14, 17, 20] -lookup_project_supported_stds: - 'cccl': [11, 14, 17, 20] - 'libcudacxx': [11, 14, 17, 20] - 'cub': [11, 14, 17, 20] - 'thrust': [11, 14, 17, 20] - 'cudax': [ 17, 20] +############################################################################################# -# Tags that aren't exploded: -non_exploded_tags: - - 'jobs' # Keeping jobs as a list allows for dependency handling of build->test steps. -# Jobs that have an implied prerequisite 'build' job: -build_required_jobs: - - 'test' - - 'test_gpu' - - 'test_cpu' - - 'test_nolid' - - 'test_lid0' - - 'test_lid1' - - 'test_lid2' - -# Jobs that require a GPU -gpu_required_jobs: - - 'test' - - 'test_gpu' - - 'test_nolid' - - 'test_lid0' - - 'test_lid1' - - 'test_lid2' - - 'nvrtc' - - 'infra' # cccl infra's example project test launches a kernel - -# When --skip-tests is given to compute-matrix.py, these jobs are ignored. -skip_test_jobs: - - 'test' - - 'test_cpu' - - 'test_gpu' - - 'test_nolid' - - 'test_lid0' - - 'test_lid1' - - 'test_lid2' - - 'nvrtc' - - 'infra' - -# Map the job type to the script invocation spec: -# The script is invoked as `ci/_.sh `. -# 'prefix' is required. 'args' is optional. 
-# If a job is not specified explicitly, the default is { 'prefix': '' }. -job_invoke: - 'test_cpu' : { 'prefix': 'test', 'args': '-cpu-only' } - 'test_gpu' : { 'prefix': 'test', 'args': '-gpu-only' } - 'test_nolid' : { 'prefix': 'test', 'args': '-no-lid' } - 'test_lid0' : { 'prefix': 'test', 'args': '-lid0' } - 'test_lid1' : { 'prefix': 'test', 'args': '-lid1' } - 'test_lid2' : { 'prefix': 'test', 'args': '-lid2' } - -# When a listed project has a `test` job, it will be replaced with the specified list of finer-grain jobs. -project_expanded_tests: - 'thrust' : ['test_gpu', 'test_cpu'] - 'cub' : ['test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'] - -# Human readable name for jobs. Default behavior is to capitalize the first letter. -formatted_jobs: - 'nvrtc': 'NVRTC' - 'verify_codegen': 'VerifyCodegen' - 'test_cpu': 'TestCPU' - 'test_gpu': 'TestGPU' - 'test_nolid': 'TestGPU' - 'test_lid0': 'HostLaunch' - 'test_lid1': 'DeviceLaunch' - 'test_lid2': 'GraphCapture' - -# Human readable name for projects. Default behavior uses the project name as-is. -formatted_project_names: - 'libcudacxx': 'libcu++' - 'cub': 'CUB' - 'thrust': 'Thrust' - 'cccl': 'CCCL' - -# Human readable name for compilers. Default behavior uses the "compiler.name" tag as-is. -formatted_cxx_names: - 'llvm': 'clang' - 'oneapi': 'Intel' - 'cl': 'MSVC' - -# All known GPUs +# The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers +devcontainer_version: '24.08' + +# All supported C++ standards: +all_stds: [11, 14, 17, 20] + +ctk_versions: + 11.1: { stds: [11, 14, 17, ] } + 11.8: { stds: [11, 14, 17, ] } + 12.0: { stds: [11, 14, 17, 20] } + 12.4: { stds: [11, 14, 17, 20], aka: 'curr' } + +device_compilers: + nvcc: # Version / stds are taken from CTK + name: 'nvcc' + exe: 'nvcc' + clang: # Requires cxx=clang. Version / stds are taken from cxx compiler. 
+ name: "ClangCUDA" + exe: 'clang++' + +host_compilers: + gcc: + name: 'GCC' + container_tag: 'gcc' + exe: 'g++' + versions: + 6: { stds: [11, 14, ] } + 7: { stds: [11, 14, 17, ] } + 8: { stds: [11, 14, 17, ] } + 9: { stds: [11, 14, 17, ] } + 10: { stds: [11, 14, 17, 20] } + 11: { stds: [11, 14, 17, 20] } + 12: { stds: [11, 14, 17, 20] } + 13: { stds: [11, 14, 17, 20] } + clang: + name: 'Clang' + container_tag: 'llvm' + exe: 'clang++' + versions: + 9: { stds: [11, 14, 17, ] } + 10: { stds: [11, 14, 17, ] } + 11: { stds: [11, 14, 17, 20] } + 12: { stds: [11, 14, 17, 20] } + 13: { stds: [11, 14, 17, 20] } + 14: { stds: [11, 14, 17, 20] } + 15: { stds: [11, 14, 17, 20] } + 16: { stds: [11, 14, 17, 20] } + 17: { stds: [11, 14, 17, 20] } + msvc: + name: 'MSVC' + container_tag: 'cl' + exe: cl + versions: + 14.16: { stds: [ 14, ], aka: '2017' } + 14.29: { stds: [ 14, 17, ], aka: '2019' } + 14.36: { stds: [ 14, 17, 20] } + 14.39: { stds: [ 14, 17, 20], aka: '2022' } + intel: + name: 'Intel' + container_tag: 'oneapi' + exe: icpc + versions: + 2023.2.0: { stds: [11, 14, 17, ] } + +# Jobs support the following properties: +# +# - gpu: Whether the job requires a GPU runner. Default is false. +# - name: The human-readable name of the job. Default is the capitalized job key. +# - needs: +# - A list of jobs that must be completed before this job can run. Default is an empty list. +# - These are automatically added if needed: +# - Eg. "jobs: ['test']" in the workflow def will also create the required 'build' jobs. +# - invoke: +# - Map the job type to the script invocation spec: +# - prefix: The script invocation prefix. Default is the job name. +# - args: Additional arguments to pass to the script. Default is no args. 
+#   - The script is invoked either:
+#       linux:   `ci/<prefix>_<project>.sh <args>`
+#       windows: `ci/windows/<prefix>_<project>.ps1 <args>`
+jobs:
+  # General:
+  build: { gpu: false }
+  test:  { gpu: true, needs: 'build' }
+
+  # CCCL:
+  infra: { gpu: true } # example project launches a kernel
+
+  # libcudacxx:
+  nvrtc: { gpu: true, name: 'NVRTC' }
+  verify_codegen: { gpu: false, name: 'VerifyCodegen' }
+
+  # CUB:
+  # NoLid -> The string `lid_X` doesn't appear in the test name. Mostly warp/block tests, old device tests, and examples.
+  test_nolid: { name: 'TestGPU', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-no-lid'} }
+  # CUB uses `lid` to indicate launch strategies: whether CUB algorithms are:
+  # - launched from the host (lid0):
+  test_lid0: { name: 'HostLaunch', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-lid0'} }
+  # - launched from the device (lid1):
+  test_lid1: { name: 'DeviceLaunch', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-lid1'} }
+  # - captured in a CUDA graph for deferred launch (lid2):
+  test_lid2: { name: 'GraphCapture', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-lid2'} }
+
+  # Thrust:
+  test_cpu: { name: 'TestCPU', gpu: false, needs: 'build', invoke: { prefix: 'test', args: '-cpu-only'} }
+  test_gpu: { name: 'TestGPU', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-gpu-only'} }
+
+# Projects have the following properties:
+#
+# Keys are project subdirectory names. These will also be used in script names.
+#
+# - stds: A list of C++ standards to test. Required.
+# - name: The human-readable name of the project. Default is the project key.
+# - job_map: Map general jobs to arrays of project-specific jobs.
+#            Useful for things like splitting cpu/gpu testing for a project.
+#            E.g. "job_map: { test: ['test_cpu', 'test_gpu'] }" replaces
+#            the "test" job with distinct "test_cpu" and "test_gpu" jobs.
+projects: + cccl: + name: 'CCCL' + stds: [11, 14, 17, 20] + libcudacxx: + name: 'libcu++' + stds: [11, 14, 17, 20] + cub: + name: 'CUB' + stds: [11, 14, 17, 20] + job_map: { test: ['test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'] } + thrust: + name: 'Thrust' + stds: [11, 14, 17, 20] + job_map: { test: ['test_cpu', 'test_gpu'] } + cudax: + stds: [17, 20] + +# testing -> Runner with GPU is in a nv-gh-runners testing pool gpus: - - 'v100' # 40 runners - - 't4' # 8 runners - - 'rtx2080' # 8 runners - - 'rtxa6000' # 12 runners - - 'l4' # 48 runners - - 'rtx4090' # 10 runners - - 'h100' # 16 runners - -# SM versions of GPUs -gpu_sm: - 'v100': '70' - 't4': '75' - 'rtx2080': '75' - 'rtxa6000': '86' - 'l4': '89' - 'rtx4090': '89' - 'h100': '90' - -# Memory size of GPUs -gpu_mem_gb: - 'v100': '32' - 't4': '16' - 'rtx2080': '8' - 'rtxa6000': '48' - 'l4': '24' - 'rtx4090': '24' - 'h100': '80' - -# GPUs that require `-testing` at the end of the runner pool name. -testing_pool_gpus: - - 't4' - - 'rtx2080' - - 'rtxa6000' - - 'l4' - - 'rtx4090' + v100: { sm: 70 } # 32 GB, 40 runners + t4: { sm: 75, testing: true } # 16 GB, 8 runners + rtx2080: { sm: 75, testing: true } # 8 GB, 8 runners + rtxa6000: { sm: 86, testing: true } # 48 GB, 12 runners + l4: { sm: 89, testing: true } # 24 GB, 48 runners + rtx4090: { sm: 89, testing: true } # 24 GB, 10 runners + h100: { sm: 90 } # 80 GB, 16 runners + +# Tags are used to define a `matrix job` in the workflow section. +# +# Tags have the following options: +# - required: Whether the tag is required. Default is false. +# - default: The default value for the tag. Default is null. +tags: + # An array of jobs (e.g. 'build', 'test', 'nvrtc', 'infra', 'verify_codegen', ...) + # See the `jobs` map. + jobs: { required: true } + # CUDA ToolKit version + # See the `ctks` map. 
+ ctk: { default: 'curr' } + # CPU architecture + cpu: { default: 'amd64' } + # GPU model + gpu: { default: 'v100' } + # Host compiler {name, version, exe} + # See the `host_compilers` map. + cxx: { default: 'gcc' } + # Device compiler. + # See the `device_compilers` map. + cudacxx: { default: 'nvcc' } + # Project name (e.g. libcudacxx, cub, thrust, cccl) + # See the `projects` map. + project: { default: ['libcudacxx', 'cub', 'thrust'] } + # C++ standard + # If set to 'all', all stds supported by the ctk/compilers/project are used. + # If set, will be passed to script with `-std `. + std: { required: false } + # GPU architecture + # - If set, passed to script with `-arch `. + # - Format is the same as `CMAKE_CUDA_ARCHITECTURES`: + # - PTX only: 70-virtual + # - SASS only: 70-real + # - Both: 70 + # - Can pass multiple architectures via "60;70-real;80-virtual" + # - Defaults to use the settings in the CMakePresets.json file. + # - Will be exploded if an array, e.g. `sm: ['60;70;80;90', '90a']` creates two jobs. + # - Set to 'gpu' to only target the GPU in the `gpu` tag. + sm: { required: false } + # Additional CMake options to pass to the build. + # If set, passed to script with `-cmake_options ""`. + cmake_options: { required: false } diff --git a/docs/libcudacxx/extended_api/asynchronous_operations.rst b/docs/libcudacxx/extended_api/asynchronous_operations.rst index 3ca7a17c02..2cd1159af4 100644 --- a/docs/libcudacxx/extended_api/asynchronous_operations.rst +++ b/docs/libcudacxx/extended_api/asynchronous_operations.rst @@ -24,4 +24,4 @@ Asynchronous Operations .. note:: **Asynchronous operations** like `memcpy_async ` - are non-blocking operations performed _as-if_ by a new thread of execution. + are non-blocking operations performed as-if by a new thread of execution. 
diff --git a/thrust/testing/replace.cu b/thrust/testing/replace.cu index 6c4c4a5e08..26446ca743 100644 --- a/thrust/testing/replace.cu +++ b/thrust/testing/replace.cu @@ -17,6 +17,12 @@ && _CCCL_STD_VER == 2011 # define THRUST_GCC12_OMP_MISCOMPILE #endif + +// New GCC, new miscompile. 13 + TBB this time. +#if defined(_CCCL_COMPILER_GCC) && __GNUC__ == 13 && THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_TBB +# define THRUST_GCC13_TBB_MISCOMPILE +#endif + template void TestReplaceSimple() { @@ -92,7 +98,8 @@ void TestReplace(const size_t n) } DECLARE_VARIABLE_UNITTEST(TestReplace); -#ifndef THRUST_GCC12_OMP_MISCOMPILE +#ifndef THRUST_GCC13_TBB_MISCOMPILE +# ifndef THRUST_GCC12_OMP_MISCOMPILE template void TestReplaceCopySimple() { @@ -120,6 +127,7 @@ void TestReplaceCopySimple() ASSERT_EQUAL(dest, result); } DECLARE_VECTOR_UNITTEST(TestReplaceCopySimple); +# endif #endif template