From aaba8ad7613fd5daed310867a3591d5d9c64af7d Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sat, 20 Apr 2024 03:25:45 +0000 Subject: [PATCH] [DRAFT] CI experiments --- .github/actions/compute-matrix/action.yml | 25 - .../actions/compute-matrix/compute-matrix.sh | 82 ---- .github/workflows/ci-dispatch-group.yml | 46 ++ .github/workflows/ci-dispatch-job.yml | 118 +++++ .github/workflows/ci-dispatch-two-stage.yml | 52 +++ .github/workflows/dispatch-build-and-test.yml | 1 + .github/workflows/nightly.yml | 93 ++++ .github/workflows/pr.yml | 390 +++++++++------- ci/compute-matrix.py | 428 ++++++++++++++++++ ci/inspect_changes.sh | 21 +- ci/matrix.yaml | 208 ++++++--- 11 files changed, 1119 insertions(+), 345 deletions(-) delete mode 100644 .github/actions/compute-matrix/action.yml delete mode 100755 .github/actions/compute-matrix/compute-matrix.sh create mode 100644 .github/workflows/ci-dispatch-group.yml create mode 100644 .github/workflows/ci-dispatch-job.yml create mode 100644 .github/workflows/ci-dispatch-two-stage.yml create mode 100644 .github/workflows/nightly.yml create mode 100755 ci/compute-matrix.py diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml deleted file mode 100644 index b8155e7aa65..00000000000 --- a/.github/actions/compute-matrix/action.yml +++ /dev/null @@ -1,25 +0,0 @@ - -name: Compute Matrix -description: "Compute the matrix for a given matrix type from the specified matrix file" - -inputs: - matrix_query: - description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc" - required: true - matrix_file: - description: 'The file containing the matrix' - required: true -outputs: - matrix: - description: 'The requested matrix' - value: ${{ steps.compute-matrix.outputs.MATRIX }} - -runs: - using: "composite" - steps: - - name: Compute matrix - id: compute-matrix - run: | - MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) - echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT - shell: bash -euxo pipefail {0} diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh deleted file mode 100755 index 1629836d216..00000000000 --- a/.github/actions/compute-matrix/compute-matrix.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -write_output() { - local key="$1" - local value="$2" - echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" -} - -explode_std_versions() { - jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' -} - -explode_libs() { - jq -cr 'map(. 
as $o | {lib: $o.lib[]} + del($o.lib))' -} - -# Filter out the libraries that are dirty -filter_libs() { - all_libs=("libcudacxx" "thrust" "cub") - dirty_libs=() - for lib in "${all_libs[@]}"; do - dirty_var_name="${lib^^}_DIRTY" - # If the variable named in dirty_var_name is not set, set it to false: - : "${!dirty_var_name:=false}" - # Declare a nameref to the variable named in dirty_var_name - declare -n lib_dirty="$dirty_var_name" - # echo "${lib^^}_DIRTY: ${lib_dirty}" >> /dev/stderr - if [ "${lib_dirty}" = "true" ]; then - dirty_libs+=("$lib") - fi - done - # echo "Dirty libraries: ${dirty_libs[@]}" >> /dev/stderr - - # Construct a regex to filter out the dirty libraries - dirty_lib_regex=$(IFS="|"; echo "${dirty_libs[*]}") - dirty_lib_regex="^(${dirty_lib_regex})\$" - jq_filter="map(select(.lib | test(\"$dirty_lib_regex\")))" - jq -cr "$jq_filter" -} - -extract_matrix() { - local file="$1" - local type="$2" - local matrix=$(yq -o=json "$file" | jq -cr ".$type") - write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" - - local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" - local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" - write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix" - write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')" - - write_output "NVRTC_MATRIX" "$(echo "$matrix" | jq '.nvrtc' | explode_std_versions)" - - local clang_cuda_matrix="$(echo "$matrix" | jq -cr '.["clang-cuda"]' | explode_std_versions | explode_libs | filter_libs)" - write_output "CLANG_CUDA_MATRIX" "$clang_cuda_matrix" - write_output "CCCL_INFRA_MATRIX" "$(echo "$matrix" | jq -cr '.["cccl-infra"]' )" -} - -main() { - if [ "$1" == "-v" ]; then - set -x - shift - fi - - if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then - echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" - echo " -v : Enable verbose output" - echo " MATRIX_FILE : The path to the matrix file." - echo " MATRIX_TYPE : The desired matrix. 
Supported values: 'pull_request'" - exit 1 - fi - - echo "Input matrix file:" >&2 - cat "$1" >&2 - echo "Matrix Type: $2" >&2 - - extract_matrix "$1" "$2" -} - -main "$@" diff --git a/.github/workflows/ci-dispatch-group.yml b/.github/workflows/ci-dispatch-group.yml new file mode 100644 index 00000000000..43ce2583bf4 --- /dev/null +++ b/.github/workflows/ci-dispatch-group.yml @@ -0,0 +1,46 @@ +name: "CI/Dispatch/Group" + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + inputs: + name: {type: string, required: true} + jobs: {type: string, required: true} + +permissions: + contents: read + +jobs: + standlone-jobs: + if: fromJSON(inputs.jobs)['standalone'] + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['standalone']}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + dispatch: ${{ matrix.dispatch }} + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + + two-stage-jobs: + if: fromJSON(inputs.jobs)['two_stage'] + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['two_stage']}} + uses: ./.github/workflows/ci-dispatch-two-stage.yml + with: + producers: ${{ toJSON(matrix.producers) }} + consumers: ${{ toJSON(matrix.consumers) }} diff --git a/.github/workflows/ci-dispatch-job.yml b/.github/workflows/ci-dispatch-job.yml new file mode 100644 index 00000000000..3e135203f36 --- /dev/null +++ b/.github/workflows/ci-dispatch-job.yml @@ -0,0 +1,118 @@ +name: "CI/Dispatch/Job" + +defaults: + run: + shell: bash + +on: + workflow_call: + inputs: + dispatch: {type: string, required: true} + name: {type: string, required: true} + image: {type: string, required: true} + runner: {type: string, required: true} + command: {type: string, required: true} + env: {type: string, required: false} + dummy_matrix: {type: string, required: false, default: '[{"valid": true}]'} + +permissions: + contents: read + +jobs: + linux: + name: ${{inputs.name}} + if: startsWith(inputs.dispatch, 'linux') + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(startsWith(inputs.dispatch, 'linux') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + container: + options: -u root + image: ${{inputs.image}} + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + path: cccl + persist-credentials: false + - name: Move files to coder user home directory + run: | + cp -R cccl /home/coder/cccl + chown -R coder:coder /home/coder/ + - name: Add NVCC problem matcher + run: | + echo "::add-matcher::cccl/.github/problem-matchers/problem-matcher.json" + - name: Configure credentials and environment variables for sccache + uses: ./cccl/.github/actions/configure_cccl_sccache + - name: Run command + shell: su coder {0} + run: | + set -eo pipefail + cd ~/cccl + echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m" + echo -e "\e[1;34m${{inputs.command}}\e[0m" + eval "${{inputs.command}}" || exit_code=$? + if [ ! -z "$exit_code" ]; then + echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" + echo "::error:: To replicate this failure locally, follow the steps below:" + echo "1. 
Clone the repository, and navigate to the correct branch and commit:" + echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" + echo "" + echo "2. Run the failed command inside the same Docker container used by the CI:" + echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" + echo "" + echo "For additional information, see:" + echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" + echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" + exit $exit_code + fi + + windows: + name: ${{inputs.name}} + if: startsWith(inputs.dispatch, 'windows') + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(startsWith(inputs.dispatch, 'windows') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + env: + SCCACHE_BUCKET: rapids-sccache-devs + SCCACHE_REGION: us-east-2 + SCCACHE_IDLE_TIMEOUT: 0 + SCCACHE_S3_USE_SSL: true + SCCACHE_S3_NO_CREDENTIALS: false + steps: + - name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + aws-region: us-east-2 + role-duration-seconds: 43200 # 12 hours + - name: Fetch ${{ inputs.image }} + shell: powershell + run: docker pull ${{ inputs.image }} + - name: Run Command + shell: powershell + run: >- + docker run ${{ inputs.image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') + [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') + [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') + git clone https://github.com/NVIDIA/cccl.git; + cd cccl; + git fetch --all; + git checkout ${{github.ref_name}}; + ${{inputs.command}}" diff --git a/.github/workflows/ci-dispatch-two-stage.yml b/.github/workflows/ci-dispatch-two-stage.yml new file mode 100644 index 00000000000..4a9cf8e3223 --- /dev/null +++ b/.github/workflows/ci-dispatch-two-stage.yml @@ -0,0 +1,52 @@ +name: "CI/Dispatch/TwoStage" + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + inputs: + producers: {type: string, required: true} + consumers: {type: string, required: true} + +permissions: + contents: read + +jobs: + producers: + name: ${{ matrix.name }} + if: fromJSON(inputs.producers) + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.producers)}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + dispatch: ${{ matrix.dispatch }} + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + + consumers: + name: ${{ matrix.name }} + if: 
fromJSON(inputs.consumers) + needs: producers + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.consumers)}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + dispatch: ${{ matrix.dispatch }} + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml index 7b5ed4ef272..3f8227152ed 100644 --- a/.github/workflows/dispatch-build-and-test.yml +++ b/.github/workflows/dispatch-build-and-test.yml @@ -4,6 +4,7 @@ on: workflow_call: inputs: project_name: {type: string, required: true} + job_type: {type: string, required: true} per_cuda_compiler_matrix: {type: string, required: true} devcontainer_version: {type: string, required: true} is_windows: {type: boolean, required: true} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 00000000000..796b16ba0c0 --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,93 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is the main workflow that runs on every PR and push to main +name: nightly + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + # FIXME: This should be a cron job that runs nightly + push: # Testing only + branches: + - "pull-request/[0-9]+" + # schedule: + # - cron: '0 7 * * *' # 7AM UTC, 12AM PST, 3AM EST + +# Only runs one instance of this workflow at a time for a given PR and cancels any in-progress runs when a new one starts. 
+concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: read + +jobs: + compute-matrix: + name: Compute matrix + runs-on: ubuntu-latest + outputs: + WORKFLOW: ${{steps.compute-matrix.outputs.WORKFLOW}} + WORKFLOW_KEYS: ${{steps.compute-matrix.outputs.WORKFLOW_KEYS}} + steps: + - name: Get Base Branch from PR + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + - name: Checkout repo + uses: actions/checkout@v3 + - name: Identify dirty subprojects + id: inspect-changes + run: | + ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} + env: + BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + - name: Compute matrix outputs + id: compute-matrix + run: | + ci/compute-matrix.py ci/matrix.yaml ${{ github.workflow }} --dirty-projects ${{ steps.inspect-changes.outputs.DIRTY_PROJECTS }} + + dispatch-groups: + name: ${{ matrix.name }} + needs: + - compute-matrix + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + name: ${{ fromJSON(needs.compute-matrix.outputs.WORKFLOW_KEYS) }} + uses: ./.github/workflows/ci-dispatch-group.yml + with: + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.WORKFLOW)[ matrix.name ]) }} + + # This job is the final job that runs after all other jobs and is used for branch protection status checks. + # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks + # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 + ci: + runs-on: ubuntu-latest + name: CI + if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success + needs: + - dispatch-groups + steps: + - name: Check status of all precursor jobs + if: >- + ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} + run: exit 1 diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 3dcee0cf6c6..9ab6e3ca5b2 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -14,7 +14,7 @@ # limitations under the License. 
# This is the main workflow that runs on every PR and push to main -name: pr +name: pull_request defaults: run: @@ -35,13 +35,13 @@ permissions: pull-requests: read jobs: - inspect-changes: - name: "Inspect Changes" + compute-matrix: + name: Compute matrix runs-on: ubuntu-latest outputs: - LIBCUDACXX_DIRTY: ${{ steps.set-outputs.outputs.LIBCUDACXX_DIRTY }} - CUB_DIRTY: ${{ steps.set-outputs.outputs.CUB_DIRTY }} - THRUST_DIRTY: ${{ steps.set-outputs.outputs.THRUST_DIRTY }} + DEVCONTAINER_VERSION: ${{steps.compute-matrix.outputs.DEVCONTAINER_VERSION}} + WORKFLOW: ${{steps.compute-matrix.outputs.WORKFLOW}} + WORKFLOW_KEYS: ${{steps.compute-matrix.outputs.WORKFLOW_KEYS}} steps: - name: Get Base Branch from PR id: get-pr-info @@ -49,175 +49,31 @@ jobs: - name: Checkout repo uses: actions/checkout@v3 - name: Identify dirty subprojects - id: set-outputs + id: inspect-changes run: | ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} env: BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} - - compute-matrix: - name: Compute matrix - runs-on: ubuntu-latest - needs: - - inspect-changes - outputs: - DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} - PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} - PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} - NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}} - CLANG_CUDA_MATRIX: ${{steps.set-outputs.outputs.CLANG_CUDA_MATRIX}} - CCCL_INFRA_MATRIX: ${{steps.set-outputs.outputs.CCCL_INFRA_MATRIX}} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - name: Compute matrix outputs - id: set-outputs + id: compute-matrix run: | - .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request - env: - THRUST_DIRTY: ${{ needs.inspect-changes.outputs.THRUST_DIRTY }} - CUB_DIRTY: ${{ needs.inspect-changes.outputs.CUB_DIRTY }} - LIBCUDACXX_DIRTY: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY }} - - nvrtc: - name: libcudacxx NVRTC CUDA${{matrix.cuda}} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') && needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/run-as-coder.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.NVRTC_MATRIX) }} - with: - name: Build and Test libcudacxx CUDA${{matrix.cuda}} C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-gcc12-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/nvrtc_libcudacxx.sh -cxx g++ -std ${{matrix.std}} - - thrust: - name: Thrust CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.THRUST_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "thrust" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - cub: - name: CUB CUDA${{ 
matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.CUB_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "cub" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + ci/compute-matrix.py ci/matrix.yaml ${{ github.workflow }} --dirty-projects ${{ steps.inspect-changes.outputs.DIRTY_PROJECTS }} - libcudacxx: - name: libcudacxx CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read + dispatch-groups: + name: ${{ matrix.name }} needs: - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "libcudacxx" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - clang-cuda: - name: ${{matrix.lib}} Clang CUDA permissions: id-token: write contents: read - needs: compute-matrix strategy: fail-fast: false matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CLANG_CUDA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml + name: ${{ fromJSON(needs.compute-matrix.outputs.WORKFLOW_KEYS) }} + uses: ./.github/workflows/ci-dispatch-group.yml with: - name: Build ${{matrix.lib}} ${{matrix.cpu}}/clang-cuda${{matrix.compiler.version}}/C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-cpu16 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/build_${{matrix.lib}}.sh -cxx "${{matrix.compiler.exe}}" -cuda "${{matrix.compiler.exe}}" -std "${{matrix.std}}" - - cccl-infra: - name: CCCL Infrastructure - permissions: - id-token: write - contents: read - needs: compute-matrix - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }} - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CCCL_INFRA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml - with: - name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - cmake -S . 
--preset=cccl-infra -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} - ctest --preset=cccl-infra - - verify-devcontainers: - name: Verify Dev Containers - permissions: - id-token: write - contents: read - uses: ./.github/workflows/verify-devcontainers.yml - - verify-codegen: - name: Verify Codegen in libcudacxx - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Run libcudacxx codegen verification - id: verify-codegen - run: | - sudo apt-get update - sudo apt-get install ninja-build - export CXX="g++" - ./ci/verify_codegen.sh + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.WORKFLOW)[ matrix.name ]) }} # This job is the final job that runs after all other jobs and is used for branch protection status checks. # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks @@ -227,16 +83,216 @@ jobs: name: CI if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success needs: - - clang-cuda - - cub - - libcudacxx - - nvrtc - - thrust - - cccl-infra - - verify-devcontainers - - verify-codegen + - dispatch-groups steps: - name: Check status of all precursor jobs if: >- ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} run: exit 1 + +# jobs: +# inspect-changes: +# name: "Inspect Changes" +# runs-on: ubuntu-latest +# outputs: +# LIBCUDACXX_DIRTY: ${{ steps.set-outputs.outputs.LIBCUDACXX_DIRTY }} +# CUB_DIRTY: ${{ steps.set-outputs.outputs.CUB_DIRTY }} +# THRUST_DIRTY: ${{ steps.set-outputs.outputs.THRUST_DIRTY }} +# steps: +# - name: Get Base Branch from PR +# id: get-pr-info +# uses: nv-gha-runners/get-pr-info@main +# - name: Checkout repo +# uses: actions/checkout@v3 +# - name: Identify dirty subprojects +# id: set-outputs +# run: | +# ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} +# env: +# BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + +# compute-matrix: +# name: Compute matrix +# runs-on: ubuntu-latest +# needs: +# - inspect-changes +# outputs: +# DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} +# PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} +# PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} +# NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}} +# CLANG_CUDA_MATRIX: ${{steps.set-outputs.outputs.CLANG_CUDA_MATRIX}} +# CCCL_INFRA_MATRIX: ${{steps.set-outputs.outputs.CCCL_INFRA_MATRIX}} +# steps: +# - name: Checkout repo +# uses: actions/checkout@v3 +# - name: Compute matrix outputs +# id: set-outputs +# run: | +# .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request +# env: +# THRUST_DIRTY: ${{ needs.inspect-changes.outputs.THRUST_DIRTY }} +# CUB_DIRTY: ${{ needs.inspect-changes.outputs.CUB_DIRTY }} +# LIBCUDACXX_DIRTY: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY }} + +# nvrtc: +# name: libcudacxx NVRTC CUDA${{matrix.cuda}} +# permissions: +# id-token: write +# contents: read +# needs: +# - compute-matrix +# - inspect-changes +# if: ${{ !contains(github.event.head_commit.message, 'skip-tests') && needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} +# uses: ./.github/workflows/run-as-coder.yml +# strategy: +# fail-fast: false +# matrix: +# include: ${{ fromJSON(needs.compute-matrix.outputs.NVRTC_MATRIX) }} +# with: +# name: Build and Test libcudacxx CUDA${{matrix.cuda}} C++${{matrix.std}} +# 
runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 +# image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-gcc12-cuda${{matrix.cuda}}-${{matrix.os}} +# command: | +# ./ci/nvrtc_libcudacxx.sh -cxx g++ -std ${{matrix.std}} + +# thrust: +# name: Thrust CUDA${{ matrix.cuda_host_combination }} +# permissions: +# id-token: write +# contents: read +# needs: +# - compute-matrix +# - inspect-changes +# if: ${{ needs.inspect-changes.outputs.THRUST_DIRTY == 'true' }} +# uses: ./.github/workflows/dispatch-build-and-test.yml +# strategy: +# fail-fast: false +# matrix: +# cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} +# with: +# project_name: "thrust" +# per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} +# devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} +# is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + +# cub: +# name: CUB CUDA${{ matrix.cuda_host_combination }} +# permissions: +# id-token: write +# contents: read +# needs: +# - compute-matrix +# - inspect-changes +# if: ${{ needs.inspect-changes.outputs.CUB_DIRTY == 'true' }} +# uses: ./.github/workflows/dispatch-build-and-test.yml +# strategy: +# fail-fast: false +# matrix: +# cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} +# with: +# project_name: "cub" +# per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} +# devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} +# is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + +# libcudacxx: +# name: libcudacxx CUDA${{ matrix.cuda_host_combination }} +# permissions: +# id-token: write +# contents: read +# needs: +# - compute-matrix +# - inspect-changes +# if: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} +# uses: ./.github/workflows/dispatch-build-and-test.yml +# strategy: +# fail-fast: false +# matrix: +# cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} +# with: +# project_name: "libcudacxx" +# per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} +# devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} +# is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + +# clang-cuda: +# name: ${{matrix.lib}} Clang CUDA +# permissions: +# id-token: write +# contents: read +# needs: compute-matrix +# strategy: +# fail-fast: false +# matrix: +# include: ${{ fromJSON(needs.compute-matrix.outputs.CLANG_CUDA_MATRIX) }} +# uses: ./.github/workflows/run-as-coder.yml +# with: +# name: Build ${{matrix.lib}} ${{matrix.cpu}}/clang-cuda${{matrix.compiler.version}}/C++${{matrix.std}} +# runner: linux-${{matrix.cpu}}-cpu16 +# image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} +# command: | +# ./ci/build_${{matrix.lib}}.sh -cxx "${{matrix.compiler.exe}}" -cuda "${{matrix.compiler.exe}}" -std "${{matrix.std}}" + +# cccl-infra: +# name: CCCL Infrastructure +# permissions: +# id-token: write +# contents: read +# needs: compute-matrix +# if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }} +# strategy: +# fail-fast: false +# matrix: +# 
include: ${{ fromJSON(needs.compute-matrix.outputs.CCCL_INFRA_MATRIX) }} +# uses: ./.github/workflows/run-as-coder.yml +# with: +# name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}} +# runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 +# image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} +# command: | +# cmake -S . --preset=cccl-infra -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} +# ctest --preset=cccl-infra + +# verify-devcontainers: +# name: Verify Dev Containers +# permissions: +# id-token: write +# contents: read +# uses: ./.github/workflows/verify-devcontainers.yml + +# verify-codegen: +# name: Verify Codegen in libcudacxx +# runs-on: ubuntu-latest +# steps: +# - name: Checkout repo +# uses: actions/checkout@v3 +# - name: Run libcudacxx codegen verification +# id: verify-codegen +# run: | +# sudo apt-get update +# sudo apt-get install ninja-build +# export CXX="g++" +# ./ci/verify_codegen.sh + +# # This job is the final job that runs after all other jobs and is used for branch protection status checks. +# # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks +# # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 +# ci: +# runs-on: ubuntu-latest +# name: CI +# if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success +# needs: +# - clang-cuda +# - cub +# - libcudacxx +# - nvrtc +# - thrust +# - cccl-infra +# - verify-devcontainers +# - verify-codegen +# steps: +# - name: Check status of all precursor jobs +# if: >- +# ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} +# run: exit 1 diff --git a/ci/compute-matrix.py b/ci/compute-matrix.py new file mode 100755 index 00000000000..6b8d3463e65 --- /dev/null +++ b/ci/compute-matrix.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python3 + +""" +Concepts: +- matrix_job: an entry of a workflow matrix, converted from matrix.yaml["workflow"][id] into a JSON object. + Example: + { + "job_types": [ + "test" + ], + "ctk": "11.1", + "gpu": "t4", + "cmake_cuda_arch": "75-real", + "host_compiler": { + "name": "llvm", + "version": "9", + "exe": "clang++" + }, + "std": [ + 17 + ], + "projects": [ + "libcudacxx", + "cub", + "thrust" + ], + "os": "ubuntu18.04" + } + +Matrix jobs are read from the matrix.yaml file and converted into a JSON object and passed to matrix_job_to_dispatch_group, where +the matrix job is turned into one or more dispatch groups consisting of potentially many jobs. + +- dispatch_group_json: A json object used in conjunction with the ci-dispatch-groups.yml GHA workflow. + Example: + { + "": { + "standalone": [ {}, ... ] + "two_stage": [ {}, ] + } + } + +- two_stage_json: A json object that represents bulk-synchronous producer/consumer jobs, used with ci-dispatch-two-stage.yml. + Example: + { + "producers": [ {}, ... ], + "consumers": [ {}, ... ] + } + +- job_json: A json object that represents a single job in a workflow. Used with ci-dispatch-job.yml. + Example: + { + dispatch: "...", # (linux|windows)-(cpu|gpu) + name: "...", + runner: "...", + image: "...", + command: "..." 
}, + } +""" + +import argparse +import copy +import json +import os +import sys +import yaml + +matrix_yaml = None +dirty_projects = [] + + +def write_output(key, value): + print(f"{key}={value}") + + # Check if the GITHUB_OUTPUT environment variable is set, and write to that file if it is. + output_file = os.environ.get('GITHUB_OUTPUT') + if output_file: + with open(output_file, 'a') as f: + f.write(f"{key}={value}\n") + + +def lookup_os(ctk, host_compiler_name, host_compiler_version): + key = f'ctk{ctk}-{host_compiler_name}{host_compiler_version}' + return matrix_yaml['default_os_lookup'][key] + + +def get_formatted_projected_name(project_name): + return matrix_yaml['formatted_project_names'][project_name] + + +def is_windows(matrix_job): + return matrix_job['os'].startswith('windows') + + +def validate_matrix_job(matrix_job): + for tag in matrix_yaml['required_tags']: + if tag not in matrix_job: + raise Exception(f"Missing required tag {tag} in matrix job {matrix_job}") + + all_tags = set(matrix_job.keys()) | set(matrix_yaml['required_tags']) | set(matrix_yaml['defaulted_tags']) + for tag in matrix_job: + if tag not in all_tags: + raise Exception(f"Unknown tag {tag} in matrix job {matrix_job}") + + +def fill_defaults_matrix_job(matrix_job): + generic_defaults = set(matrix_yaml['defaulted_tags']) + generic_defaults -= set(['os']) # handled specially. + + for tag in generic_defaults: + if tag not in matrix_job: + matrix_job[tag] = matrix_yaml['default_'+tag] + + if 'os' not in matrix_job: + matrix_job['os'] = lookup_os(matrix_job['ctk'], + matrix_job['host_compiler']['name'], + matrix_job['host_compiler']['version']) + + # Expand nvcc device compiler shortcut: + if matrix_job['device_compiler'] == 'nvcc': + matrix_job['device_compiler'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'} + + +def explode_matrix_job(matrix_job): + new_jobs = [] + for tag in matrix_yaml['explodable_tags']: + if tag in matrix_job and isinstance(matrix_job[tag], list) and len(matrix_job[tag]) > 1: + for value in matrix_job[tag]: + new_job = copy.deepcopy(matrix_job) + new_job[tag] = [value] + exploded = explode_matrix_job(new_job) + if exploded: + new_jobs.extend(exploded) + else: + new_jobs.append(new_job) + # Only explode the first explodable tag. Recursion handles the others. 
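+            # (e.g. a job with std: [11, 17] and projects: ['cub', 'thrust'] ultimately
+            # expands into four fully-specified jobs, one per std/project combination)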
+ break + + return new_jobs if len(new_jobs) > 0 else None + + +def generate_dispatch_group_name(matrix_job): + project_name = get_formatted_projected_name(matrix_job['projects'][0]) + ctk = matrix_job['ctk'] + device_compiler = matrix_job['device_compiler'] + host_compiler = matrix_job['host_compiler'] + + compiler_info = "" + if device_compiler['name'] == 'nvcc': + compiler_info = f"nvcc {host_compiler['name']}" + elif device_compiler['name'] == 'llvm': + compiler_info = f"clang-cuda-{device_compiler['version']}" + else: + compiler_info = f"{device_compiler['name']}-{device_compiler['version']} {host_compiler['name']}" + + return f"{project_name} CTK{ctk} {compiler_info}" + + +def generate_dispatch_job_runner_dispatch(matrix_job, job_type): + runner_os = "windows" if is_windows(matrix_job) else "linux" + cpu_gpu = "gpu" if job_type in matrix_yaml['gpu_required_job_types'] else "cpu" + + return f"{runner_os}-{cpu_gpu}" + + +def generate_dispatch_job_name(matrix_job, job_type): + formatted_job_type = matrix_yaml['formatted_job_types'][job_type] + + gpu_str = "" + if job_type in matrix_yaml['gpu_required_job_types']: + gpu_str = " " + matrix_job['gpu'].upper() + + cuda_compile_arch = (" sm{" + matrix_job['cmake_cuda_arch'] + "}") if 'cmake_cuda_arch' in matrix_job else "" + cmake_options = (" " + matrix_job['cmake_options']) if 'cmake_options' in matrix_job else "" + cpu_str = (" " + matrix_job['cpu']) if matrix_job['cpu'] else "" + + return "{}: {}-{} C++{}{}{}{}{}".format( + formatted_job_type, + matrix_job['host_compiler']['name'], + matrix_job['host_compiler']['version'], + matrix_job['std'][0], + gpu_str, + cuda_compile_arch, + cmake_options, + cpu_str + ) + + +def generate_dispatch_job_runner(matrix_job, job_type): + runner_os = "windows" if is_windows(matrix_job) else "linux" + cpu = matrix_job['cpu'] + + if not job_type in matrix_yaml['gpu_required_job_types']: + return f"{runner_os}-{cpu}-cpu16" + + gpu = matrix_job['gpu'] + suffix = "-testing" if gpu in matrix_yaml['testing_pool_gpus'] else "" + + return f"{runner_os}-{cpu}-gpu-{gpu}-latest-1{suffix}" + + +def generate_dispatch_job_image(matrix_job, job_type): + devcontainer_version = matrix_yaml['devcontainer_version'] + ctk = matrix_job['ctk'] + image_os = matrix_job['os'] + host_compiler = matrix_job['host_compiler']['name'] + \ + matrix_job['host_compiler']['version'] + + if is_windows(matrix_job): + return f"rapidsai/devcontainers:{devcontainer_version}-cuda{ctk}-{host_compiler}-{image_os}" + + return f"rapidsai/devcontainers:{devcontainer_version}-cpp-{host_compiler}-cuda{ctk}-{image_os}" + + +def generate_dispatch_job_command(matrix_job, job_type): + script_path = ".ci/windows" if is_windows(matrix_job) else ".ci" + script_ext = ".ps1" if is_windows(matrix_job) else ".sh" + script_job_type = job_type + script_project = matrix_job['projects'][0] + script_name = f"{script_path}/{script_job_type}_{script_project}{script_ext}" + + std = matrix_job['std'][0] + host_compiler_exe = matrix_job['host_compiler']['exe'] + device_compiler_name = matrix_job['device_compiler']['name'] + device_compiler_exe = matrix_job['device_compiler']['exe'] + + cuda_compile_arch = matrix_job['cmake_cuda_arch'] if 'cmake_cuda_arch' in matrix_job else '' + cmake_options = matrix_job['cmake_options'] if 'cmake_options' in matrix_job else '' + + command = f"\"{script_name}\" -std {std}" + if cuda_compile_arch: + command += f" -arch \"{cuda_compile_arch}\"" + if device_compiler_name != 'nvcc': + command += f" -cuda \"{device_compiler_exe}\"" + if 
cmake_options: + cmake_args = " ".join([f"{key}={value}" for key, value in cmake_options.items()]) + command += f" -cmake-options \"{cmake_args}\"" + + return command + + +def generate_dispatch_job_json(matrix_job, job_type): + return { + 'dispatch': generate_dispatch_job_runner_dispatch(matrix_job, job_type), + 'name': generate_dispatch_job_name(matrix_job, job_type), + 'runner': generate_dispatch_job_runner(matrix_job, job_type), + 'image': generate_dispatch_job_image(matrix_job, job_type), + 'command': generate_dispatch_job_command(matrix_job, job_type) + } + + +# Create a single build producer, and a separate consumer for each test_job_type: +def generate_dispatch_build_and_test_json(matrix_job, build_job_type, test_job_types): + build_json = generate_dispatch_job_json(matrix_job, build_job_type) + + test_json = {} + for test_job_type in test_job_types: + test_json.update(generate_dispatch_job_json(matrix_job, test_job_type)) + + return { + "producers": [build_json], + "consumers": [test_json] + } + + +def generate_dispatch_group_jobs(matrix_job): + dispatch_group_jobs = { + "standalone": [], + "two_stage": [] + } + + job_types = set(copy.deepcopy(matrix_job['job_types'])) + for job_type in job_types: + if (not job_type in matrix_yaml['all_job_types']): + raise Exception(f"Unsupported job type {job_type}") + + # job_types that appear in build_required_job_types: + build_required = set(matrix_yaml['build_required_job_types']) & job_types + has_build_and_test = len(build_required) > 0 + job_types -= build_required + + has_standalone_build = 'build' in job_types and not has_build_and_test + job_types -= {'build'} + + if has_standalone_build: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, "build")) + elif has_build_and_test: + dispatch_group_jobs['two_stage'].append( + generate_dispatch_build_and_test_json(matrix_job, "build", build_required)) + + # Remaining jobs are assumed to be standalone (e.g. nvrtc): + for job_type in job_types: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, job_type)) + + return dispatch_group_jobs + + +def merge_dispatch_groups(accum_dispatch_groups, new_dispatch_groups): + for group_name, group_json in new_dispatch_groups.items(): + if group_name not in accum_dispatch_groups: + accum_dispatch_groups[group_name] = group_json + else: + # iterate standalone and two_stage: + for key, value in group_json.items(): + accum_dispatch_groups[group_name][key] += value + + +def matrix_job_to_dispatch_group(matrix_job): + validate_matrix_job(matrix_job) + fill_defaults_matrix_job(matrix_job) + + # If the job explodes, recurse into the results: + exploded_jobs = explode_matrix_job(matrix_job) + if exploded_jobs is not None: + all_dispatch_groups = {} + for job in exploded_jobs: + dispatch_group = matrix_job_to_dispatch_group(job) + merge_dispatch_groups(all_dispatch_groups, dispatch_group) + return all_dispatch_groups + + # Filter jobs that don't need to rerun: + if matrix_job['projects'][0] not in dirty_projects: + return {} + + # We have a fully specified job, start processing. 
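+    # Group names follow "<project> CTK<ctk> <compiler>", e.g. "Thrust CTK12.4 nvcc gcc".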
+    dispatch_group_name = generate_dispatch_group_name(matrix_job)
+    dispatch_group_jobs = generate_dispatch_group_jobs(matrix_job)
+
+    return {dispatch_group_name: dispatch_group_jobs}
+
+
+def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig):
+    workflow_dispatch_groups = copy.deepcopy(workflow_dispatch_groups_orig)
+
+    # Remove all named values that contain an empty list of jobs (iterate over a copy so entries can be deleted):
+    for group_name, group_json in list(workflow_dispatch_groups.items()):
+        if not group_json['standalone'] and not group_json['two_stage']:
+            del workflow_dispatch_groups[group_name]
+        elif not group_json['standalone']:
+            del group_json['standalone']
+        elif not group_json['two_stage']:
+            del group_json['two_stage']
+
+    # Sort the dispatch groups by name:
+    workflow_dispatch_groups = dict(sorted(workflow_dispatch_groups.items()))
+
+    # Sort the jobs within each dispatch group:
+    for group_name, group_json in workflow_dispatch_groups.items():
+        if 'standalone' in group_json:
+            group_json['standalone'] = sorted(group_json['standalone'], key=lambda x: x['name'])
+        if 'two_stage' in group_json:
+            group_json['two_stage'] = sorted(group_json['two_stage'], key=lambda x: x['producers'][0]['name'])
+
+    # Count the total number of jobs:
+    total_jobs = 0
+    for group_name, group_json in workflow_dispatch_groups.items():
+        if 'standalone' in group_json:
+            for job_json in group_json['standalone']:
+                total_jobs += 1
+                print(f"{total_jobs} - {group_name}: {job_json['name']}", file=sys.stderr)
+        if 'two_stage' in group_json:
+            for two_stage_json in group_json['two_stage']:
+                for job_json in two_stage_json['producers']:
+                    total_jobs += 1
+                    print(f"{total_jobs} - {group_name}: {job_json['name']}", file=sys.stderr)
+                for job_json in two_stage_json['consumers']:
+                    total_jobs += 1
+                    print(f"{total_jobs} - {group_name}: {job_json['name']}", file=sys.stderr)
+
+    print(f"Total jobs: {total_jobs}", file=sys.stderr)
+
+    return workflow_dispatch_groups
+
+
+def main():
+    global dirty_projects
+    global matrix_yaml
+
+    parser = argparse.ArgumentParser(description='Compute matrix for workflow')
+    parser.add_argument('matrix_file', help='Path to the matrix YAML file')
+    parser.add_argument('workflow', help='Name of the workflow')
+    parser.add_argument('--dirty-projects', nargs='*', dest='dirty_projects',
+                        help='Project(s) to rerun', default=[])
+    args = parser.parse_args()
+    dirty_projects = args.dirty_projects
+
+    # Check if the matrix file exists
+    if not os.path.isfile(args.matrix_file):
+        print(f"Error: Matrix file '{args.matrix_file}' does not exist.")
+        sys.exit(1)
+
+    with open(args.matrix_file, 'r') as f:
+        matrix_yaml = yaml.safe_load(f)
+
+    # Check if the workflow is valid
+    if args.workflow not in matrix_yaml:
+        print(f"Error: Workflow '{args.workflow}' does not exist in the matrix YAML.")
+        sys.exit(1)
+
+    # Print usage if no arguments are provided
+    if not args.matrix_file and not args.workflow:
+        parser.print_usage()
+        sys.exit(1)
+
+    # Print the arguments to stderr:
+    print("Arguments:", file=sys.stderr)
+    print(args, file=sys.stderr)
+    print("Matrix YAML:", file=sys.stderr)
+    print(matrix_yaml, file=sys.stderr)
+
+    matrix_json = matrix_yaml[args.workflow]
+
+    workflow_dispatch_groups = {}
+    for matrix_job in matrix_json:
+        merge_dispatch_groups(workflow_dispatch_groups, matrix_job_to_dispatch_group(matrix_job))
+
+    final_workflow = finalize_workflow_dispatch_groups(workflow_dispatch_groups)
+
+    # Pretty print the workflow json to stderr:
+    print(json.dumps(final_workflow, indent=2), file=sys.stderr)
+
+    # Print a
single-line, compact version of the workflow json to stdout: + write_output("WORKFLOW", json.dumps(final_workflow)) + # Print the list of key (dispatch group) names to stdout in a single line as a json list: + write_output("WORKFLOW_KEYS", json.dumps(list(final_workflow.keys()))) + + +if __name__ == '__main__': + main() diff --git a/ci/inspect_changes.sh b/ci/inspect_changes.sh index 59500a70554..7385318f524 100755 --- a/ci/inspect_changes.sh +++ b/ci/inspect_changes.sh @@ -90,19 +90,6 @@ add_dependencies() { return 0 } -# write_subproject_status -# Write the output _DIRTY={true|false} -write_subproject_status() { - local subproject="$1" - local dirty_flag=${subproject^^}_DIRTY - - if [[ ${!dirty_flag} -ne 0 ]]; then - write_output "${dirty_flag}" "true" - else - write_output "${dirty_flag}" "false" - fi -} - main() { # Print the list of subprojects and all of their dependencies: echo "Subprojects: ${subprojects[*]}" @@ -144,9 +131,15 @@ main() { done echo + declare -a dirty_subprojects=() for subproject in "${subprojects[@]}"; do - write_subproject_status ${subproject} + var_name="${subproject^^}_DIRTY" + if [[ ${!var_name} -ne 0 ]]; then + dirty_subprojects+=("$subproject") + fi done + + write_output "DIRTY_PROJECTS" "${dirty_subprojects[*]}" } main "$@" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 589de44bd3c..a50e235e87f 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,12 +1,6 @@ - -cuda_prev_min: &cuda_prev_min '11.1' -cuda_prev_max: &cuda_prev_max '11.8' -cuda_curr: &cuda_curr '12.4' - -# The GPUs to test on -gpus: - - 'a100' - - 'v100' +ctk_prev_min: &ctk_prev_min '11.1' +ctk_prev_max: &ctk_prev_max '11.8' +ctk_curr: &ctk_curr '12.4' # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '24.06' @@ -42,54 +36,154 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } # oneAPI configs oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' } -# Each environment below will generate a unique build/test job -# See the "compute-matrix" job in the workflow for how this is parsed and used -# cuda: The CUDA Toolkit version -# os: The operating system used -# cpu: The CPU architecture -# compiler: The compiler to use -# name: The compiler name -# version: The compiler version -# exe: The unverionsed compiler binary name -# std: The C++ standards to build for -# This field is unique as it will generate an independent build/test job for each value +# +# Resources for compute_matrix.py: +# + +# `default_`: Used when the tag is omitted. 
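+# For example, an entry of `{job_types: ['build'], std: [17]}` picks up ctk, cpu, gpu,
+# host/device compiler, projects, and os from the defaults below.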
+default_ctk: *ctk_curr +default_device_compiler: 'nvcc' +default_host_compiler: *gcc12 +default_cpu: 'amd64' +default_gpu: 'v100' +default_projects: &default_projects + - 'libcudacxx' + - 'cub' + - 'thrust' +# Special handling: lookup map +default_os_lookup: + 'ctk11.1-gcc6': 'ubuntu18.04' + 'ctk11.1-gcc7': 'ubuntu18.04' + 'ctk11.1-gcc8': 'ubuntu18.04' + 'ctk11.1-gcc9': 'ubuntu18.04' + 'ctk11.1-llvm9': 'ubuntu18.04' + 'ctk11.1-cl14.16': 'windows2022' + 'ctk11.8-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc7': 'ubuntu20.04' + 'ctk12.4-gcc8': 'ubuntu20.04' + 'ctk12.4-gcc9': 'ubuntu20.04' + 'ctk12.4-gcc10': 'ubuntu20.04' + 'ctk12.4-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc12': 'ubuntu22.04' + 'ctk12.4-llvm9': 'ubuntu20.04' + 'ctk12.4-llvm10': 'ubuntu20.04' + 'ctk12.4-llvm11': 'ubuntu20.04' + 'ctk12.4-llvm12': 'ubuntu20.04' + 'ctk12.4-llvm13': 'ubuntu20.04' + 'ctk12.4-llvm14': 'ubuntu20.04' + 'ctk12.4-llvm15': 'ubuntu22.04' + 'ctk12.4-llvm16': 'ubuntu22.04' + 'ctk12.4-cl14.29': 'windows2022' + 'ctk12.4-cl14.39': 'windows2022' + 'ctk12.4-oneapi2023.2.0': 'ubuntu22.04' + +all_gpus: + - 'v100' # ??x: sm70, 32 GB + - 't4' # 8x: sm75, 16 GB + - 'rtx2080' # 8x: sm75, 8 GB + - 'rtxa6000' # 12x: sm86, 48 GB + - 'l4' # 48x: sm89, 24 GB + - 'rtx4090' # 10x: sm89, 24 GB + - 'h100' # 16x: sm90, ?? GB +testing_pool_gpus: + - 't4' + - 'rtx2080' + - 'rtxa6000' + - 'l4' + - 'rtx4090' + - 'h100' + +all_projects: + - 'libcudacxx' + - 'cub' + - 'thrust' +formatted_project_names: + 'libcudacxx': 'libcu++' + 'cub': 'CUB' + 'thrust': 'Thrust' + +all_job_types: + - 'build' + - 'test' + - 'nvrtc' +formatted_job_types: + 'build': 'Build' + 'test': 'Test' + 'nvrtc': 'NVRTC' + +# Error if tags are missing: +required_tags: &required_tags ['job_types'] + +# Tags that will be added if not specified: +defaulted_tags: &defaulted_tags ['ctk', 'cpu', 'gpu', 'host_compiler', 'device_compiler', 'std', 'projects', 'os'] + +# Tags that may be omitted: +optional_tags: &optional_tags ['cmake_cuda_arch', 'cmake_options'] + +# If these tags are lists, they will be exploded into separate jobs +explodable_tags: ['projects', 'std'] + +# job_types that have an implied prerequisite 'build' job: +build_required_job_types: ['test'] + +# job_types that require a GPU +gpu_required_job_types: ['test', 'nvrtc'] + +# +# Workflow matrices: +# # Configurations that will run for every PR pull_request: - nvcc: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc6, std: [11, 14], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'windows2022', cpu: 'amd64', compiler: *msvc2017, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90'} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: 
['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *oneapi, std: [11, 14, 17], jobs: ['build']} - nvrtc: - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', std: [11, 14, 17, 20]} - clang-cuda: - - {lib: ['thrust', 'cub', 'libcudacxx'], cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest, std: [17, 20]} - cccl-infra: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc-oldest} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm-oldest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc-newest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest} + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc6, std: [11, 14] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc7, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc8, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc9, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *llvm9, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *msvc2017, std: [14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_max, host_compiler: *gcc11, std: [11, 14, 17], cmake_cuda_arch: '90'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc7, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc8, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc9, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: 
*gcc10, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc11, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20], cmake_cuda_arch: '90'} + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20], cpu: 'arm64'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm9, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm10, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm11, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm12, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm13, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm14, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm15, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: *llvm16, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm16, std: [11, 14, 17, 20], cpu: 'arm64'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2019, std: [14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2022, std: [14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *oneapi, std: [11, 14, 17] } + # nvrtc: + - {job_types: ['nvrtc'], projects: ['libcudacxx'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20]} + # clang-cuda: + - {job_types: ['build'], device_compiler: *llvm-newest, host_compiler: *llvm-newest, std: [17, 20]} + # cccl-infra: + # TODO: + # - {ctk: *ctk_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc-oldest} + # - {ctk: *ctk_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm-oldest} + # - {ctk: *ctk_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc-newest} + # - {ctk: *ctk_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest} + +# Run each night: +nightly: + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 'v100', cmake_cuda_arch: '70-real', host_compiler: *gcc6, std: [11] } + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 't4', cmake_cuda_arch: '75-real', host_compiler: *llvm9, std: [17] } + - {job_types: ['test'], ctk: *ctk_prev_max, gpu: 'rtx2080', cmake_cuda_arch: '75-real', host_compiler: *gcc11, std: [17] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86-real', host_compiler: *gcc7, std: [14] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89-real', host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', cmake_cuda_arch: '89-real', host_compiler: *llvm9, std: [11] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90-real', host_compiler: *gcc12, std: [11, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90-real', host_compiler: *llvm16, std: [17] } + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', cmake_cuda_arch: '75-real', host_compiler: *gcc12, std: [20], projects: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86-real', host_compiler: *gcc12, std: [20], projects: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89-real', host_compiler: *gcc12, std: [11, 14, 17, 20], projects: 
['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90-real', host_compiler: *gcc12, std: [11, 20], projects: ['libcudacxx']}
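
Usage sketch: the new generator can be exercised locally along these lines (assuming a checkout containing the ci/matrix.yaml and ci/compute-matrix.py added above, with PyYAML installed; the output shown is illustrative and abridged). The per-job listing and pretty-printed JSON go to stderr, while the machine-readable outputs are written to stdout and, when set, to $GITHUB_OUTPUT:

    $ ci/compute-matrix.py ci/matrix.yaml pull_request --dirty-projects cub thrust
    ...
    Total jobs: <N>
    WORKFLOW={"CUB CTK12.4 nvcc gcc": {"standalone": [...], "two_stage": [...]}, ...}
    WORKFLOW_KEYS=["CUB CTK12.4 nvcc gcc", "Thrust CTK12.4 nvcc gcc", ...]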