From aaba8ad7613fd5daed310867a3591d5d9c64af7d Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Sat, 20 Apr 2024 03:25:45 +0000 Subject: [PATCH] [DRAFT] CI experiments --- .github/actions/compute-matrix/action.yml | 25 - .../actions/compute-matrix/compute-matrix.sh | 82 ---- .github/workflows/ci-dispatch-group.yml | 46 ++ .github/workflows/ci-dispatch-job.yml | 118 +++++ .github/workflows/ci-dispatch-two-stage.yml | 52 +++ .github/workflows/dispatch-build-and-test.yml | 1 + .github/workflows/nightly.yml | 93 ++++ .github/workflows/pr.yml | 390 +++++++++------- ci/compute-matrix.py | 428 ++++++++++++++++++ ci/inspect_changes.sh | 21 +- ci/matrix.yaml | 208 ++++++--- 11 files changed, 1119 insertions(+), 345 deletions(-) delete mode 100644 .github/actions/compute-matrix/action.yml delete mode 100755 .github/actions/compute-matrix/compute-matrix.sh create mode 100644 .github/workflows/ci-dispatch-group.yml create mode 100644 .github/workflows/ci-dispatch-job.yml create mode 100644 .github/workflows/ci-dispatch-two-stage.yml create mode 100644 .github/workflows/nightly.yml create mode 100755 ci/compute-matrix.py diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml deleted file mode 100644 index b8155e7aa65..00000000000 --- a/.github/actions/compute-matrix/action.yml +++ /dev/null @@ -1,25 +0,0 @@ - -name: Compute Matrix -description: "Compute the matrix for a given matrix type from the specified matrix file" - -inputs: - matrix_query: - description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc" - required: true - matrix_file: - description: 'The file containing the matrix' - required: true -outputs: - matrix: - description: 'The requested matrix' - value: ${{ steps.compute-matrix.outputs.MATRIX }} - -runs: - using: "composite" - steps: - - name: Compute matrix - id: compute-matrix - run: | - MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} ) - echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT - shell: bash -euxo pipefail {0} diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh deleted file mode 100755 index 1629836d216..00000000000 --- a/.github/actions/compute-matrix/compute-matrix.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -write_output() { - local key="$1" - local value="$2" - echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" -} - -explode_std_versions() { - jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))' -} - -explode_libs() { - jq -cr 'map(. 
as $o | {lib: $o.lib[]} + del($o.lib))' -} - -# Filter out the libraries that are dirty -filter_libs() { - all_libs=("libcudacxx" "thrust" "cub") - dirty_libs=() - for lib in "${all_libs[@]}"; do - dirty_var_name="${lib^^}_DIRTY" - # If the variable named in dirty_var_name is not set, set it to false: - : "${!dirty_var_name:=false}" - # Declare a nameref to the variable named in dirty_var_name - declare -n lib_dirty="$dirty_var_name" - # echo "${lib^^}_DIRTY: ${lib_dirty}" >> /dev/stderr - if [ "${lib_dirty}" = "true" ]; then - dirty_libs+=("$lib") - fi - done - # echo "Dirty libraries: ${dirty_libs[@]}" >> /dev/stderr - - # Construct a regex to filter out the dirty libraries - dirty_lib_regex=$(IFS="|"; echo "${dirty_libs[*]}") - dirty_lib_regex="^(${dirty_lib_regex})\$" - jq_filter="map(select(.lib | test(\"$dirty_lib_regex\")))" - jq -cr "$jq_filter" -} - -extract_matrix() { - local file="$1" - local type="$2" - local matrix=$(yq -o=json "$file" | jq -cr ".$type") - write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')" - - local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )" - local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')" - write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix" - write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')" - - write_output "NVRTC_MATRIX" "$(echo "$matrix" | jq '.nvrtc' | explode_std_versions)" - - local clang_cuda_matrix="$(echo "$matrix" | jq -cr '.["clang-cuda"]' | explode_std_versions | explode_libs | filter_libs)" - write_output "CLANG_CUDA_MATRIX" "$clang_cuda_matrix" - write_output "CCCL_INFRA_MATRIX" "$(echo "$matrix" | jq -cr '.["cccl-infra"]' )" -} - -main() { - if [ "$1" == "-v" ]; then - set -x - shift - fi - - if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then - echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE" - echo " -v : Enable verbose output" - echo " MATRIX_FILE : The path to the matrix file." - echo " MATRIX_TYPE : The desired matrix. 
Supported values: 'pull_request'" - exit 1 - fi - - echo "Input matrix file:" >&2 - cat "$1" >&2 - echo "Matrix Type: $2" >&2 - - extract_matrix "$1" "$2" -} - -main "$@" diff --git a/.github/workflows/ci-dispatch-group.yml b/.github/workflows/ci-dispatch-group.yml new file mode 100644 index 00000000000..43ce2583bf4 --- /dev/null +++ b/.github/workflows/ci-dispatch-group.yml @@ -0,0 +1,46 @@ +name: "CI/Dispatch/Group" + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + inputs: + name: {type: string, required: true} + jobs: {type: string, required: true} + +permissions: + contents: read + +jobs: + standlone-jobs: + if: fromJSON(inputs.jobs)['standalone'] + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['standalone']}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + dispatch: ${{ matrix.dispatch }} + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + + two-stage-jobs: + if: fromJSON(inputs.jobs)['two_stage'] + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.jobs)['two_stage']}} + uses: ./.github/workflows/ci-dispatch-two-stage.yml + with: + producers: ${{ toJSON(matrix.producers) }} + consumers: ${{ toJSON(matrix.consumers) }} diff --git a/.github/workflows/ci-dispatch-job.yml b/.github/workflows/ci-dispatch-job.yml new file mode 100644 index 00000000000..3e135203f36 --- /dev/null +++ b/.github/workflows/ci-dispatch-job.yml @@ -0,0 +1,118 @@ +name: "CI/Dispatch/Job" + +defaults: + run: + shell: bash + +on: + workflow_call: + inputs: + dispatch: {type: string, required: true} + name: {type: string, required: true} + image: {type: string, required: true} + runner: {type: string, required: true} + command: {type: string, required: true} + env: {type: string, required: false} + dummy_matrix: {type: string, required: false, default: '[{"valid": true}]'} + +permissions: + contents: read + +jobs: + linux: + name: ${{inputs.name}} + if: startsWith(inputs.dispatch, 'linux') + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(startsWith(inputs.dispatch, 'linux') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + container: + options: -u root + image: ${{inputs.image}} + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + path: cccl + persist-credentials: false + - name: Move files to coder user home directory + run: | + cp -R cccl /home/coder/cccl + chown -R coder:coder /home/coder/ + - name: Add NVCC problem matcher + run: | + echo "::add-matcher::cccl/.github/problem-matchers/problem-matcher.json" + - name: Configure credentials and environment variables for sccache + uses: ./cccl/.github/actions/configure_cccl_sccache + - name: Run command + shell: su coder {0} + run: | + set -eo pipefail + cd ~/cccl + echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m" + echo -e "\e[1;34m${{inputs.command}}\e[0m" + eval "${{inputs.command}}" || exit_code=$? + if [ ! -z "$exit_code" ]; then + echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m" + echo "::error:: To replicate this failure locally, follow the steps below:" + echo "1. 
Clone the repository, and navigate to the correct branch and commit:" + echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA" + echo "" + echo "2. Run the failed command inside the same Docker container used by the CI:" + echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}" + echo "" + echo "For additional information, see:" + echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md" + echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md" + exit $exit_code + fi + + windows: + name: ${{inputs.name}} + if: startsWith(inputs.dispatch, 'windows') + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(startsWith(inputs.dispatch, 'windows') && inputs.dummy_matrix || '[]') }} + runs-on: ${{inputs.runner}} + env: + SCCACHE_BUCKET: rapids-sccache-devs + SCCACHE_REGION: us-east-2 + SCCACHE_IDLE_TIMEOUT: 0 + SCCACHE_S3_USE_SSL: true + SCCACHE_S3_NO_CREDENTIALS: false + steps: + - name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + aws-region: us-east-2 + role-duration-seconds: 43200 # 12 hours + - name: Fetch ${{ inputs.image }} + shell: powershell + run: docker pull ${{ inputs.image }} + - name: Run Command + shell: powershell + run: >- + docker run ${{ inputs.image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}') + [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}') + [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}') + [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}') + git clone https://github.com/NVIDIA/cccl.git; + cd cccl; + git fetch --all; + git checkout ${{github.ref_name}}; + ${{inputs.command}}" diff --git a/.github/workflows/ci-dispatch-two-stage.yml b/.github/workflows/ci-dispatch-two-stage.yml new file mode 100644 index 00000000000..4a9cf8e3223 --- /dev/null +++ b/.github/workflows/ci-dispatch-two-stage.yml @@ -0,0 +1,52 @@ +name: "CI/Dispatch/TwoStage" + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + workflow_call: + inputs: + producers: {type: string, required: true} + consumers: {type: string, required: true} + +permissions: + contents: read + +jobs: + producers: + name: ${{ matrix.name }} + if: fromJSON(inputs.producers) + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.producers)}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + dispatch: ${{ matrix.dispatch }} + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} + + consumers: + name: ${{ matrix.name }} + if: 
fromJSON(inputs.consumers) + needs: producers + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: ${{fromJSON(inputs.consumers)}} + uses: ./.github/workflows/ci-dispatch-job.yml + with: + dispatch: ${{ matrix.dispatch }} + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image: ${{ matrix.image }} + command: ${{ matrix.command }} diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml index 7b5ed4ef272..3f8227152ed 100644 --- a/.github/workflows/dispatch-build-and-test.yml +++ b/.github/workflows/dispatch-build-and-test.yml @@ -4,6 +4,7 @@ on: workflow_call: inputs: project_name: {type: string, required: true} + job_type: {type: string, required: true} per_cuda_compiler_matrix: {type: string, required: true} devcontainer_version: {type: string, required: true} is_windows: {type: boolean, required: true} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 00000000000..796b16ba0c0 --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,93 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is the main workflow that runs on every PR and push to main +name: nightly + +defaults: + run: + shell: bash -euo pipefail {0} + +on: + # FIXME: This should be a cron job that runs nightly + push: # Testing only + branches: + - "pull-request/[0-9]+" + # schedule: + # - cron: '0 7 * * *' # 7AM UTC, 12AM PST, 3AM EST + +# Only runs one instance of this workflow at a time for a given PR and cancels any in-progress runs when a new one starts. 
+concurrency: + group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: read + +jobs: + compute-matrix: + name: Compute matrix + runs-on: ubuntu-latest + outputs: + WORKFLOW: ${{steps.compute-matrix.outputs.WORKFLOW}} + WORKFLOW_KEYS: ${{steps.compute-matrix.outputs.WORKFLOW_KEYS}} + steps: + - name: Get Base Branch from PR + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + - name: Checkout repo + uses: actions/checkout@v3 + - name: Identify dirty subprojects + id: inspect-changes + run: | + ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} + env: + BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + - name: Compute matrix outputs + id: compute-matrix + run: | + ci/compute-matrix.py ci/matrix.yaml ${{ github.workflow }} --dirty-projects ${{ steps.inspect-changes.outputs.DIRTY_PROJECTS }} + + dispatch-groups: + name: ${{ matrix.name }} + needs: + - compute-matrix + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + name: ${{ fromJSON(needs.compute-matrix.outputs.WORKFLOW_KEYS) }} + uses: ./.github/workflows/ci-dispatch-group.yml + with: + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.WORKFLOW)[ matrix.name ]) }} + + # This job is the final job that runs after all other jobs and is used for branch protection status checks. + # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks + # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 + ci: + runs-on: ubuntu-latest + name: CI + if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success + needs: + - dispatch-groups + steps: + - name: Check status of all precursor jobs + if: >- + ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} + run: exit 1 diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 3dcee0cf6c6..9ab6e3ca5b2 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -14,7 +14,7 @@ # limitations under the License. 
# This is the main workflow that runs on every PR and push to main -name: pr +name: pull_request defaults: run: @@ -35,13 +35,13 @@ permissions: pull-requests: read jobs: - inspect-changes: - name: "Inspect Changes" + compute-matrix: + name: Compute matrix runs-on: ubuntu-latest outputs: - LIBCUDACXX_DIRTY: ${{ steps.set-outputs.outputs.LIBCUDACXX_DIRTY }} - CUB_DIRTY: ${{ steps.set-outputs.outputs.CUB_DIRTY }} - THRUST_DIRTY: ${{ steps.set-outputs.outputs.THRUST_DIRTY }} + DEVCONTAINER_VERSION: ${{steps.compute-matrix.outputs.DEVCONTAINER_VERSION}} + WORKFLOW: ${{steps.compute-matrix.outputs.WORKFLOW}} + WORKFLOW_KEYS: ${{steps.compute-matrix.outputs.WORKFLOW_KEYS}} steps: - name: Get Base Branch from PR id: get-pr-info @@ -49,175 +49,31 @@ jobs: - name: Checkout repo uses: actions/checkout@v3 - name: Identify dirty subprojects - id: set-outputs + id: inspect-changes run: | ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} env: BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} - - compute-matrix: - name: Compute matrix - runs-on: ubuntu-latest - needs: - - inspect-changes - outputs: - DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} - PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} - PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} - NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}} - CLANG_CUDA_MATRIX: ${{steps.set-outputs.outputs.CLANG_CUDA_MATRIX}} - CCCL_INFRA_MATRIX: ${{steps.set-outputs.outputs.CCCL_INFRA_MATRIX}} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - name: Compute matrix outputs - id: set-outputs + id: compute-matrix run: | - .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request - env: - THRUST_DIRTY: ${{ needs.inspect-changes.outputs.THRUST_DIRTY }} - CUB_DIRTY: ${{ needs.inspect-changes.outputs.CUB_DIRTY }} - LIBCUDACXX_DIRTY: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY }} - - nvrtc: - name: libcudacxx NVRTC CUDA${{matrix.cuda}} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') && needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/run-as-coder.yml - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.NVRTC_MATRIX) }} - with: - name: Build and Test libcudacxx CUDA${{matrix.cuda}} C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-gcc12-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/nvrtc_libcudacxx.sh -cxx g++ -std ${{matrix.std}} - - thrust: - name: Thrust CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.THRUST_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "thrust" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - cub: - name: CUB CUDA${{ 
matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read - needs: - - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.CUB_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "cub" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + ci/compute-matrix.py ci/matrix.yaml ${{ github.workflow }} --dirty-projects ${{ steps.inspect-changes.outputs.DIRTY_PROJECTS }} - libcudacxx: - name: libcudacxx CUDA${{ matrix.cuda_host_combination }} - permissions: - id-token: write - contents: read + dispatch-groups: + name: ${{ matrix.name }} needs: - compute-matrix - - inspect-changes - if: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} - uses: ./.github/workflows/dispatch-build-and-test.yml - strategy: - fail-fast: false - matrix: - cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} - with: - project_name: "libcudacxx" - per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} - devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} - is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} - - clang-cuda: - name: ${{matrix.lib}} Clang CUDA permissions: id-token: write contents: read - needs: compute-matrix strategy: fail-fast: false matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CLANG_CUDA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml + name: ${{ fromJSON(needs.compute-matrix.outputs.WORKFLOW_KEYS) }} + uses: ./.github/workflows/ci-dispatch-group.yml with: - name: Build ${{matrix.lib}} ${{matrix.cpu}}/clang-cuda${{matrix.compiler.version}}/C++${{matrix.std}} - runner: linux-${{matrix.cpu}}-cpu16 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - ./ci/build_${{matrix.lib}}.sh -cxx "${{matrix.compiler.exe}}" -cuda "${{matrix.compiler.exe}}" -std "${{matrix.std}}" - - cccl-infra: - name: CCCL Infrastructure - permissions: - id-token: write - contents: read - needs: compute-matrix - if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }} - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(needs.compute-matrix.outputs.CCCL_INFRA_MATRIX) }} - uses: ./.github/workflows/run-as-coder.yml - with: - name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}} - runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 - image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} - command: | - cmake -S . 
--preset=cccl-infra -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} - ctest --preset=cccl-infra - - verify-devcontainers: - name: Verify Dev Containers - permissions: - id-token: write - contents: read - uses: ./.github/workflows/verify-devcontainers.yml - - verify-codegen: - name: Verify Codegen in libcudacxx - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Run libcudacxx codegen verification - id: verify-codegen - run: | - sudo apt-get update - sudo apt-get install ninja-build - export CXX="g++" - ./ci/verify_codegen.sh + name: ${{ matrix.name }} + jobs: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.WORKFLOW)[ matrix.name ]) }} # This job is the final job that runs after all other jobs and is used for branch protection status checks. # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks @@ -227,16 +83,216 @@ jobs: name: CI if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success needs: - - clang-cuda - - cub - - libcudacxx - - nvrtc - - thrust - - cccl-infra - - verify-devcontainers - - verify-codegen + - dispatch-groups steps: - name: Check status of all precursor jobs if: >- ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} run: exit 1 + +# jobs: +# inspect-changes: +# name: "Inspect Changes" +# runs-on: ubuntu-latest +# outputs: +# LIBCUDACXX_DIRTY: ${{ steps.set-outputs.outputs.LIBCUDACXX_DIRTY }} +# CUB_DIRTY: ${{ steps.set-outputs.outputs.CUB_DIRTY }} +# THRUST_DIRTY: ${{ steps.set-outputs.outputs.THRUST_DIRTY }} +# steps: +# - name: Get Base Branch from PR +# id: get-pr-info +# uses: nv-gha-runners/get-pr-info@main +# - name: Checkout repo +# uses: actions/checkout@v3 +# - name: Identify dirty subprojects +# id: set-outputs +# run: | +# ./ci/inspect_changes.sh ${BASE_SHA} ${GITHUB_SHA} +# env: +# BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + +# compute-matrix: +# name: Compute matrix +# runs-on: ubuntu-latest +# needs: +# - inspect-changes +# outputs: +# DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}} +# PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}} +# PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}} +# NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}} +# CLANG_CUDA_MATRIX: ${{steps.set-outputs.outputs.CLANG_CUDA_MATRIX}} +# CCCL_INFRA_MATRIX: ${{steps.set-outputs.outputs.CCCL_INFRA_MATRIX}} +# steps: +# - name: Checkout repo +# uses: actions/checkout@v3 +# - name: Compute matrix outputs +# id: set-outputs +# run: | +# .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request +# env: +# THRUST_DIRTY: ${{ needs.inspect-changes.outputs.THRUST_DIRTY }} +# CUB_DIRTY: ${{ needs.inspect-changes.outputs.CUB_DIRTY }} +# LIBCUDACXX_DIRTY: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY }} + +# nvrtc: +# name: libcudacxx NVRTC CUDA${{matrix.cuda}} +# permissions: +# id-token: write +# contents: read +# needs: +# - compute-matrix +# - inspect-changes +# if: ${{ !contains(github.event.head_commit.message, 'skip-tests') && needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} +# uses: ./.github/workflows/run-as-coder.yml +# strategy: +# fail-fast: false +# matrix: +# include: ${{ fromJSON(needs.compute-matrix.outputs.NVRTC_MATRIX) }} +# with: +# name: Build and Test libcudacxx CUDA${{matrix.cuda}} C++${{matrix.std}} +# 
runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 +# image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-gcc12-cuda${{matrix.cuda}}-${{matrix.os}} +# command: | +# ./ci/nvrtc_libcudacxx.sh -cxx g++ -std ${{matrix.std}} + +# thrust: +# name: Thrust CUDA${{ matrix.cuda_host_combination }} +# permissions: +# id-token: write +# contents: read +# needs: +# - compute-matrix +# - inspect-changes +# if: ${{ needs.inspect-changes.outputs.THRUST_DIRTY == 'true' }} +# uses: ./.github/workflows/dispatch-build-and-test.yml +# strategy: +# fail-fast: false +# matrix: +# cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} +# with: +# project_name: "thrust" +# per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} +# devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} +# is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + +# cub: +# name: CUB CUDA${{ matrix.cuda_host_combination }} +# permissions: +# id-token: write +# contents: read +# needs: +# - compute-matrix +# - inspect-changes +# if: ${{ needs.inspect-changes.outputs.CUB_DIRTY == 'true' }} +# uses: ./.github/workflows/dispatch-build-and-test.yml +# strategy: +# fail-fast: false +# matrix: +# cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} +# with: +# project_name: "cub" +# per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} +# devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} +# is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + +# libcudacxx: +# name: libcudacxx CUDA${{ matrix.cuda_host_combination }} +# permissions: +# id-token: write +# contents: read +# needs: +# - compute-matrix +# - inspect-changes +# if: ${{ needs.inspect-changes.outputs.LIBCUDACXX_DIRTY == 'true' }} +# uses: ./.github/workflows/dispatch-build-and-test.yml +# strategy: +# fail-fast: false +# matrix: +# cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }} +# with: +# project_name: "libcudacxx" +# per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }} +# devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }} +# is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }} + +# clang-cuda: +# name: ${{matrix.lib}} Clang CUDA +# permissions: +# id-token: write +# contents: read +# needs: compute-matrix +# strategy: +# fail-fast: false +# matrix: +# include: ${{ fromJSON(needs.compute-matrix.outputs.CLANG_CUDA_MATRIX) }} +# uses: ./.github/workflows/run-as-coder.yml +# with: +# name: Build ${{matrix.lib}} ${{matrix.cpu}}/clang-cuda${{matrix.compiler.version}}/C++${{matrix.std}} +# runner: linux-${{matrix.cpu}}-cpu16 +# image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} +# command: | +# ./ci/build_${{matrix.lib}}.sh -cxx "${{matrix.compiler.exe}}" -cuda "${{matrix.compiler.exe}}" -std "${{matrix.std}}" + +# cccl-infra: +# name: CCCL Infrastructure +# permissions: +# id-token: write +# contents: read +# needs: compute-matrix +# if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }} +# strategy: +# fail-fast: false +# matrix: +# 
include: ${{ fromJSON(needs.compute-matrix.outputs.CCCL_INFRA_MATRIX) }} +# uses: ./.github/workflows/run-as-coder.yml +# with: +# name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}} +# runner: linux-${{matrix.cpu}}-gpu-v100-latest-1 +# image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}} +# command: | +# cmake -S . --preset=cccl-infra -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} +# ctest --preset=cccl-infra + +# verify-devcontainers: +# name: Verify Dev Containers +# permissions: +# id-token: write +# contents: read +# uses: ./.github/workflows/verify-devcontainers.yml + +# verify-codegen: +# name: Verify Codegen in libcudacxx +# runs-on: ubuntu-latest +# steps: +# - name: Checkout repo +# uses: actions/checkout@v3 +# - name: Run libcudacxx codegen verification +# id: verify-codegen +# run: | +# sudo apt-get update +# sudo apt-get install ninja-build +# export CXX="g++" +# ./ci/verify_codegen.sh + +# # This job is the final job that runs after all other jobs and is used for branch protection status checks. +# # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks +# # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101 +# ci: +# runs-on: ubuntu-latest +# name: CI +# if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success +# needs: +# - clang-cuda +# - cub +# - libcudacxx +# - nvrtc +# - thrust +# - cccl-infra +# - verify-devcontainers +# - verify-codegen +# steps: +# - name: Check status of all precursor jobs +# if: >- +# ${{contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')}} +# run: exit 1 diff --git a/ci/compute-matrix.py b/ci/compute-matrix.py new file mode 100755 index 00000000000..6b8d3463e65 --- /dev/null +++ b/ci/compute-matrix.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python3 + +""" +Concepts: +- matrix_job: an entry of a workflow matrix, converted from matrix.yaml["workflow"][id] into a JSON object. + Example: + { + "job_types": [ + "test" + ], + "ctk": "11.1", + "gpu": "t4", + "cmake_cuda_arch": "75-real", + "host_compiler": { + "name": "llvm", + "version": "9", + "exe": "clang++" + }, + "std": [ + 17 + ], + "projects": [ + "libcudacxx", + "cub", + "thrust" + ], + "os": "ubuntu18.04" + } + +Matrix jobs are read from the matrix.yaml file and converted into a JSON object and passed to matrix_job_to_dispatch_group, where +the matrix job is turned into one or more dispatch groups consisting of potentially many jobs. + +- dispatch_group_json: A json object used in conjunction with the ci-dispatch-groups.yml GHA workflow. + Example: + { + "": { + "standalone": [ {}, ... ] + "two_stage": [ {}, ] + } + } + +- two_stage_json: A json object that represents bulk-synchronous producer/consumer jobs, used with ci-dispatch-two-stage.yml. + Example: + { + "producers": [ {}, ... ], + "consumers": [ {}, ... ] + } + +- job_json: A json object that represents a single job in a workflow. Used with ci-dispatch-job.yml. + Example: + { + dispatch: "...", # (linux|windows)-(cpu|gpu) + name: "...", + runner: "...", + image: "...", + command: "..." 
}, + } +""" + +import argparse +import copy +import json +import os +import sys +import yaml + +matrix_yaml = None +dirty_projects = [] + + +def write_output(key, value): + print(f"{key}={value}") + + # Check if the GITHUB_OUTPUT environment variable is set, and write to that file if it is. + output_file = os.environ.get('GITHUB_OUTPUT') + if output_file: + with open(output_file, 'a') as f: + f.write(f"{key}={value}\n") + + +def lookup_os(ctk, host_compiler_name, host_compiler_version): + key = f'ctk{ctk}-{host_compiler_name}{host_compiler_version}' + return matrix_yaml['default_os_lookup'][key] + + +def get_formatted_projected_name(project_name): + return matrix_yaml['formatted_project_names'][project_name] + + +def is_windows(matrix_job): + return matrix_job['os'].startswith('windows') + + +def validate_matrix_job(matrix_job): + for tag in matrix_yaml['required_tags']: + if tag not in matrix_job: + raise Exception(f"Missing required tag {tag} in matrix job {matrix_job}") + + all_tags = set(matrix_job.keys()) | set(matrix_yaml['required_tags']) | set(matrix_yaml['defaulted_tags']) + for tag in matrix_job: + if tag not in all_tags: + raise Exception(f"Unknown tag {tag} in matrix job {matrix_job}") + + +def fill_defaults_matrix_job(matrix_job): + generic_defaults = set(matrix_yaml['defaulted_tags']) + generic_defaults -= set(['os']) # handled specially. + + for tag in generic_defaults: + if tag not in matrix_job: + matrix_job[tag] = matrix_yaml['default_'+tag] + + if 'os' not in matrix_job: + matrix_job['os'] = lookup_os(matrix_job['ctk'], + matrix_job['host_compiler']['name'], + matrix_job['host_compiler']['version']) + + # Expand nvcc device compiler shortcut: + if matrix_job['device_compiler'] == 'nvcc': + matrix_job['device_compiler'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'} + + +def explode_matrix_job(matrix_job): + new_jobs = [] + for tag in matrix_yaml['explodable_tags']: + if tag in matrix_job and isinstance(matrix_job[tag], list) and len(matrix_job[tag]) > 1: + for value in matrix_job[tag]: + new_job = copy.deepcopy(matrix_job) + new_job[tag] = [value] + exploded = explode_matrix_job(new_job) + if exploded: + new_jobs.extend(exploded) + else: + new_jobs.append(new_job) + # Only explode the first explodable tag. Recursion handles the others. 
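+            # (e.g. a job with std: [11, 17] and projects: ['cub', 'thrust'] ultimately
+            # expands into four fully-specified jobs, one per std/project combination)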
+ break + + return new_jobs if len(new_jobs) > 0 else None + + +def generate_dispatch_group_name(matrix_job): + project_name = get_formatted_projected_name(matrix_job['projects'][0]) + ctk = matrix_job['ctk'] + device_compiler = matrix_job['device_compiler'] + host_compiler = matrix_job['host_compiler'] + + compiler_info = "" + if device_compiler['name'] == 'nvcc': + compiler_info = f"nvcc {host_compiler['name']}" + elif device_compiler['name'] == 'llvm': + compiler_info = f"clang-cuda-{device_compiler['version']}" + else: + compiler_info = f"{device_compiler['name']}-{device_compiler['version']} {host_compiler['name']}" + + return f"{project_name} CTK{ctk} {compiler_info}" + + +def generate_dispatch_job_runner_dispatch(matrix_job, job_type): + runner_os = "windows" if is_windows(matrix_job) else "linux" + cpu_gpu = "gpu" if job_type in matrix_yaml['gpu_required_job_types'] else "cpu" + + return f"{runner_os}-{cpu_gpu}" + + +def generate_dispatch_job_name(matrix_job, job_type): + formatted_job_type = matrix_yaml['formatted_job_types'][job_type] + + gpu_str = "" + if job_type in matrix_yaml['gpu_required_job_types']: + gpu_str = " " + matrix_job['gpu'].upper() + + cuda_compile_arch = (" sm{" + matrix_job['cmake_cuda_arch'] + "}") if 'cmake_cuda_arch' in matrix_job else "" + cmake_options = (" " + matrix_job['cmake_options']) if 'cmake_options' in matrix_job else "" + cpu_str = (" " + matrix_job['cpu']) if matrix_job['cpu'] else "" + + return "{}: {}-{} C++{}{}{}{}{}".format( + formatted_job_type, + matrix_job['host_compiler']['name'], + matrix_job['host_compiler']['version'], + matrix_job['std'][0], + gpu_str, + cuda_compile_arch, + cmake_options, + cpu_str + ) + + +def generate_dispatch_job_runner(matrix_job, job_type): + runner_os = "windows" if is_windows(matrix_job) else "linux" + cpu = matrix_job['cpu'] + + if not job_type in matrix_yaml['gpu_required_job_types']: + return f"{runner_os}-{cpu}-cpu16" + + gpu = matrix_job['gpu'] + suffix = "-testing" if gpu in matrix_yaml['testing_pool_gpus'] else "" + + return f"{runner_os}-{cpu}-gpu-{gpu}-latest-1{suffix}" + + +def generate_dispatch_job_image(matrix_job, job_type): + devcontainer_version = matrix_yaml['devcontainer_version'] + ctk = matrix_job['ctk'] + image_os = matrix_job['os'] + host_compiler = matrix_job['host_compiler']['name'] + \ + matrix_job['host_compiler']['version'] + + if is_windows(matrix_job): + return f"rapidsai/devcontainers:{devcontainer_version}-cuda{ctk}-{host_compiler}-{image_os}" + + return f"rapidsai/devcontainers:{devcontainer_version}-cpp-{host_compiler}-cuda{ctk}-{image_os}" + + +def generate_dispatch_job_command(matrix_job, job_type): + script_path = ".ci/windows" if is_windows(matrix_job) else ".ci" + script_ext = ".ps1" if is_windows(matrix_job) else ".sh" + script_job_type = job_type + script_project = matrix_job['projects'][0] + script_name = f"{script_path}/{script_job_type}_{script_project}{script_ext}" + + std = matrix_job['std'][0] + host_compiler_exe = matrix_job['host_compiler']['exe'] + device_compiler_name = matrix_job['device_compiler']['name'] + device_compiler_exe = matrix_job['device_compiler']['exe'] + + cuda_compile_arch = matrix_job['cmake_cuda_arch'] if 'cmake_cuda_arch' in matrix_job else '' + cmake_options = matrix_job['cmake_options'] if 'cmake_options' in matrix_job else '' + + command = f"\"{script_name}\" -std {std}" + if cuda_compile_arch: + command += f" -arch \"{cuda_compile_arch}\"" + if device_compiler_name != 'nvcc': + command += f" -cuda \"{device_compiler_exe}\"" + if 
cmake_options: + cmake_args = " ".join([f"{key}={value}" for key, value in cmake_options.items()]) + command += f" -cmake-options \"{cmake_args}\"" + + return command + + +def generate_dispatch_job_json(matrix_job, job_type): + return { + 'dispatch': generate_dispatch_job_runner_dispatch(matrix_job, job_type), + 'name': generate_dispatch_job_name(matrix_job, job_type), + 'runner': generate_dispatch_job_runner(matrix_job, job_type), + 'image': generate_dispatch_job_image(matrix_job, job_type), + 'command': generate_dispatch_job_command(matrix_job, job_type) + } + + +# Create a single build producer, and a separate consumer for each test_job_type: +def generate_dispatch_build_and_test_json(matrix_job, build_job_type, test_job_types): + build_json = generate_dispatch_job_json(matrix_job, build_job_type) + + test_json = {} + for test_job_type in test_job_types: + test_json.update(generate_dispatch_job_json(matrix_job, test_job_type)) + + return { + "producers": [build_json], + "consumers": [test_json] + } + + +def generate_dispatch_group_jobs(matrix_job): + dispatch_group_jobs = { + "standalone": [], + "two_stage": [] + } + + job_types = set(copy.deepcopy(matrix_job['job_types'])) + for job_type in job_types: + if (not job_type in matrix_yaml['all_job_types']): + raise Exception(f"Unsupported job type {job_type}") + + # job_types that appear in build_required_job_types: + build_required = set(matrix_yaml['build_required_job_types']) & job_types + has_build_and_test = len(build_required) > 0 + job_types -= build_required + + has_standalone_build = 'build' in job_types and not has_build_and_test + job_types -= {'build'} + + if has_standalone_build: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, "build")) + elif has_build_and_test: + dispatch_group_jobs['two_stage'].append( + generate_dispatch_build_and_test_json(matrix_job, "build", build_required)) + + # Remaining jobs are assumed to be standalone (e.g. nvrtc): + for job_type in job_types: + dispatch_group_jobs['standalone'].append(generate_dispatch_job_json(matrix_job, job_type)) + + return dispatch_group_jobs + + +def merge_dispatch_groups(accum_dispatch_groups, new_dispatch_groups): + for group_name, group_json in new_dispatch_groups.items(): + if group_name not in accum_dispatch_groups: + accum_dispatch_groups[group_name] = group_json + else: + # iterate standalone and two_stage: + for key, value in group_json.items(): + accum_dispatch_groups[group_name][key] += value + + +def matrix_job_to_dispatch_group(matrix_job): + validate_matrix_job(matrix_job) + fill_defaults_matrix_job(matrix_job) + + # If the job explodes, recurse into the results: + exploded_jobs = explode_matrix_job(matrix_job) + if exploded_jobs is not None: + all_dispatch_groups = {} + for job in exploded_jobs: + dispatch_group = matrix_job_to_dispatch_group(job) + merge_dispatch_groups(all_dispatch_groups, dispatch_group) + return all_dispatch_groups + + # Filter jobs that don't need to rerun: + if matrix_job['projects'][0] not in dirty_projects: + return {} + + # We have a fully specified job, start processing. 
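+    # Group names follow "<project> CTK<ctk> <compiler>", e.g. "Thrust CTK12.4 nvcc gcc".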
+    dispatch_group_name = generate_dispatch_group_name(matrix_job)
+    dispatch_group_jobs = generate_dispatch_group_jobs(matrix_job)
+
+    return {dispatch_group_name: dispatch_group_jobs}
+
+
+def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig):
+    workflow_dispatch_groups = copy.deepcopy(workflow_dispatch_groups_orig)
+
+    # Remove all named values that contain an empty list of jobs (iterate over a copy so entries can be deleted):
+    for group_name, group_json in list(workflow_dispatch_groups.items()):
+        if not group_json['standalone'] and not group_json['two_stage']:
+            del workflow_dispatch_groups[group_name]
+        elif not group_json['standalone']:
+            del group_json['standalone']
+        elif not group_json['two_stage']:
+            del group_json['two_stage']
+
+    # Sort the dispatch groups by name:
+    workflow_dispatch_groups = dict(sorted(workflow_dispatch_groups.items()))
+
+    # Sort the jobs within each dispatch group:
+    for group_name, group_json in workflow_dispatch_groups.items():
+        if 'standalone' in group_json:
+            group_json['standalone'] = sorted(group_json['standalone'], key=lambda x: x['name'])
+        if 'two_stage' in group_json:
+            group_json['two_stage'] = sorted(group_json['two_stage'], key=lambda x: x['producers'][0]['name'])
+
+    # Count the total number of jobs:
+    total_jobs = 0
+    for group_name, group_json in workflow_dispatch_groups.items():
+        if 'standalone' in group_json:
+            for job_json in group_json['standalone']:
+                total_jobs += 1
+                print(f"{total_jobs} - {group_name}: {job_json['name']}", file=sys.stderr)
+        if 'two_stage' in group_json:
+            for two_stage_json in group_json['two_stage']:
+                for job_json in two_stage_json['producers']:
+                    total_jobs += 1
+                    print(f"{total_jobs} - {group_name}: {job_json['name']}", file=sys.stderr)
+                for job_json in two_stage_json['consumers']:
+                    total_jobs += 1
+                    print(f"{total_jobs} - {group_name}: {job_json['name']}", file=sys.stderr)
+
+    print(f"Total jobs: {total_jobs}", file=sys.stderr)
+
+    return workflow_dispatch_groups
+
+
+def main():
+    global dirty_projects
+    global matrix_yaml
+
+    parser = argparse.ArgumentParser(description='Compute matrix for workflow')
+    parser.add_argument('matrix_file', help='Path to the matrix YAML file')
+    parser.add_argument('workflow', help='Name of the workflow')
+    parser.add_argument('--dirty-projects', nargs='*', dest='dirty_projects',
+                        help='Project(s) to rerun', default=[])
+    args = parser.parse_args()
+    dirty_projects = args.dirty_projects
+
+    # Check if the matrix file exists
+    if not os.path.isfile(args.matrix_file):
+        print(f"Error: Matrix file '{args.matrix_file}' does not exist.")
+        sys.exit(1)
+
+    with open(args.matrix_file, 'r') as f:
+        matrix_yaml = yaml.safe_load(f)
+
+    # Check if the workflow is valid
+    if args.workflow not in matrix_yaml:
+        print(f"Error: Workflow '{args.workflow}' does not exist in the matrix YAML.")
+        sys.exit(1)
+
+    # Print usage if no arguments are provided
+    if not args.matrix_file and not args.workflow:
+        parser.print_usage()
+        sys.exit(1)
+
+    # Print the arguments to stderr:
+    print("Arguments:", file=sys.stderr)
+    print(args, file=sys.stderr)
+    print("Matrix YAML:", file=sys.stderr)
+    print(matrix_yaml, file=sys.stderr)
+
+    matrix_json = matrix_yaml[args.workflow]
+
+    workflow_dispatch_groups = {}
+    for matrix_job in matrix_json:
+        merge_dispatch_groups(workflow_dispatch_groups, matrix_job_to_dispatch_group(matrix_job))
+
+    final_workflow = finalize_workflow_dispatch_groups(workflow_dispatch_groups)
+
+    # Pretty print the workflow json to stderr:
+    print(json.dumps(final_workflow, indent=2), file=sys.stderr)
+
+    # Print a
single-line, compact version of the workflow json to stdout: + write_output("WORKFLOW", json.dumps(final_workflow)) + # Print the list of key (dispatch group) names to stdout in a single line as a json list: + write_output("WORKFLOW_KEYS", json.dumps(list(final_workflow.keys()))) + + +if __name__ == '__main__': + main() diff --git a/ci/inspect_changes.sh b/ci/inspect_changes.sh index 59500a70554..7385318f524 100755 --- a/ci/inspect_changes.sh +++ b/ci/inspect_changes.sh @@ -90,19 +90,6 @@ add_dependencies() { return 0 } -# write_subproject_status -# Write the output _DIRTY={true|false} -write_subproject_status() { - local subproject="$1" - local dirty_flag=${subproject^^}_DIRTY - - if [[ ${!dirty_flag} -ne 0 ]]; then - write_output "${dirty_flag}" "true" - else - write_output "${dirty_flag}" "false" - fi -} - main() { # Print the list of subprojects and all of their dependencies: echo "Subprojects: ${subprojects[*]}" @@ -144,9 +131,15 @@ main() { done echo + declare -a dirty_subprojects=() for subproject in "${subprojects[@]}"; do - write_subproject_status ${subproject} + var_name="${subproject^^}_DIRTY" + if [[ ${!var_name} -ne 0 ]]; then + dirty_subprojects+=("$subproject") + fi done + + write_output "DIRTY_PROJECTS" "${dirty_subprojects[*]}" } main "$@" diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 589de44bd3c..a50e235e87f 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -1,12 +1,6 @@ - -cuda_prev_min: &cuda_prev_min '11.1' -cuda_prev_max: &cuda_prev_max '11.8' -cuda_curr: &cuda_curr '12.4' - -# The GPUs to test on -gpus: - - 'a100' - - 'v100' +ctk_prev_min: &ctk_prev_min '11.1' +ctk_prev_max: &ctk_prev_max '11.8' +ctk_curr: &ctk_curr '12.4' # The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers devcontainer_version: '24.06' @@ -42,54 +36,154 @@ msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' } # oneAPI configs oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' } -# Each environment below will generate a unique build/test job -# See the "compute-matrix" job in the workflow for how this is parsed and used -# cuda: The CUDA Toolkit version -# os: The operating system used -# cpu: The CPU architecture -# compiler: The compiler to use -# name: The compiler name -# version: The compiler version -# exe: The unverionsed compiler binary name -# std: The C++ standards to build for -# This field is unique as it will generate an independent build/test job for each value +# +# Resources for compute_matrix.py: +# + +# `default_`: Used when the tag is omitted. 
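+# For example, an entry of `{job_types: ['build'], std: [17]}` picks up ctk, cpu, gpu,
+# host/device compiler, projects, and os from the defaults below.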
+default_ctk: *ctk_curr +default_device_compiler: 'nvcc' +default_host_compiler: *gcc12 +default_cpu: 'amd64' +default_gpu: 'v100' +default_projects: &default_projects + - 'libcudacxx' + - 'cub' + - 'thrust' +# Special handling: lookup map +default_os_lookup: + 'ctk11.1-gcc6': 'ubuntu18.04' + 'ctk11.1-gcc7': 'ubuntu18.04' + 'ctk11.1-gcc8': 'ubuntu18.04' + 'ctk11.1-gcc9': 'ubuntu18.04' + 'ctk11.1-llvm9': 'ubuntu18.04' + 'ctk11.1-cl14.16': 'windows2022' + 'ctk11.8-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc7': 'ubuntu20.04' + 'ctk12.4-gcc8': 'ubuntu20.04' + 'ctk12.4-gcc9': 'ubuntu20.04' + 'ctk12.4-gcc10': 'ubuntu20.04' + 'ctk12.4-gcc11': 'ubuntu22.04' + 'ctk12.4-gcc12': 'ubuntu22.04' + 'ctk12.4-llvm9': 'ubuntu20.04' + 'ctk12.4-llvm10': 'ubuntu20.04' + 'ctk12.4-llvm11': 'ubuntu20.04' + 'ctk12.4-llvm12': 'ubuntu20.04' + 'ctk12.4-llvm13': 'ubuntu20.04' + 'ctk12.4-llvm14': 'ubuntu20.04' + 'ctk12.4-llvm15': 'ubuntu22.04' + 'ctk12.4-llvm16': 'ubuntu22.04' + 'ctk12.4-cl14.29': 'windows2022' + 'ctk12.4-cl14.39': 'windows2022' + 'ctk12.4-oneapi2023.2.0': 'ubuntu22.04' + +all_gpus: + - 'v100' # ??x: sm70, 32 GB + - 't4' # 8x: sm75, 16 GB + - 'rtx2080' # 8x: sm75, 8 GB + - 'rtxa6000' # 12x: sm86, 48 GB + - 'l4' # 48x: sm89, 24 GB + - 'rtx4090' # 10x: sm89, 24 GB + - 'h100' # 16x: sm90, ?? GB +testing_pool_gpus: + - 't4' + - 'rtx2080' + - 'rtxa6000' + - 'l4' + - 'rtx4090' + - 'h100' + +all_projects: + - 'libcudacxx' + - 'cub' + - 'thrust' +formatted_project_names: + 'libcudacxx': 'libcu++' + 'cub': 'CUB' + 'thrust': 'Thrust' + +all_job_types: + - 'build' + - 'test' + - 'nvrtc' +formatted_job_types: + 'build': 'Build' + 'test': 'Test' + 'nvrtc': 'NVRTC' + +# Error if tags are missing: +required_tags: &required_tags ['job_types'] + +# Tags that will be added if not specified: +defaulted_tags: &defaulted_tags ['ctk', 'cpu', 'gpu', 'host_compiler', 'device_compiler', 'std', 'projects', 'os'] + +# Tags that may be omitted: +optional_tags: &optional_tags ['cmake_cuda_arch', 'cmake_options'] + +# If these tags are lists, they will be exploded into separate jobs +explodable_tags: ['projects', 'std'] + +# job_types that have an implied prerequisite 'build' job: +build_required_job_types: ['test'] + +# job_types that require a GPU +gpu_required_job_types: ['test', 'nvrtc'] + +# +# Workflow matrices: +# # Configurations that will run for every PR pull_request: - nvcc: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc6, std: [11, 14], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_prev_min, os: 'windows2022', cpu: 'amd64', compiler: *msvc2017, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90'} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: 
['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [11, 14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build', 'test']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *llvm16, std: [11, 14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [14, 17], jobs: ['build']} - - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [14, 17, 20], jobs: ['build']} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *oneapi, std: [11, 14, 17], jobs: ['build']} - nvrtc: - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', std: [11, 14, 17, 20]} - clang-cuda: - - {lib: ['thrust', 'cub', 'libcudacxx'], cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest, std: [17, 20]} - cccl-infra: - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc-oldest} - - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm-oldest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc-newest} - - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest} + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc6, std: [11, 14] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc7, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc8, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc9, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *llvm9, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *msvc2017, std: [14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_max, host_compiler: *gcc11, std: [11, 14, 17], cmake_cuda_arch: '90'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc7, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc8, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc9, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: 
*gcc10, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc11, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20], cmake_cuda_arch: '90'} + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20], cpu: 'arm64'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm9, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm10, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm11, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm12, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm13, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm14, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm15, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: *llvm16, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *llvm16, std: [11, 14, 17, 20], cpu: 'arm64'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2019, std: [14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2022, std: [14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *oneapi, std: [11, 14, 17] } + # nvrtc: + - {job_types: ['nvrtc'], projects: ['libcudacxx'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20]} + # clang-cuda: + - {job_types: ['build'], device_compiler: *llvm-newest, host_compiler: *llvm-newest, std: [17, 20]} + # cccl-infra: + # TODO: + # - {ctk: *ctk_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc-oldest} + # - {ctk: *ctk_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm-oldest} + # - {ctk: *ctk_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc-newest} + # - {ctk: *ctk_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm-newest} + +# Run each night: +nightly: + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 'v100', cmake_cuda_arch: '70-real', host_compiler: *gcc6, std: [11] } + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 't4', cmake_cuda_arch: '75-real', host_compiler: *llvm9, std: [17] } + - {job_types: ['test'], ctk: *ctk_prev_max, gpu: 'rtx2080', cmake_cuda_arch: '75-real', host_compiler: *gcc11, std: [17] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86-real', host_compiler: *gcc7, std: [14] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89-real', host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', cmake_cuda_arch: '89-real', host_compiler: *llvm9, std: [11] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90-real', host_compiler: *gcc12, std: [11, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90-real', host_compiler: *llvm16, std: [17] } + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', cmake_cuda_arch: '75-real', host_compiler: *gcc12, std: [20], projects: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86-real', host_compiler: *gcc12, std: [20], projects: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89-real', host_compiler: *gcc12, std: [11, 14, 17, 20], projects: 
['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90-real', host_compiler: *gcc12, std: [11, 20], projects: ['libcudacxx']}
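
Usage sketch: the new generator can be exercised locally along these lines (assuming a checkout containing the ci/matrix.yaml and ci/compute-matrix.py added above, with PyYAML installed; the output shown is illustrative and abridged). The per-job listing and pretty-printed JSON go to stderr, while the machine-readable outputs are written to stdout and, when set, to $GITHUB_OUTPUT:

    $ ci/compute-matrix.py ci/matrix.yaml pull_request --dirty-projects cub thrust
    ...
    Total jobs: <N>
    WORKFLOW={"CUB CTK12.4 nvcc gcc": {"standalone": [...], "two_stage": [...]}, ...}
    WORKFLOW_KEYS=["CUB CTK12.4 nvcc gcc", "Thrust CTK12.4 nvcc gcc", ...]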