From c5dcba0333baa4daf41292a8901044442c8d79c8 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 18 Sep 2024 14:43:38 -0500 Subject: [PATCH 01/10] Use CI workflow branch 'branch-24.10' again [skip ci] (#2437) --- .github/workflows/build.yaml | 16 ++++++++-------- .github/workflows/pr.yaml | 26 +++++++++++++------------- .github/workflows/test.yaml | 10 +++++----- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 037d7cc05f..2b0ae5099c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: package-name: pylibraft wheel-build-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -98,7 +98,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 26c6928f77..381ca6b378 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,29 +25,29 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 with: build_type: pull-request enable_check_symbols: true @@ -55,19 +55,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -77,34 +77,34 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: pull-request script: ci/test_wheel_pylibraft.sh wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 90518c1132..ad0456d526 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: raft_cutlass conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} From cd02a8f2b8dc22ec2115e80e559a4ab98d402c6e Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Fri, 20 Sep 2024 13:05:44 -0400 Subject: [PATCH 02/10] Fix sed syntax [skip-ci] (#2441) --- ci/release/update-version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 075eb896f2..5bb98511cf 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -58,7 +58,7 @@ for FILE in dependencies.yaml conda/environments/*.yaml; do sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done for DEP in "${UCXX_DEPENDENCIES[@]}"; do - sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCXX_SHORT_TAG_PEP440}.*/,>=0.0.0a0/g" "${FILE}" + sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCXX_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done done for FILE in python/*/pyproject.toml; do From f49567e1fb9f722f8864bc84880745b9b776eb61 Mon Sep 17 00:00:00 2001 From: Micka Date: Sat, 21 Sep 2024 01:09:00 +0200 Subject: [PATCH 03/10] Use runtime check of cudart version for eig (#2430) Authors: - Micka (https://github.com/lowener) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2430 --- cpp/include/raft/linalg/detail/eig.cuh | 31 ++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/linalg/detail/eig.cuh b/cpp/include/raft/linalg/detail/eig.cuh index ba7ed3dcdf..561187178c 100644 --- a/cpp/include/raft/linalg/detail/eig.cuh +++ b/cpp/include/raft/linalg/detail/eig.cuh @@ -95,16 +95,19 @@ void eigDC(raft::resources const& handle, return; #endif -#if CUDART_VERSION <= 12040 - // Use a new stream instead of `cudaStreamPerThread` to avoid cusolver bug # 4580093. + int cudart_version = 0; + RAFT_CUDA_TRY(cudaRuntimeGetVersion(&cudart_version)); + cudaStream_t stream_new; + cudaEvent_t sync_event = resource::detail::get_cuda_stream_sync_event(handle); rmm::cuda_stream stream_new_wrapper; - cudaStream_t stream_new = stream_new_wrapper.value(); - cudaEvent_t sync_event = resource::detail::get_cuda_stream_sync_event(handle); - RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream_new, sync_event)); -#else - cudaStream_t stream_new = stream; -#endif + if (cudart_version < 12050) { + // Use a new stream instead of `cudaStreamPerThread` to avoid cusolver bug # 4580093. + stream_new = stream_new_wrapper.value(); + RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream)); + RAFT_CUDA_TRY(cudaStreamWaitEvent(stream_new, sync_event)); + } else { + stream_new = stream; + } cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle); cusolverDnParams_t dn_params = nullptr; @@ -152,11 +155,11 @@ void eigDC(raft::resources const& handle, "eig.cuh: eigensolver couldn't converge to a solution. " "This usually occurs when some of the features do not vary enough."); -#if CUDART_VERSION <= 12040 - // Synchronize the created stream with the original stream before return - RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream_new)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, sync_event)); -#endif + if (cudart_version < 12050) { + // Synchronize the created stream with the original stream before return + RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream_new)); + RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, sync_event)); + } } enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT }; From b79f15d2f229849bc02425b2e4ffd7bd3db89d4c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 23 Sep 2024 16:09:01 -0500 Subject: [PATCH 04/10] Update fmt (to 11.0.2) and spdlog (to 1.14.1). (#2433) * Update fmt (to 11.0.2) and spdlog (to 1.14.1). * use rmm and ucxx CI artifacts * try using librmm wheels * try again to use rmm wheels * rapids-get-pr-wheel-artifact was missing RAPIDS_PY_WHEEL_NAME * ok you do not need to provide the Python slug yourself for rapids-get-pr-wheel-artifact * constraints need 'file://' protocol * try suppressing unreachable-code diagnostics from nvcc (this should be narrowed down / upstreamed before merging) * fix checks * Revert "try suppressing unreachable-code diagnostics from nvcc (this should be narrowed down / upstreamed before merging)" This reverts commit 3ba2201d678cc8befdfcaa0e89630a95a00a78b2. * copyright * move rapids-cmake overrides [skip ci] * kick off a build * fix dependency graph * devcontainer * run all CI * remove testing-only changes [skip ci] --- conda/recipes/libraft/conda_build_config.yaml | 4 ++-- .../raft-ann-bench-cpu/conda_build_config.yaml | 4 ++-- cpp/cmake/thirdparty/get_spdlog.cmake | 11 +---------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index 00b133c821..5c0047fb9c 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -73,7 +73,7 @@ cuda11_cuda_profiler_api_run_version: - ">=11.4.240,<12" spdlog_version: - - ">=1.12.0,<1.13" + - ">=1.14.1,<1.15" fmt_version: - - ">=10.1.1,<11" + - ">=11.0.2,<12" diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml index 70d1f0490e..ed6f708e14 100644 --- a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml @@ -23,7 +23,7 @@ nlohmann_json_version: - ">=3.11.2" spdlog_version: - - ">=1.12.0,<1.13" + - ">=1.14.1,<1.15" fmt_version: - - ">=10.1.1,<11" + - ">=11.0.2,<12" diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake index 7be7804c7e..57e38c2638 100644 --- a/cpp/cmake/thirdparty/get_spdlog.cmake +++ b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -19,15 +19,6 @@ function(find_and_configure_spdlog) rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET rmm-exports) rapids_export_package(BUILD spdlog rmm-exports) - if(spdlog_ADDED) - rapids_export( - BUILD spdlog - EXPORT_SET spdlog - GLOBAL_TARGETS spdlog spdlog_header_only - NAMESPACE spdlog::) - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root(BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET rmm-exports) - endif() endfunction() find_and_configure_spdlog() \ No newline at end of file From 878cefc923f25df07fc362126d1c269bdff30c0c Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:10:57 -0500 Subject: [PATCH 05/10] update update-version.sh to use packaging lib (#2447) --- ci/release/update-version.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 5bb98511cf..032b88b4aa 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -25,8 +25,8 @@ NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} NEXT_UCXX_SHORT_TAG="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG})" # Need to distutils-normalize the original version -NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") -NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_UCXX_SHORT_TAG}'))") +NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))") +NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_UCXX_SHORT_TAG}'))") echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" From 17757b84c38dfa671abe5e8794fd2bd9bc636772 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 25 Sep 2024 13:49:36 -0500 Subject: [PATCH 06/10] Switch traceback to `native` (#2446) In cudf we have observed a ~10% speed up of pytest suite execution by switching pytest traceback to `--native`: ``` currently: 102474 passed, 2117 skipped, 902 xfailed in 892.16s (0:14:52) --tb=short: 102474 passed, 2117 skipped, 902 xfailed in 898.99s (0:14:58) --tb=no: 102474 passed, 2117 skipped, 902 xfailed in 815.98s (0:13:35) --tb=native: 102474 passed, 2117 skipped, 902 xfailed in 820.92s (0:13:40) ``` This PR makes a similar change to `raft` repo. xref: https://github.com/rapidsai/cudf/pull/16851 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2446 --- python/pylibraft/pylibraft/test/pytest.ini | 5 +++++ python/raft-dask/pytest.ini | 1 + python/raft-dask/raft_dask/test/pytest.ini | 5 +++++ 3 files changed, 11 insertions(+) create mode 100644 python/pylibraft/pylibraft/test/pytest.ini create mode 100644 python/raft-dask/raft_dask/test/pytest.ini diff --git a/python/pylibraft/pylibraft/test/pytest.ini b/python/pylibraft/pylibraft/test/pytest.ini new file mode 100644 index 0000000000..bf70c06f84 --- /dev/null +++ b/python/pylibraft/pylibraft/test/pytest.ini @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native + diff --git a/python/raft-dask/pytest.ini b/python/raft-dask/pytest.ini index 2467e2089a..e09c2b173d 100644 --- a/python/raft-dask/pytest.ini +++ b/python/raft-dask/pytest.ini @@ -10,3 +10,4 @@ markers = nccl: marks a test as using NCCL ucx: marks a test as using UCX-Py ucxx: marks a test as using UCXX +addopts = --tb=native diff --git a/python/raft-dask/raft_dask/test/pytest.ini b/python/raft-dask/raft_dask/test/pytest.ini new file mode 100644 index 0000000000..bf70c06f84 --- /dev/null +++ b/python/raft-dask/raft_dask/test/pytest.ini @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native + From 0f8b09720054e5cf7a65dddfa02a2f06e7ba8adb Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 25 Sep 2024 20:53:54 -0400 Subject: [PATCH 07/10] Disable NN Descent Batch tests temporarily (#2453) Linked issue https://github.com/rapidsai/raft/issues/2450 Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2453 --- cpp/test/CMakeLists.txt | 4 +++- cpp/test/neighbors/ann_nn_descent.cuh | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index a497e6d3ba..5d504d2100 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -440,7 +440,9 @@ if(BUILD_TESTS) neighbors/ann_nn_descent/test_float_uint32_t.cu neighbors/ann_nn_descent/test_int8_t_uint32_t.cu neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu - neighbors/ann_nn_descent/test_batch_float_uint32_t.cu + # TODO: Investigate why this test is failing Reference issue + # https://github.com/rapidsai/raft/issues/2450 + # neighbors/ann_nn_descent/test_batch_float_uint32_t.cu LIB EXPLICIT_INSTANTIATE_ONLY GPUS diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index 2f9d4e252b..5070d83b15 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -318,13 +318,15 @@ const std::vector inputs = raft::util::itertools::product inputsBatch = - raft::util::itertools::product( - {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters - {4000, 5000}, // n_rows - {192, 512}, // dim - {32, 64}, // graph_degree - {raft::distance::DistanceType::L2Expanded}, - {false, true}); +// TODO: Investigate why this test is failing +// Reference issue https://github.com/rapidsai/raft/issues/2450 +// const std::vector inputsBatch = +// raft::util::itertools::product( +// {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters +// {4000, 5000}, // n_rows +// {192, 512}, // dim +// {32, 64}, // graph_degree +// {raft::distance::DistanceType::L2Expanded}, +// {false, true}); } // namespace raft::neighbors::experimental::nn_descent From 2e705fbf5bfa9af148871ba509ac55593da9a098 Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:01:05 -0700 Subject: [PATCH 08/10] Add missing `cuda_suffixed: true` (#2440) This change ensures `rmm-cuXX` is not in the package dependencies when building the pylibraft wheel with `-C "rapidsai.matrix-entry=cuda_suffixed=false`. Authors: - Paul Taylor (https://github.com/trxcllnt) - James Lamb (https://github.com/jameslamb) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/raft/pull/2440 --- dependencies.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 8f5c69245f..91fb176b56 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -499,10 +499,14 @@ dependencies: - *cuda_python - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} + - matrix: + cuda: "12.*" + cuda_suffixed: "true" packages: - *rmm_cu12 - - matrix: {cuda: "11.*"} + - matrix: + cuda: "11.*" + cuda_suffixed: "true" packages: - *rmm_cu11 - {matrix: null, packages: [*rmm_unsuffixed]} From 0284b424367f07220de133715043a963b341485a Mon Sep 17 00:00:00 2001 From: rhdong Date: Thu, 26 Sep 2024 07:42:29 -0700 Subject: [PATCH 09/10] [Feat] add `repeat`, `sparsity`, `eval_n_elements` APIs to `bitset` (#2439) - This PR is a part of the feature that applies the prefilter brute-force in Cagra. Authors: - rhdong (https://github.com/rhdong) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/raft/pull/2439 --- cpp/bench/prims/util/popc.cu | 7 +- cpp/include/raft/core/bitset.cuh | 107 +++++++++++++++++++++++- cpp/include/raft/core/bitset.hpp | 76 +++++++++++++++++ cpp/include/raft/util/detail/popc.cuh | 6 +- cpp/include/raft/util/popc.cuh | 4 +- cpp/test/core/bitset.cu | 114 +++++++++++++++++++++++--- cpp/test/util/popc.cu | 22 +++-- 7 files changed, 311 insertions(+), 25 deletions(-) diff --git a/cpp/bench/prims/util/popc.cu b/cpp/bench/prims/util/popc.cu index 249dc13d1e..c6249fb2bd 100644 --- a/cpp/bench/prims/util/popc.cu +++ b/cpp/bench/prims/util/popc.cu @@ -89,10 +89,9 @@ struct popc_bench : public fixture { auto bits_view = raft::make_device_vector_view(bits_d.data_handle(), bits_d.size()); - index_t max_len = params.n_rows * params.n_cols; - auto max_len_view = raft::make_host_scalar_view(&max_len); - auto nnz_actual_view = - nnz_actual_d.view(); // raft::make_device_scalar_view(nnz_actual_d.data_handle()); + index_t max_len = params.n_rows * params.n_cols; + auto max_len_view = raft::make_host_scalar_view(&max_len); + auto nnz_actual_view = nnz_actual_d.view(); raft::popc(this->handle, bits_view, max_len_view, nnz_actual_view); }); } diff --git a/cpp/include/raft/core/bitset.cuh b/cpp/include/raft/core/bitset.cuh index b6e6128eca..d1bffdb81e 100644 --- a/cpp/include/raft/core/bitset.cuh +++ b/cpp/include/raft/core/bitset.cuh @@ -26,6 +26,8 @@ #include #include +#include + #include namespace raft::core { @@ -60,6 +62,109 @@ _RAFT_DEVICE void bitset_view::set(const index_t sample_index } } +template +void bitset_view::count(const raft::resources& res, + raft::device_scalar_view count_gpu_scalar) const +{ + auto max_len = raft::make_host_scalar_view(&bitset_len_); + auto values = raft::make_device_vector_view(bitset_ptr_, n_elements()); + raft::popc(res, values, max_len, count_gpu_scalar); +} + +template +RAFT_KERNEL bitset_repeat_kernel(const bitset_t* src, + bitset_t* output, + index_t src_bit_len, + index_t repeat_times) +{ + constexpr index_t bits_per_element = sizeof(bitset_t) * 8; + int output_idx = blockIdx.x * blockDim.x + threadIdx.x; + + index_t total_bits = src_bit_len * repeat_times; + index_t output_size = (total_bits + bits_per_element - 1) / bits_per_element; + index_t src_size = (src_bit_len + bits_per_element - 1) / bits_per_element; + + if (output_idx < output_size) { + bitset_t result = 0; + index_t bit_written = 0; + + index_t start_bit = output_idx * bits_per_element; + + while (bit_written < bits_per_element && start_bit + bit_written < total_bits) { + index_t bit_idx = (start_bit + bit_written) % src_bit_len; + index_t src_word_idx = bit_idx / bits_per_element; + index_t src_offset = bit_idx % bits_per_element; + + index_t remaining_bits = min(bits_per_element - bit_written, src_bit_len - bit_idx); + + bitset_t src_value = (src[src_word_idx] >> src_offset); + + if (src_offset + remaining_bits > bits_per_element) { + bitset_t next_value = src[(src_word_idx + 1) % src_size]; + src_value |= (next_value << (bits_per_element - src_offset)); + } + src_value &= ((bitset_t{1} << remaining_bits) - 1); + result |= (src_value << bit_written); + bit_written += remaining_bits; + } + output[output_idx] = result; + } +} + +template +void bitset_repeat(raft::resources const& handle, + const bitset_t* d_src, + bitset_t* d_output, + index_t src_bit_len, + index_t repeat_times) +{ + if (src_bit_len == 0 || repeat_times == 0) return; + auto stream = resource::get_cuda_stream(handle); + + constexpr index_t bits_per_element = sizeof(bitset_t) * 8; + const index_t total_bits = src_bit_len * repeat_times; + const index_t output_size = (total_bits + bits_per_element - 1) / bits_per_element; + + int threadsPerBlock = 128; + int blocksPerGrid = (output_size + threadsPerBlock - 1) / threadsPerBlock; + bitset_repeat_kernel<<>>( + d_src, d_output, src_bit_len, repeat_times); + + return; +} + +template +void bitset_view::repeat(const raft::resources& res, + index_t times, + bitset_t* output_device_ptr) const +{ + auto thrust_policy = raft::resource::get_thrust_policy(res); + constexpr index_t bits_per_element = sizeof(bitset_t) * 8; + + if (bitset_len_ % bits_per_element == 0) { + index_t num_elements_to_copy = bitset_len_ / bits_per_element; + + for (index_t i = 0; i < times; ++i) { + raft::copy(output_device_ptr + i * num_elements_to_copy, + bitset_ptr_, + num_elements_to_copy, + raft::resource::get_cuda_stream(res)); + } + } else { + bitset_repeat(res, bitset_ptr_, output_device_ptr, bitset_len_, times); + } +} + +template +double bitset_view::sparsity(const raft::resources& res) const +{ + index_t size_h = this->size(); + if (0 == size_h) { return static_cast(1.0); } + index_t count_h = this->count(res); + + return static_cast((1.0 * (size_h - count_h)) / (1.0 * size_h)); +} + template bitset::bitset(const raft::resources& res, raft::device_vector_view mask_index, @@ -155,7 +260,7 @@ template void bitset::count(const raft::resources& res, raft::device_scalar_view count_gpu_scalar) { - auto max_len = raft::make_host_scalar_view(&bitset_len_); + auto max_len = raft::make_host_scalar_view(&bitset_len_); auto values = raft::make_device_vector_view(bitset_.data(), n_elements()); raft::popc(res, values, max_len, count_gpu_scalar); diff --git a/cpp/include/raft/core/bitset.hpp b/cpp/include/raft/core/bitset.hpp index 3608ee43fa..be828def87 100644 --- a/cpp/include/raft/core/bitset.hpp +++ b/cpp/include/raft/core/bitset.hpp @@ -22,6 +22,8 @@ #include #include +#include + namespace raft::core { /** * @defgroup bitset Bitset @@ -103,6 +105,80 @@ struct bitset_view { { return raft::make_device_vector_view(bitset_ptr_, n_elements()); } + /** + * @brief Returns the number of bits set to true in count_gpu_scalar. + * + * @param[in] res RAFT resources + * @param[out] count_gpu_scalar Device scalar to store the count + */ + void count(const raft::resources& res, raft::device_scalar_view count_gpu_scalar) const; + /** + * @brief Returns the number of bits set to true. + * + * @param res RAFT resources + * @return index_t Number of bits set to true + */ + auto count(const raft::resources& res) const -> index_t + { + auto count_gpu_scalar = raft::make_device_scalar(res, 0.0); + count(res, count_gpu_scalar.view()); + index_t count_cpu = 0; + raft::update_host( + &count_cpu, count_gpu_scalar.data_handle(), 1, resource::get_cuda_stream(res)); + resource::sync_stream(res); + return count_cpu; + } + + /** + * @brief Repeats the bitset data and copies it to the output device pointer. + * + * This function takes the original bitset data stored in the device memory + * and repeats it a specified number of times into a new location in the device memory. + * The bits are copied bit-by-bit to ensure that even if the number of bits (bitset_len_) + * is not a multiple of the bitset element size (e.g., 32 for uint32_t), the bits are + * tightly packed without any gaps between rows. + * + * @param res RAFT resources for managing CUDA streams and execution policies. + * @param times Number of times the bitset data should be repeated in the output. + * @param output_device_ptr Device pointer where the repeated bitset data will be stored. + * + * The caller must ensure that the output device pointer has enough memory allocated + * to hold `times * bitset_len` bits, where `bitset_len` is the number of bits in the original + * bitset. This function uses Thrust parallel algorithms to efficiently perform the operation on + * the GPU. + */ + void repeat(const raft::resources& res, index_t times, bitset_t* output_device_ptr) const; + + /** + * @brief Calculate the sparsity (fraction of 0s) of the bitset. + * + * This function computes the sparsity of the bitset, defined as the ratio of unset bits (0s) + * to the total number of bits in the set. If the total number of bits is zero, the function + * returns 1.0, indicating the set is fully sparse. + * + * @param res RAFT resources for managing CUDA streams and execution policies. + * @return double The sparsity of the bitset, i.e., the fraction of unset bits. + * + * This API will synchronize on the stream of `res`. + */ + double sparsity(const raft::resources& res) const; + + /** + * @brief Calculates the number of `bitset_t` elements required to store a bitset. + * + * This function computes the number of `bitset_t` elements needed to store a bitset, ensuring + * that all bits are accounted for. If the bitset length is not a multiple of the `bitset_t` size + * (in bits), the calculation rounds up to include the remaining bits in an additional `bitset_t` + * element. + * + * @param bitset_len The total length of the bitset in bits. + * @return size_t The number of `bitset_t` elements required to store the bitset. + */ + static inline size_t eval_n_elements(size_t bitset_len) + { + const size_t bits_per_element = sizeof(bitset_t) * 8; + return (bitset_len + bits_per_element - 1) / bits_per_element; + } private: bitset_t* bitset_ptr_; diff --git a/cpp/include/raft/util/detail/popc.cuh b/cpp/include/raft/util/detail/popc.cuh index 20b4814216..f335be6fd0 100644 --- a/cpp/include/raft/util/detail/popc.cuh +++ b/cpp/include/raft/util/detail/popc.cuh @@ -36,12 +36,12 @@ namespace raft::detail { */ template void popc(const raft::resources& res, - device_vector_view values, - raft::host_scalar_view max_len, + device_vector_view values, + raft::host_scalar_view max_len, raft::device_scalar_view counter) { auto values_size = values.size(); - auto values_matrix = raft::make_device_matrix_view( + auto values_matrix = raft::make_device_matrix_view( values.data_handle(), values_size, 1); auto counter_vector = raft::make_device_vector_view(counter.data_handle(), 1); diff --git a/cpp/include/raft/util/popc.cuh b/cpp/include/raft/util/popc.cuh index 153694e45e..d4bc01e274 100644 --- a/cpp/include/raft/util/popc.cuh +++ b/cpp/include/raft/util/popc.cuh @@ -31,8 +31,8 @@ namespace raft { */ template void popc(const raft::resources& res, - device_vector_view values, - raft::host_scalar_view max_len, + device_vector_view values, + raft::host_scalar_view max_len, raft::device_scalar_view counter) { detail::popc(res, values, max_len, counter); diff --git a/cpp/test/core/bitset.cu b/cpp/test/core/bitset.cu index b799297e8c..ac601274c1 100644 --- a/cpp/test/core/bitset.cu +++ b/cpp/test/core/bitset.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,12 +32,13 @@ struct test_spec_bitset { uint64_t bitset_len; uint64_t mask_len; uint64_t query_len; + uint64_t repeat_times; }; auto operator<<(std::ostream& os, const test_spec_bitset& ss) -> std::ostream& { os << "bitset{bitset_len: " << ss.bitset_len << ", mask_len: " << ss.mask_len - << ", query_len: " << ss.query_len << "}"; + << ", query_len: " << ss.query_len << ", repeat_times: " << ss.repeat_times << "}"; return os; } @@ -80,6 +81,48 @@ void flip_cpu_bitset(std::vector& bitset) } } +template +void repeat_cpu_bitset(std::vector& input, + size_t input_bits, + size_t repeat, + std::vector& output) +{ + const size_t output_bits = input_bits * repeat; + const size_t output_units = (output_bits + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8); + + std::memset(output.data(), 0, output_units * sizeof(bitset_t)); + + size_t output_bit_index = 0; + + for (size_t r = 0; r < repeat; ++r) { + for (size_t i = 0; i < input_bits; ++i) { + size_t input_unit_index = i / (sizeof(bitset_t) * 8); + size_t input_bit_offset = i % (sizeof(bitset_t) * 8); + bool bit = (input[input_unit_index] >> input_bit_offset) & 1; + + size_t output_unit_index = output_bit_index / (sizeof(bitset_t) * 8); + size_t output_bit_offset = output_bit_index % (sizeof(bitset_t) * 8); + + output[output_unit_index] |= (static_cast(bit) << output_bit_offset); + + ++output_bit_index; + } + } +} + +template +double sparsity_cpu_bitset(std::vector& data, size_t total_bits) +{ + size_t one_count = 0; + for (size_t i = 0; i < total_bits; ++i) { + size_t unit_index = i / (sizeof(bitset_t) * 8); + size_t bit_offset = i % (sizeof(bitset_t) * 8); + bool bit = (data[unit_index] >> bit_offset) & 1; + if (bit == 1) { ++one_count; } + } + return static_cast((total_bits - one_count) / (1.0 * total_bits)); +} + template class BitsetTest : public testing::TestWithParam { protected: @@ -87,13 +130,19 @@ class BitsetTest : public testing::TestWithParam { const test_spec_bitset spec; std::vector bitset_result; std::vector bitset_ref; + std::vector bitset_repeat_ref; + std::vector bitset_repeat_result; raft::resources res; public: explicit BitsetTest() : spec(testing::TestWithParam::GetParam()), bitset_result(raft::ceildiv(spec.bitset_len, uint64_t(bitset_element_size))), - bitset_ref(raft::ceildiv(spec.bitset_len, uint64_t(bitset_element_size))) + bitset_ref(raft::ceildiv(spec.bitset_len, uint64_t(bitset_element_size))), + bitset_repeat_ref( + raft::ceildiv(spec.bitset_len * spec.repeat_times, uint64_t(bitset_element_size))), + bitset_repeat_result( + raft::ceildiv(spec.bitset_len * spec.repeat_times, uint64_t(bitset_element_size))) { } @@ -145,6 +194,50 @@ class BitsetTest : public testing::TestWithParam { resource::sync_stream(res, stream); ASSERT_TRUE(hostVecMatch(bitset_ref, bitset_result, raft::Compare())); + // test sparsity, repeat and eval_n_elements + { + auto my_bitset_view = my_bitset.view(); + auto sparsity_result = my_bitset_view.sparsity(res); + auto sparsity_ref = sparsity_cpu_bitset(bitset_ref, size_t(spec.bitset_len)); + ASSERT_EQ(sparsity_result, sparsity_ref); + + auto eval_n_elements = + bitset_view::eval_n_elements(spec.bitset_len * spec.repeat_times); + ASSERT_EQ(bitset_repeat_ref.size(), eval_n_elements); + + auto repeat_device = raft::make_device_vector(res, eval_n_elements); + RAFT_CUDA_TRY(cudaMemsetAsync( + repeat_device.data_handle(), 0, eval_n_elements * sizeof(bitset_t), stream)); + repeat_cpu_bitset( + bitset_ref, size_t(spec.bitset_len), size_t(spec.repeat_times), bitset_repeat_ref); + + my_bitset_view.repeat(res, index_t(spec.repeat_times), repeat_device.data_handle()); + + ASSERT_EQ(bitset_repeat_ref.size(), repeat_device.size()); + update_host( + bitset_repeat_result.data(), repeat_device.data_handle(), repeat_device.size(), stream); + ASSERT_EQ(bitset_repeat_ref.size(), bitset_repeat_result.size()); + + index_t errors = 0; + static constexpr index_t len_per_item = sizeof(bitset_t) * 8; + bitset_t tail_len = (index_t(spec.bitset_len * spec.repeat_times) % len_per_item); + bitset_t tail_mask = + tail_len ? (bitset_t)((bitset_t{1} << tail_len) - bitset_t{1}) : ~bitset_t{0}; + for (index_t i = 0; i < bitset_repeat_ref.size(); i++) { + if (i == bitset_repeat_ref.size() - 1) { + errors += (bitset_repeat_ref[i] & tail_mask) != (bitset_repeat_result[i] & tail_mask); + } else { + errors += (bitset_repeat_ref[i] != bitset_repeat_result[i]); + } + } + ASSERT_EQ(errors, 0); + + // recheck the sparsity after repeat + sparsity_result = + sparsity_cpu_bitset(bitset_repeat_result, size_t(spec.bitset_len * spec.repeat_times)); + ASSERT_EQ(sparsity_result, sparsity_ref); + } + // Flip the bitset and re-test auto bitset_count = my_bitset.count(res); my_bitset.flip(res); @@ -167,13 +260,14 @@ class BitsetTest : public testing::TestWithParam { } }; -auto inputs_bitset = ::testing::Values(test_spec_bitset{32, 5, 10}, - test_spec_bitset{100, 30, 10}, - test_spec_bitset{1024, 55, 100}, - test_spec_bitset{10000, 1000, 1000}, - test_spec_bitset{1 << 15, 1 << 3, 1 << 12}, - test_spec_bitset{1 << 15, 1 << 24, 1 << 13}, - test_spec_bitset{1 << 25, 1 << 23, 1 << 14}); +auto inputs_bitset = ::testing::Values(test_spec_bitset{32, 5, 10, 101}, + test_spec_bitset{100, 30, 10, 13}, + test_spec_bitset{1024, 55, 100, 1}, + test_spec_bitset{10000, 1000, 1000, 100}, + test_spec_bitset{1 << 15, 1 << 3, 1 << 12, 5}, + test_spec_bitset{1 << 15, 1 << 24, 1 << 13, 3}, + test_spec_bitset{1 << 25, 1 << 23, 1 << 14, 3}, + test_spec_bitset{1 << 25, 1 << 23, 1 << 14, 21}); using Uint16_32 = BitsetTest; TEST_P(Uint16_32, Run) { run(); } diff --git a/cpp/test/util/popc.cu b/cpp/test/util/popc.cu index c08faacb07..28eaad2fcb 100644 --- a/cpp/test/util/popc.cu +++ b/cpp/test/util/popc.cu @@ -76,7 +76,7 @@ class PopcTest : public ::testing::TestWithParam> { index_t bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1) == 0) { - element |= (static_cast(1) << bit_position); + element |= (static_cast(1) << bit_position); num_ones--; } } @@ -101,7 +101,7 @@ class PopcTest : public ::testing::TestWithParam> { raft::make_device_vector_view(bits_d.data(), bits_d.size()); index_t max_len = params.n_rows * params.n_cols; - auto max_len_view = raft::make_host_scalar_view(&max_len); + auto max_len_view = raft::make_host_scalar_view(&max_len); index_t nnz_actual_h = 0; rmm::device_scalar nnz_actual_d(0, stream); @@ -123,8 +123,17 @@ class PopcTest : public ::testing::TestWithParam> { index_t nnz_expected; }; -using PopcTestI32 = PopcTest; -TEST_P(PopcTestI32, Result) { Run(); } +using PopcTestI32_U32 = PopcTest; +TEST_P(PopcTestI32_U32, Result) { Run(); } + +using PopcTestI32_U64 = PopcTest; +TEST_P(PopcTestI32_U64, Result) { Run(); } + +using PopcTestI32_U16 = PopcTest; +TEST_P(PopcTestI32_U16, Result) { Run(); } + +using PopcTestI32_U8 = PopcTest; +TEST_P(PopcTestI32_U8, Result) { Run(); } template const std::vector> popc_inputs = { @@ -154,6 +163,9 @@ const std::vector> popc_inputs = { {2, 33, 0.2}, }; -INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32, ::testing::ValuesIn(popc_inputs)); +INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32_U32, ::testing::ValuesIn(popc_inputs)); +INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32_U64, ::testing::ValuesIn(popc_inputs)); +INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32_U16, ::testing::ValuesIn(popc_inputs)); +INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32_U8, ::testing::ValuesIn(popc_inputs)); } // namespace raft From f37c41c54fc64a4e3689e5a61851ba3821800fee Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 26 Sep 2024 09:45:11 -0500 Subject: [PATCH 10/10] bump NCCL floor to 2.18.1.1 (#2443) Contributes to https://github.com/rapidsai/build-planning/issues/102 Some RAPIDS libraries are using `ncclCommSplit()`, which was introduced in `nccl==2.18.1.1`. This is part of a series of PRs across RAPIDS updating libraries' pins to `nccl>=2.18.1.1` to ensure they get a new-enough version that supports that. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/raft/pull/2443 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-120_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-120_arch-x86_64.yaml | 2 +- conda/recipes/libraft/conda_build_config.yaml | 2 +- conda/recipes/raft-ann-bench/conda_build_config.yaml | 2 +- conda/recipes/raft-dask/conda_build_config.yaml | 3 +++ conda/recipes/raft-dask/meta.yaml | 4 ++-- dependencies.yaml | 2 +- 13 files changed, 16 insertions(+), 13 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 462874a7e7..de4eb7e690 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -36,7 +36,7 @@ dependencies: - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index cfd974a6a8..26f4c1efaa 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -36,7 +36,7 @@ dependencies: - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 82e391e9ae..692956502b 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 0389427d13..133d42bfee 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index eff1c56840..f99cedd627 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-aarch64=11.8 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 87b19d2952..08aea32ab1 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml index ff3451c15c..572ad85ab5 100644 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml @@ -29,7 +29,7 @@ dependencies: - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml index 085e099ae8..9fd23edb03 100644 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml @@ -29,7 +29,7 @@ dependencies: - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index 5c0047fb9c..6dc06648bd 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -20,7 +20,7 @@ cmake_version: - ">=3.26.4,!=3.30.0" nccl_version: - - ">=2.9.9" + - ">=2.18.1.1" glog_version: - ">=0.6.0" diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml index db0083b583..bdb4e883ea 100644 --- a/conda/recipes/raft-ann-bench/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench/conda_build_config.yaml @@ -20,7 +20,7 @@ cmake_version: - ">=3.26.4,!=3.30.0" nccl_version: - - ">=2.9.9" + - ">=2.18.1.1" glog_version: - ">=0.6.0" diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index e6afed2890..58e8ec3c9e 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -24,3 +24,6 @@ ucxx_version: cmake_version: - ">=3.26.4,!=3.30.0" + +nccl_version: + - ">=2.18.1.1" diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index 74b26b5935..bc13d352b7 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -50,7 +50,7 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - cython >=3.0.0 - - nccl >=2.9.9 + - nccl {{ nccl_version }} - pylibraft {{ version }} - python x.x - rmm ={{ minor_version }} @@ -68,7 +68,7 @@ requirements: - dask-cuda ={{ minor_version }} - rapids-dask-dependency ={{ minor_version }} - joblib >=0.11 - - nccl >=2.9.9 + - nccl {{ nccl_version }} - pylibraft {{ version }} - python x.x - rmm ={{ minor_version }} diff --git a/dependencies.yaml b/dependencies.yaml index 91fb176b56..e833e8519a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -170,7 +170,7 @@ dependencies: packages: - c-compiler - cxx-compiler - - nccl>=2.9.9 + - nccl>=2.18.1.1 - libucxx==0.40.*,>=0.0.0a0 specific: - output_types: conda