diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 4ee5c3b271c..f346cdf8e90 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,26 +46,29 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} docs-build: - if: github.ref_type == 'branch' && github.event_name == 'push' + if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: - build_type: branch - node_type: "gpu-latest-1" arch: "amd64" + branch: ${{ inputs.branch }} + build_type: ${{ inputs.build_type || 'branch' }} container_image: "rapidsai/ci:latest" + date: ${{ inputs.date }} + node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" + sha: ${{ inputs.sha }} wheel-build-pylibcugraph: secrets: inherit - uses: 
rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -78,15 +81,14 @@ jobs: # the CMake variables in get_cumlprims_mg.cmake since CMake will just use # the clone as is. extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-23.04 + extra-repo-sha: branch-23.06 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY - skbuild-configure-options: "-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=/project/cugraph-ops/" - uses-setup-env-vars: false + skbuild-configure-options: "-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/python/pylibcugraph/cugraph-ops/" wheel-publish-pylibcugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -96,7 +98,7 @@ jobs: wheel-build-cugraph: needs: wheel-publish-pylibcugraph secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -109,15 +111,15 @@ jobs: # the CMake variables in get_cumlprims_mg.cmake since CMake will just use # the clone as is. 
extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-23.04 + extra-repo-sha: branch-23.06 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY - skbuild-configure-options: "-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=/project/cugraph-ops/" - uses-setup-env-vars: false + before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 /local-wheelhouse" + skbuild-configure-options: "-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/python/cugraph/cugraph-ops/" wheel-publish-cugraph: needs: wheel-build-cugraph secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b53e139b5ca..2a3f4f073fc 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -24,106 +24,101 @@ jobs: - wheel-build-cugraph - wheel-tests-cugraph secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - 
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: build_type: pull-request conda-notebook-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: pull-request - node_type: "gpu-latest-1" + node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/test_notebooks.sh" docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: pull-request - node_type: "gpu-latest-1" + node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/build_docs.sh" wheel-build-pylibcugraph: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06 with: build_type: pull-request package-name: pylibcugraph package-dir: python/pylibcugraph extra-repo: rapidsai/cugraph-ops - 
extra-repo-sha: branch-23.04 + extra-repo-sha: branch-23.06 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY - skbuild-configure-options: "-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=/project/cugraph-ops/" - uses-setup-env-vars: false + skbuild-configure-options: "-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/python/pylibcugraph/cugraph-ops/" wheel-tests-pylibcugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 with: build_type: pull-request package-name: pylibcugraph - # On arm also need to install cupy from the specific webpage. - test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest -v ./python/pylibcugraph/pylibcugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest ./python/pylibcugraph/pylibcugraph/tests" test-smoketest: "python ci/wheel_smoke_test_pylibcugraph.py" wheel-build-cugraph: needs: wheel-tests-pylibcugraph secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06 with: build_type: pull-request package-name: cugraph package-dir: python/cugraph extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-23.04 + extra-repo-sha: branch-23.06 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY - before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_cu11 rapids-download-wheels-from-s3 ./local-wheelhouse" - skbuild-configure-options: "-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=/project/cugraph-ops/" - 
uses-setup-env-vars: false + before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 /local-wheelhouse" + skbuild-configure-options: "-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/python/cugraph/cugraph-ops/" wheel-tests-cugraph: needs: wheel-build-cugraph secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 with: build_type: pull-request package-name: cugraph # Always want to test against latest dask/distributed. - test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && RAPIDS_PY_WHEEL_NAME=pylibcugraph_cu11 rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" + test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && RAPIDS_PY_WHEEL_NAME=pylibcugraph_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" # Skip dataset downloads on arm to save CI time -- arm only runs smoke tests. - # On arm also need to install cupy from the specific site. 
- test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_cu11 rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -v -m sg ./python/cugraph/cugraph/tests" + test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -m sg ./python/cugraph/cugraph/tests" test-smoketest: "python ci/wheel_smoke_test_cugraph.py" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 8856ec3df5d..87dd6104b4e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,19 +32,17 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibcugraph: secrets: inherit - uses: 
rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: pylibcugraph - # On arm also need to install cupy from the specific webpage. - test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest -v ./python/pylibcugraph/pylibcugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest ./python/pylibcugraph/pylibcugraph/tests" wheel-tests-cugraph: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -52,7 +50,6 @@ jobs: sha: ${{ inputs.sha }} package-name: cugraph # Always want to test against latest dask/distributed. - test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" - # On arm also need to install cupy from the specific webpage. 
- test-before-arm64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -v -m sg ./python/cugraph/cugraph/tests" + test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" + test-before-arm64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -m sg ./python/cugraph/cugraph/tests" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c2f5fe2cfb..0f05aedf1a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,7 @@ # # Before first use: `pre-commit install` # To run: `pre-commit run --all-files` +exclude: '^thirdparty' repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 @@ -32,13 +33,13 @@ repos: additional_dependencies: - flake8==6.0.0 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v11.1.0 + rev: v16.0.1 hooks: - id: clang-format exclude: | (?x)^( cpp/libcugraph_etl| - cpp/tests/c_api/.* + cpp/tests/c_api ) types_or: [c, c++, cuda] args: ["-fallback-style=none", "-style=file", "-i"] diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d40ade9810..4a018c55031 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,111 @@ +# cuGraph 23.06.00 (7 Jun 2023) + +## 🚨 Breaking Changes + +- [BUG] Fix Incorrect File Selection in cuGraph-PyG Loader 
([#3599](https://github.com/rapidsai/cugraph/pull/3599)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Remove legacy leiden ([#3581](https://github.com/rapidsai/cugraph/pull/3581)) [@ChuckHastings](https://github.com/ChuckHastings) +- [IMP] Match Default PyG Hop ID Behavior in cuGraph-PyG ([#3565](https://github.com/rapidsai/cugraph/pull/3565)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- [IMP] Sample with Offsets in the Bulk Sampler ([#3524](https://github.com/rapidsai/cugraph/pull/3524)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Dropping Python 3.8 ([#3505](https://github.com/rapidsai/cugraph/pull/3505)) [@divyegala](https://github.com/divyegala) +- Remove legacy renumber and shuffle calls from cython.cu ([#3467](https://github.com/rapidsai/cugraph/pull/3467)) [@ChuckHastings](https://github.com/ChuckHastings) +- Remove legacy implementation of induce subgraph ([#3464](https://github.com/rapidsai/cugraph/pull/3464)) [@ChuckHastings](https://github.com/ChuckHastings) + +## 🐛 Bug Fixes + +- Fix MG Test Failing due to Removal of np.float ([#3621](https://github.com/rapidsai/cugraph/pull/3621)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- fix logic for shuffling results ([#3619](https://github.com/rapidsai/cugraph/pull/3619)) [@ChuckHastings](https://github.com/ChuckHastings) +- [BUG] Fix Calls to cudf.DataFrame/Series.unique that relied on old behavior ([#3616](https://github.com/rapidsai/cugraph/pull/3616)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- correct dgl version in `cugraph-dgl` conda recipe ([#3612](https://github.com/rapidsai/cugraph/pull/3612)) [@tingyu66](https://github.com/tingyu66) +- [BUG] Fix Issue in cuGraph-PyG Tests Blocking CI ([#3607](https://github.com/rapidsai/cugraph/pull/3607)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- [BUG] Critical: Fix cuGraph-PyG Edge Index Renumbering for Single-Edge Graphs ([#3605](https://github.com/rapidsai/cugraph/pull/3605)) 
[@alexbarghi-nv](https://github.com/alexbarghi-nv) +- [BUG] Skip Empty Partitions in Bulk Sample Writing ([#3600](https://github.com/rapidsai/cugraph/pull/3600)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- [BUG] Fix Incorrect File Selection in cuGraph-PyG Loader ([#3599](https://github.com/rapidsai/cugraph/pull/3599)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Fix SSSP bug ([#3597](https://github.com/rapidsai/cugraph/pull/3597)) [@jnke2016](https://github.com/jnke2016) +- update cudf column constructor calls ([#3592](https://github.com/rapidsai/cugraph/pull/3592)) [@ChuckHastings](https://github.com/ChuckHastings) +- Fix one more path to cugraphops in build workflow ([#3554](https://github.com/rapidsai/cugraph/pull/3554)) [@vyasr](https://github.com/vyasr) +- Fix path to cugraphops in build workflow ([#3547](https://github.com/rapidsai/cugraph/pull/3547)) [@vyasr](https://github.com/vyasr) +- Update dgl APIs for v1.1.0 ([#3546](https://github.com/rapidsai/cugraph/pull/3546)) [@tingyu66](https://github.com/tingyu66) +- Pin to scikit-build<17.2 ([#3538](https://github.com/rapidsai/cugraph/pull/3538)) [@vyasr](https://github.com/vyasr) +- Correct results from sampling when grouping batches on specific GPUs ([#3517](https://github.com/rapidsai/cugraph/pull/3517)) [@ChuckHastings](https://github.com/ChuckHastings) +- [FIX] Match the PyG API for Node Input to the Loader ([#3514](https://github.com/rapidsai/cugraph/pull/3514)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Correct MG Leiden and SCC tests ([#3509](https://github.com/rapidsai/cugraph/pull/3509)) [@ChuckHastings](https://github.com/ChuckHastings) +- per_v_transform_reduce_incoming|outgoing_e bug fix (when we're using (key, value) pairs to store edge src|dst property values) ([#3508](https://github.com/rapidsai/cugraph/pull/3508)) [@seunghwak](https://github.com/seunghwak) +- Updates to allow python benchmarks to run on additional datasets by default 
([#3506](https://github.com/rapidsai/cugraph/pull/3506)) [@rlratzel](https://github.com/rlratzel) +- [BUG] Fix Intermittent Error when Converting cuDF DataFrame to Tensor by Converting to cuPy Array First ([#3498](https://github.com/rapidsai/cugraph/pull/3498)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- [FIX] Update cugraph-PyG Dependencies to include cuGraph ([#3497](https://github.com/rapidsai/cugraph/pull/3497)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Fix graph_properties_t's members order ([#3484](https://github.com/rapidsai/cugraph/pull/3484)) [@naimnv](https://github.com/naimnv) +- Fix issue with latest rapids-make ([#3481](https://github.com/rapidsai/cugraph/pull/3481)) [@ChuckHastings](https://github.com/ChuckHastings) +- Branch 23.06 Fix Forward Merge ([#3462](https://github.com/rapidsai/cugraph/pull/3462)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Update raft dependency to 23.06 ([#3410](https://github.com/rapidsai/cugraph/pull/3410)) [@ChuckHastings](https://github.com/ChuckHastings) + +## 📖 Documentation + +- updated cugraph Demo notebooks for 23.06 ([#3558](https://github.com/rapidsai/cugraph/pull/3558)) [@acostadon](https://github.com/acostadon) +- cugraph-ops license ([#3553](https://github.com/rapidsai/cugraph/pull/3553)) [@BradReesWork](https://github.com/BradReesWork) +- Notebook clean-up and run verification ([#3551](https://github.com/rapidsai/cugraph/pull/3551)) [@acostadon](https://github.com/acostadon) +- Updates contributing steps to add copyright and license text inclusion instruction ([#3519](https://github.com/rapidsai/cugraph/pull/3519)) [@rlratzel](https://github.com/rlratzel) +- Fixed notebook links in algorithm and cugraph notebook pages ([#3515](https://github.com/rapidsai/cugraph/pull/3515)) [@acostadon](https://github.com/acostadon) +- adding cugraph-ops ([#3488](https://github.com/rapidsai/cugraph/pull/3488)) [@BradReesWork](https://github.com/BradReesWork) +- Sphinx updates 
([#3468](https://github.com/rapidsai/cugraph/pull/3468)) [@BradReesWork](https://github.com/BradReesWork) + +## 🚀 New Features + +- [REVIEW] Add MNMG with training ([#3603](https://github.com/rapidsai/cugraph/pull/3603)) [@VibhuJawa](https://github.com/VibhuJawa) +- MG Leiden and MG MIS ([#3582](https://github.com/rapidsai/cugraph/pull/3582)) [@naimnv](https://github.com/naimnv) +- graph primitive transform_e ([#3548](https://github.com/rapidsai/cugraph/pull/3548)) [@seunghwak](https://github.com/seunghwak) +- Support CUDA 12.0 for pip wheels ([#3544](https://github.com/rapidsai/cugraph/pull/3544)) [@divyegala](https://github.com/divyegala) +- Updates pytest benchmarks to use synthetic data and multi-GPUs ([#3540](https://github.com/rapidsai/cugraph/pull/3540)) [@rlratzel](https://github.com/rlratzel) +- Enable edge masking ([#3522](https://github.com/rapidsai/cugraph/pull/3522)) [@seunghwak](https://github.com/seunghwak) +- [REVIEW] Profile graph creation runtime and memory footprint ([#3518](https://github.com/rapidsai/cugraph/pull/3518)) [@VibhuJawa](https://github.com/VibhuJawa) +- Bipartite R-mat graph generation. ([#3512](https://github.com/rapidsai/cugraph/pull/3512)) [@seunghwak](https://github.com/seunghwak) +- Dropping Python 3.8 ([#3505](https://github.com/rapidsai/cugraph/pull/3505)) [@divyegala](https://github.com/divyegala) +- Creates Notebook that runs Multi-GPU versions of Jaccard, Sorenson and overlap. 
([#3504](https://github.com/rapidsai/cugraph/pull/3504)) [@acostadon](https://github.com/acostadon) +- [cugraph-dgl] Add support for bipartite node features and optional edge features in GATConv ([#3503](https://github.com/rapidsai/cugraph/pull/3503)) [@tingyu66](https://github.com/tingyu66) +- [cugraph-dgl] Add TransformerConv ([#3501](https://github.com/rapidsai/cugraph/pull/3501)) [@tingyu66](https://github.com/tingyu66) +- [cugraph-pyg] Add TransformerConv and support for bipartite node features in GATConv ([#3489](https://github.com/rapidsai/cugraph/pull/3489)) [@tingyu66](https://github.com/tingyu66) +- Branch 23.06 resolve merge conflict for forward merge ([#3409](https://github.com/rapidsai/cugraph/pull/3409)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Refactor Leiden ([#3327](https://github.com/rapidsai/cugraph/pull/3327)) [@jnke2016](https://github.com/jnke2016) + +## 🛠️ Improvements + +- Refresh requirements ([#3622](https://github.com/rapidsai/cugraph/pull/3622)) [@jakirkham](https://github.com/jakirkham) +- Pr3266 continue (optional arg for weight attribute for Nx graphs in `sssp`) ([#3611](https://github.com/rapidsai/cugraph/pull/3611)) [@eriknw](https://github.com/eriknw) +- Enables MG python tests using a single-GPU LocalCUDACluster in CI ([#3596](https://github.com/rapidsai/cugraph/pull/3596)) [@rlratzel](https://github.com/rlratzel) +- UVM notebook update and add tracker for notebooks to readme ([#3595](https://github.com/rapidsai/cugraph/pull/3595)) [@acostadon](https://github.com/acostadon) +- [REVIEW] Skip adding edge types, edge weights ([#3583](https://github.com/rapidsai/cugraph/pull/3583)) [@VibhuJawa](https://github.com/VibhuJawa) +- Remove legacy leiden ([#3581](https://github.com/rapidsai/cugraph/pull/3581)) [@ChuckHastings](https://github.com/ChuckHastings) +- run docs nightly too ([#3568](https://github.com/rapidsai/cugraph/pull/3568)) [@AyodeAwe](https://github.com/AyodeAwe) +- include hop as part of the sort criteria for 
sampling results ([#3567](https://github.com/rapidsai/cugraph/pull/3567)) [@ChuckHastings](https://github.com/ChuckHastings) +- Add MG python implementation of Leiden ([#3566](https://github.com/rapidsai/cugraph/pull/3566)) [@jnke2016](https://github.com/jnke2016) +- [IMP] Match Default PyG Hop ID Behavior in cuGraph-PyG ([#3565](https://github.com/rapidsai/cugraph/pull/3565)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Switch back to using primary shared-action-workflows branch ([#3562](https://github.com/rapidsai/cugraph/pull/3562)) [@vyasr](https://github.com/vyasr) +- removed deprecated calls and modified demo notebooks to run with 23.06 ([#3561](https://github.com/rapidsai/cugraph/pull/3561)) [@acostadon](https://github.com/acostadon) +- add unit test for checking is_symmetric is valid, update documentatio… ([#3559](https://github.com/rapidsai/cugraph/pull/3559)) [@ChuckHastings](https://github.com/ChuckHastings) +- Update recipes to GTest version >=1.13.0 ([#3549](https://github.com/rapidsai/cugraph/pull/3549)) [@bdice](https://github.com/bdice) +- Improve memory footprint and performance of graph creation ([#3542](https://github.com/rapidsai/cugraph/pull/3542)) [@VibhuJawa](https://github.com/VibhuJawa) +- Update cupy dependency ([#3539](https://github.com/rapidsai/cugraph/pull/3539)) [@vyasr](https://github.com/vyasr) +- Perform expensive edge list check in create_graph_from_edgelist() ([#3533](https://github.com/rapidsai/cugraph/pull/3533)) [@seunghwak](https://github.com/seunghwak) +- Enable sccache hits from local builds ([#3526](https://github.com/rapidsai/cugraph/pull/3526)) [@AyodeAwe](https://github.com/AyodeAwe) +- Build wheels using new single image workflow ([#3525](https://github.com/rapidsai/cugraph/pull/3525)) [@vyasr](https://github.com/vyasr) +- [IMP] Sample with Offsets in the Bulk Sampler ([#3524](https://github.com/rapidsai/cugraph/pull/3524)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Revert shared-action-workflows 
pin ([#3523](https://github.com/rapidsai/cugraph/pull/3523)) [@divyegala](https://github.com/divyegala) +- [FIX] fix cugraphops namespace ([#3520](https://github.com/rapidsai/cugraph/pull/3520)) [@stadlmax](https://github.com/stadlmax) +- Add support in C API for handling unweighted graphs in algorithms that expect weights ([#3513](https://github.com/rapidsai/cugraph/pull/3513)) [@ChuckHastings](https://github.com/ChuckHastings) +- Changes to support gtest version 1.11 ([#3511](https://github.com/rapidsai/cugraph/pull/3511)) [@ChuckHastings](https://github.com/ChuckHastings) +- update docs ([#3510](https://github.com/rapidsai/cugraph/pull/3510)) [@BradReesWork](https://github.com/BradReesWork) +- Remove usage of rapids-get-rapids-version-from-git ([#3502](https://github.com/rapidsai/cugraph/pull/3502)) [@jjacobelli](https://github.com/jjacobelli) +- Remove Dummy Edge Weights, Support Specifying Edge Ids/Edge Types/Weights Separately ([#3495](https://github.com/rapidsai/cugraph/pull/3495)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- [ENH] Add missing include of thrust/optional.h ([#3493](https://github.com/rapidsai/cugraph/pull/3493)) [@ahendriksen](https://github.com/ahendriksen) +- Remove wheel pytest verbosity ([#3492](https://github.com/rapidsai/cugraph/pull/3492)) [@sevagh](https://github.com/sevagh) +- Update clang-format to 16.0.1. 
([#3485](https://github.com/rapidsai/cugraph/pull/3485)) [@bdice](https://github.com/bdice) +- Use ARC V2 self-hosted runners for GPU jobs ([#3483](https://github.com/rapidsai/cugraph/pull/3483)) [@jjacobelli](https://github.com/jjacobelli) +- packed bool specialization to store edge endpoint|edge properties ([#3482](https://github.com/rapidsai/cugraph/pull/3482)) [@seunghwak](https://github.com/seunghwak) +- Remove legacy renumber and shuffle calls from cython.cu ([#3467](https://github.com/rapidsai/cugraph/pull/3467)) [@ChuckHastings](https://github.com/ChuckHastings) +- Remove legacy implementation of induce subgraph ([#3464](https://github.com/rapidsai/cugraph/pull/3464)) [@ChuckHastings](https://github.com/ChuckHastings) +- Remove uses-setup-env-vars ([#3463](https://github.com/rapidsai/cugraph/pull/3463)) [@vyasr](https://github.com/vyasr) +- Optimize random walks ([#3460](https://github.com/rapidsai/cugraph/pull/3460)) [@jnke2016](https://github.com/jnke2016) +- Update select_random_vertices to sample from a given distributed set or from (0, V] ([#3455](https://github.com/rapidsai/cugraph/pull/3455)) [@naimnv](https://github.com/naimnv) + # cuGraph 23.04.00 (6 Apr 2023) ## 🚨 Breaking Changes diff --git a/README.md b/README.md index 8c5e057b9f4..b88cf194fa9 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,8 @@
-[Getting cuGraph](./readme_pages/getting_cugraph.md) * -[Graph Algorithms](./readme_pages/algorithms.md) * +[Getting cuGraph](./docs/cugraph/source/installation/getting_cugraph.md) * +[Graph Algorithms](./docs/cugraph/source/graph_support/algorithms.md) * [Graph Service](./readme_pages/cugraph_service.md) * [Property Graph](./readme_pages/property_graph.md) * [GNN Support](./readme_pages/gnn_support.md) @@ -37,8 +37,9 @@ ----- ## Table of content -- Getting packages - - [Getting cuGraph Packages](./readme_pages/getting_cugraph.md) +- Installation + - [Getting cuGraph Packages](./docs/cugraph/source/installation/getting_cugraph.md) + - [Building from Source](./docs/cugraph/source/installation/source_build.md) - [Contributing to cuGraph](./readme_pages/CONTRIBUTING.md) - General - [Latest News](./readme_pages/news.md) diff --git a/benchmarks/cugraph-dgl/pytest-based/bench_cugraph_dgl_uniform_neighbor_sample.py b/benchmarks/cugraph-dgl/pytest-based/bench_cugraph_dgl_uniform_neighbor_sample.py index f05c4364840..eeee163b0af 100644 --- a/benchmarks/cugraph-dgl/pytest-based/bench_cugraph_dgl_uniform_neighbor_sample.py +++ b/benchmarks/cugraph-dgl/pytest-based/bench_cugraph_dgl_uniform_neighbor_sample.py @@ -39,7 +39,7 @@ def create_graph(graph_data): """ Create a graph instance based on the data to be loaded/generated. - """ + """ print("Initalize Pool on client") rmm.reinitialize(pool_allocator=True) # Assume strings are names of datasets in the datasets package @@ -77,7 +77,7 @@ def create_graph(graph_data): num_nodes_dict = {'_N':num_nodes} gs = CuGraphStorage(num_nodes_dict=num_nodes_dict, single_gpu=True) - gs.add_edge_data(edgelist_df, + gs.add_edge_data(edgelist_df, # reverse to make same graph as cugraph node_col_names=['dst', 'src'], canonical_etype=['_N', 'connects', '_N']) @@ -90,11 +90,9 @@ def create_mg_graph(graph_data): """ Create a graph instance based on the data to be loaded/generated.
""" - ## Reserving GPU 0 for client(trainer/service project) - n_devices = os.getenv('DASK_NUM_WORKERS', 4) - n_devices = int(n_devices) + # range starts at 1 to let 0 be used by benchmark/client process + visible_devices = os.getenv("DASK_WORKER_DEVICES", "1,2,3,4") - visible_devices = ','.join([str(i) for i in range(1, n_devices+1)]) cluster = LocalCUDACluster(protocol='ucx', rmm_pool_size='25GB', CUDA_VISIBLE_DEVICES=visible_devices) client = Client(cluster) Comms.initialize(p2p=True) @@ -137,7 +135,7 @@ def create_mg_graph(graph_data): num_nodes_dict = {'_N':num_nodes} gs = CuGraphStorage(num_nodes_dict=num_nodes_dict, single_gpu=False) - gs.add_edge_data(edgelist_df, + gs.add_edge_data(edgelist_df, node_col_names=['dst', 'src'], canonical_etype=['_N', 'C', '_N']) return (gs, client, cluster) @@ -166,7 +164,7 @@ def get_uniform_neighbor_sample_args( num_start_verts = int(num_verts * 0.25) else: num_start_verts = batch_size - + srcs = G.graphstore.gdata.get_edge_data()['_SRC_'] start_list = srcs.head(num_start_verts) assert len(start_list) == num_start_verts @@ -229,7 +227,7 @@ def bench_cugraph_dgl_uniform_neighbor_sample( fanout_val.reverse() sampler = dgl.dataloading.NeighborSampler(uns_args["fanout"]) sampler_f = sampler.sample_blocks - + # Warmup _ = sampler_f(g=G, seed_nodes=uns_args["seed_nodes"]) # print(f"\n{uns_args}") diff --git a/benchmarks/cugraph/pytest-based/bench_algos.py b/benchmarks/cugraph/pytest-based/bench_algos.py index c57731dee8d..d7fcb7812e4 100644 --- a/benchmarks/cugraph/pytest-based/bench_algos.py +++ b/benchmarks/cugraph/pytest-based/bench_algos.py @@ -12,7 +12,7 @@ # limitations under the License.
# duck-type compatible Dataset for RMAT data
class RmatDataset:
    """
    Synthetic dataset generated with the RMAT generator, exposing the subset
    of the Dataset API used by these benchmarks (get_edgelist, get_graph,
    get_path, unload).

    Parameters
    ----------
    scale : int
        Passed to rmat() as the scale; the number of edges requested is
        (2**scale) * edgefactor.
    edgefactor : int
        Multiplier used to compute the number of edges requested.
    mg : bool
        Passed to rmat() as mg=. When True the edgelist is treated as a
        partitioned (dask) DataFrame when weights are added.
    """
    def __init__(self, scale=4, edgefactor=2, mg=False):
        self._scale = scale
        self._edgefactor = edgefactor
        # Cached edgelist; populated on the first get_edgelist() call and
        # cleared by unload().
        self._edgelist = None

        self.mg = mg

    def __str__(self):
        # Used as the human-readable ID of the dataset,
        # e.g. "rmat_sg_20_16"
        mg_str = "mg" if self.mg else "sg"
        return f"rmat_{mg_str}_{self._scale}_{self._edgefactor}"

    def get_edgelist(self, fetch=False):
        """
        Return the (cached) RMAT edgelist with a random "weight" column.

        fetch is accepted only for signature compatibility and is not used.
        """
        seed = 42
        if self._edgelist is None:
            self._edgelist = rmat(
                self._scale,
                (2**self._scale)*self._edgefactor,
                0.57,  # from Graph500
                0.19,  # from Graph500
                0.19,  # from Graph500
                seed,
                clip_and_flip=False,
                scramble_vertex_ids=True,
                create_using=None,  # return edgelist instead of Graph instance
                mg=self.mg
            )
            rng = np.random.default_rng(seed)
            if self.mg:
                # NOTE(review): the same Generator object is captured by the
                # lambda used for every partition - confirm that partitions
                # are not expected to receive independent random streams.
                self._edgelist["weight"] = self._edgelist.map_partitions(
                    lambda df: rng.random(size=len(df)))
            else:
                self._edgelist["weight"] = rng.random(size=len(self._edgelist))

        return self._edgelist

    def get_graph(self,
                  fetch=False,
                  create_using=cugraph.Graph,
                  ignore_weights=False,
                  store_transposed=False):
        """
        Build and return a graph from the RMAT edgelist.

        Parameters
        ----------
        fetch : bool
            Accepted only for signature compatibility; not used.
        create_using : cugraph.Graph instance or type
            If an instance, a new object of the same type is created with the
            same "directed" setting. If a type, it is instantiated with no
            arguments.
        ignore_weights : bool
            If True, the "weight" column is not used as the edge attribute.
        store_transposed : bool
            Forwarded to the from_*_edgelist() call.

        Raises
        ------
        TypeError
            If create_using is neither a cugraph.Graph instance nor a type.
        """
        if isinstance(create_using, cugraph.Graph):
            # what about BFS if transposed is True
            attrs = {"directed": create_using.is_directed()}
            G = type(create_using)(**attrs)
        elif isinstance(create_using, type):
            G = create_using()
        else:
            # Previously this case fell through and failed later with an
            # UnboundLocalError on G.
            raise TypeError(
                "create_using must be a cugraph.Graph instance or a type, "
                f"got: {type(create_using)}")

        edge_attr = None if ignore_weights else "weight"
        df = self.get_edgelist()
        if isinstance(df, dask_cudf.DataFrame):
            G.from_dask_cudf_edgelist(df,
                                      source="src",
                                      destination="dst",
                                      edge_attr=edge_attr,
                                      store_transposed=store_transposed)
        else:
            G.from_cudf_edgelist(df,
                                 source="src",
                                 destination="dst",
                                 edge_attr=edge_attr,
                                 store_transposed=store_transposed)
        return G

    def get_path(self):
        """
        (this is likely not needed for use with pytest-benchmark, just added for
        API completeness with Dataset.)
        """
        return str(self)

    def unload(self):
        """
        Drop the reference to the cached edgelist; the next get_edgelist()
        call regenerates it.
        """
        self._edgelist = None
- if "directed" in csvFileName.parts: - graphType = cugraph.Graph(directed=True) - else: - graphType = cugraph.Graph() - - return cugraph.from_cudf_edgelist( - utils.read_csv_file(csvFileName), - source="0", destination="1", edge_attr="2", - create_using=graphType, - renumber=True) - +dataset_fixture_params = gen_fixture_params_product( + (directed_datasets + + undirected_datasets + + [rmat_sg_dataset, rmat_mg_dataset], "ds")) # Record the current RMM settings so reinitialize() will be called only when a -# change is needed (RMM defaults both values to False). This allows the -# --no-rmm-reinit option to prevent reinitialize() from being called at all +# change is needed (RMM defaults both values to False). The --allow-rmm-reinit +# option is required to allow the RMM options to be set by the pytest user +# directly, in order to prevent reinitialize() from being called more than once # (see conftest.py for details). +# The defaults for managed_mem (False) and pool_alloc (True) are set in +# conftest.py RMM_SETTINGS = {"managed_mem": False, "pool_alloc": False} - +# FIXME: this only changes the RMM config in a SG environment. The dask config +# that applies to RMM in an MG environment is not changed by this! def reinitRMM(managed_mem, pool_alloc): - + """ + Reinitializes RMM to the value of managed_mem and pool_alloc, but only if + those values are different that the current configuration. + """ if (managed_mem != RMM_SETTINGS["managed_mem"]) or \ (pool_alloc != RMM_SETTINGS["pool_alloc"]): @@ -104,79 +184,86 @@ def reinitRMM(managed_mem, pool_alloc): # # For benchmarks, the operations performed in fixtures are not measured as part # of the benchmark. + @pytest.fixture(scope="module", - params=fixture_params) -def edgelistCreated(request): - """ - Returns a new edgelist created from a CSV, which is specified as part of - the parameterization for this fixture. 
@pytest.fixture(scope="module",
                params=dataset_fixture_params)
def dataset(request, rmm_config):

    """
    Fixture which provides a Dataset instance, setting up a Dask cluster and
    client if necessary for MG, to tests and other fixtures. When all
    tests/fixtures are done with the Dataset, it has the Dask cluster and
    client torn down (if MG) and all data loaded is freed.
    """
    setFixtureParamNames(request, ["dataset"])
    dataset = request.param[0]
    client = cluster = None
    # For now, only RmatDataset instances support MG and have a "mg" attr.
    if getattr(dataset, "mg", False):
        (client, cluster) = mg_utils.start_dask_client()

    yield dataset

    # Teardown: always stop the Dask client/cluster that was started above,
    # even if freeing the dataset raises, so a failed unload() does not leave
    # a cluster running for the rest of the session.
    try:
        dataset.unload()
    finally:
        if client is not None:
            mg_utils.stop_dask_client(client, cluster)
+ Return True if graph is distributed (for use with cugraph.dask APIs) """ - setFixtureParamNames(request, ["dataset", "managed_mem", "pool_allocator"]) - csvFileName = request.param[0] - reinitRMM(request.param[1], request.param[2]) - - G = createGraph(csvFileName) - G.view_transposed_adj_list() - return G + return isinstance(graph.edgelist.edgelist_df, dask_cudf.DataFrame) ############################################################################### # Benchmarks -@pytest.mark.ETL -def bench_create_graph(gpubenchmark, edgelistCreated): +def bench_create_graph(gpubenchmark, edgelist): gpubenchmark(cugraph.from_cudf_edgelist, - edgelistCreated, - source="0", destination="1", + edgelist, + source="src", destination="dst", create_using=cugraph.structure.graph_classes.Graph, renumber=False) @@ -184,94 +271,142 @@ def bench_create_graph(gpubenchmark, edgelistCreated): # Creating directed Graphs on small datasets runs in micro-seconds, which # results in thousands of rounds before the default threshold is met, so lower # the max_time for this benchmark. 
-@pytest.mark.ETL @pytest.mark.benchmark( warmup=True, warmup_iterations=10, max_time=0.005 ) -def bench_create_digraph(gpubenchmark, edgelistCreated): +def bench_create_digraph(gpubenchmark, edgelist): gpubenchmark(cugraph.from_cudf_edgelist, - edgelistCreated, - source="0", destination="1", + edgelist, + source="src", destination="dst", create_using=cugraph.Graph(directed=True), renumber=False) -@pytest.mark.ETL -def bench_renumber(gpubenchmark, edgelistCreated): - gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1") +def bench_renumber(gpubenchmark, edgelist): + gpubenchmark(NumberMap.renumber, edgelist, "src", "dst") -def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed): - gpubenchmark(cugraph.pagerank, anyGraphWithTransposedAdjListComputed) +def bench_pagerank(gpubenchmark, transposed_graph): + pagerank = dask_cugraph.pagerank if is_graph_distributed(transposed_graph) \ + else cugraph.pagerank + gpubenchmark(pagerank, transposed_graph) -def bench_bfs(gpubenchmark, anyGraphWithAdjListComputed): - gpubenchmark(cugraph.bfs, anyGraphWithAdjListComputed, 0) +def bench_bfs(gpubenchmark, graph): + bfs = dask_cugraph.bfs if is_graph_distributed(graph) else cugraph.bfs + start = graph.edgelist.edgelist_df["src"][0] + gpubenchmark(bfs, graph, start) -def bench_force_atlas2(gpubenchmark, anyGraphWithAdjListComputed): - gpubenchmark(cugraph.force_atlas2, anyGraphWithAdjListComputed, - max_iter=50) +def bench_force_atlas2(gpubenchmark, graph): + if is_graph_distributed(graph): + pytest.skip("distributed graphs are not supported") + gpubenchmark(cugraph.force_atlas2, graph, max_iter=50) -def bench_sssp(gpubenchmark, anyGraphWithAdjListComputed): - gpubenchmark(cugraph.sssp, anyGraphWithAdjListComputed, 0) +def bench_sssp(gpubenchmark, graph): + sssp = dask_cugraph.sssp if is_graph_distributed(graph) else cugraph.sssp + start = graph.edgelist.edgelist_df["src"][0] + gpubenchmark(sssp, graph, start) -def bench_jaccard(gpubenchmark, 
graphWithAdjListComputed): - gpubenchmark(cugraph.jaccard, graphWithAdjListComputed) +def bench_jaccard(gpubenchmark, unweighted_graph): + G = unweighted_graph + jaccard = dask_cugraph.jaccard if is_graph_distributed(G) else cugraph.jaccard + gpubenchmark(jaccard, G) @pytest.mark.skipif( is_device_version_less_than((7, 0)), reason="Not supported on Pascal") -def bench_louvain(gpubenchmark, graphWithAdjListComputed): - gpubenchmark(cugraph.louvain, graphWithAdjListComputed) +def bench_louvain(gpubenchmark, graph): + louvain = dask_cugraph.louvain if is_graph_distributed(graph) else cugraph.louvain + gpubenchmark(louvain, graph) -def bench_weakly_connected_components(gpubenchmark, - anyGraphWithAdjListComputed): - gpubenchmark(cugraph.weakly_connected_components, - anyGraphWithAdjListComputed) +def bench_weakly_connected_components(gpubenchmark, graph): + if is_graph_distributed(graph): + pytest.skip("distributed graphs are not supported") + if graph.is_directed(): + G = graph.to_undirected() + else: + G = graph + gpubenchmark(cugraph.weakly_connected_components, G) -def bench_overlap(gpubenchmark, anyGraphWithAdjListComputed): - gpubenchmark(cugraph.overlap, anyGraphWithAdjListComputed) +def bench_overlap(gpubenchmark, unweighted_graph): + G = unweighted_graph + overlap = dask_cugraph.overlap if is_graph_distributed(G) else cugraph.overlap + gpubenchmark(overlap, G) -def bench_triangle_count(gpubenchmark, graphWithAdjListComputed): - gpubenchmark(cugraph.triangle_count, graphWithAdjListComputed) +def bench_triangle_count(gpubenchmark, graph): + tc = dask_cugraph.triangle_count if is_graph_distributed(graph) \ + else cugraph.triangle_count + gpubenchmark(tc, graph) -def bench_spectralBalancedCutClustering(gpubenchmark, - graphWithAdjListComputed): - gpubenchmark(cugraph.spectralBalancedCutClustering, - graphWithAdjListComputed, 2) +def bench_spectralBalancedCutClustering(gpubenchmark, graph): + if is_graph_distributed(graph): + pytest.skip("distributed graphs are not 
def bench_uniform_neighbor_sample(gpubenchmark, graph):
    """
    Benchmark uniform_neighbor_sample on graph, using the cugraph.dask
    implementation when is_graph_distributed() reports a distributed graph
    and the single-GPU cugraph implementation otherwise.

    The start list is a seeded random sample of the "src" column of the
    graph's edgelist, and the fanout is fixed at [5, 5, 5].
    """
    if is_graph_distributed(graph):
        sample_fn = dask_cugraph.uniform_neighbor_sample
    else:
        sample_fn = cugraph.uniform_neighbor_sample

    rand_seed = 42
    # FIXME: may need to provide number_of_vertices separately
    vert_count = graph.number_of_vertices()
    # Target start-list size: 1% of the vertices, but never fewer than 2.
    wanted = max(int(vert_count * 0.01), 2)
    src_col = graph.edgelist.edgelist_df["src"]
    # NOTE(review): the fraction is derived from the vertex count but is
    # applied to the edgelist "src" column - confirm the resulting sample
    # size is the intended one.
    start_list = src_col.sample(frac=wanted / vert_count,
                                random_state=rand_seed)

    # Attempt to automatically handle a dask Series
    if hasattr(start_list, "compute"):
        start_list = start_list.compute()

    gpubenchmark(sample_fn,
                 graph,
                 start_list=start_list,
                 fanout_vals=[5, 5, 5])
def pytest_sessionstart(session):
    """
    Session-start hook that (1) restricts the run to a single RMM
    configuration unless --allow-rmm-reinit was given, and (2) publishes the
    RMAT CLI option values as attributes on the pytest module.
    """
    config = session.config

    # if the --allow-rmm-reinit option is not given, set (or add to) the CLI
    # "mark expression" (-m) the markers for no managedmem and
    # poolallocator. This will result in the RMM reinit() function to be called
    # only once in the running process (the typical use case).
    #
    # FIXME: consider making the RMM config options set using a CLI option
    # instead of by markers. This would mean only one RMM config can be used
    # per test session, which could eliminate problems related to calling RMM
    # reinit multiple times in the same process. This would not be a major
    # change to the benchmark UX since the user is discouraged from doing a
    # reinit multiple times anyway (hence the --allow-rmm-reinit flag).
    if config.getoption("allow_rmm_reinit") is False:
        user_expr = config.getoption("markexpr")
        # The user may not select RMM markers themselves in this mode.
        if any(marker in user_expr
               for marker in ("managedmem", "poolallocator")):
            raise RuntimeError("managedmem and poolallocator markers cannot "
                               "be used without --allow-rmm-reinit.")

        rmm_expr = "managedmem_off and poolallocator_on"
        config.option.markexpr = (
            f"({user_expr}) and ({rmm_expr})" if user_expr else rmm_expr
        )

    # Set the value of the CLI options for RMAT here since any RmatDataset
    # objects must be instantiated prior to running test fixtures in order to
    # have their test ID generated properly.
    # FIXME: is there a better way to do this?
    pytest._rmat_scale = config.getoption("rmat_scale")
    pytest._rmat_edgefactor = config.getoption("rmat_edgefactor")
- """ - ddf = rmat( - scale, - (2**scale)*edgefactor, - 0.57, # from Graph500 - 0.19, # from Graph500 - 0.19, # from Graph500 - seed or 42, - clip_and_flip=False, - scramble_vertex_ids=True, - create_using=None, # return edgelist instead of Graph instance - mg=True - ) - if not unweighted: - rng = np.random.default_rng(seed) - ddf["weight"] = ddf.map_partitions(lambda df: rng.random(size=len(df))) - return ddf - def read_csv(input_csv_file, scale): """ diff --git a/benchmarks/cugraph/standalone/cugraph_graph_creation.py b/benchmarks/cugraph/standalone/cugraph_graph_creation.py new file mode 100644 index 00000000000..1edf67bba44 --- /dev/null +++ b/benchmarks/cugraph/standalone/cugraph_graph_creation.py @@ -0,0 +1,229 @@ +# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from cugraph.testing.mg_utils import ( + generate_edgelist_rmat, + get_allocation_counts_dask_persist, + get_allocation_counts_dask_lazy, + sizeof_fmt, + get_peak_output_ratio_across_workers, + restart_client, +) + +from cugraph.testing.mg_utils import ( + start_dask_client, + stop_dask_client, + enable_spilling, +) +from cugraph.structure.symmetrize import symmetrize_ddf +import cugraph +import cudf +from time import sleep +import pandas as pd +import time + + +@get_allocation_counts_dask_lazy(return_allocations=True, logging=True) +def construct_graph(dask_dataframe, directed=False, renumber=False): + """ + Args: + dask_dataframe: + dask_dataframe contains weighted and undirected edges with self + loops. Multiple edges will likely be present as well. + directed: + If True, the graph will be directed. + renumber: + If True, the graph will be renumbered. + Returns: + G: cugraph.Graph + """ + st = time.time() + G = cugraph.Graph(directed=directed) + G.from_dask_cudf_edgelist( + dask_dataframe, source="src", destination="dst", renumber=renumber + ) + et = time.time() + g_creation_time = et - st + print(f"Graph creation time = {g_creation_time} s") + return G, g_creation_time + + +@get_allocation_counts_dask_persist(return_allocations=True, logging=True) +def symmetrize_cugraph_df(dask_df, multi=False): + output_df = symmetrize_ddf(dask_df, "src", "dst", multi=multi) + return output_df + + +def benchmark_cugraph_graph_symmetrize(scale, edgefactor, seed, multi): + """ + Benchmark cugraph graph symmetrization + """ + dask_df = generate_edgelist_rmat( + scale=scale, edgefactor=edgefactor, seed=seed, unweighted=True, mg=True + ) + dask_df = dask_df.astype("int64") + dask_df = dask_df.reset_index(drop=True) + input_memory = dask_df.memory_usage().sum().compute() + num_input_edges = len(dask_df) + print(f"Number of input edges = {num_input_edges:,}, multi = {multi}") + output_df, allocation_counts = symmetrize_cugraph_df(dask_df, multi=multi) + ( + input_to_peak_ratio, + 
def get_memory_statistics(allocation_counts, input_memory):
    """
    Summarize memory usage for one benchmark run, print it, and return it.

    Parameters
    ----------
    allocation_counts : dict
        Mapping whose values are dicts containing a "peak_bytes" entry; one
        entry is counted per worker.
    input_memory : number
        Total size of the input edgelist, divided evenly by the number of
        entries in allocation_counts.

    Returns
    -------
    tuple
        (input_to_peak_ratio, output_to_peak_ratio, input_memory_per_worker,
        peak_allocation_across_workers)
    """
    out_ratio = get_peak_output_ratio_across_workers(allocation_counts)
    # Largest peak seen on any single worker.
    peak_bytes = max(
        counts["peak_bytes"] for counts in allocation_counts.values()
    )
    per_worker_input = input_memory / len(allocation_counts)
    in_ratio = peak_bytes / per_worker_input

    print(f"Edge List Memory = {sizeof_fmt(per_worker_input)}")
    print(f"Peak Memory across workers = {sizeof_fmt(peak_bytes)}")
    print(f"Max Peak to output graph ratio across workers = {out_ratio:.2f}")
    print(
        f"Max Peak to avg input graph ratio across workers = {in_ratio:.2f}"
    )

    return (in_ratio, out_ratio, per_worker_input, peak_bytes)
"-" * 40) + stats_df = pd.DataFrame( + stats_ls, + columns=[ + "scale", + "num_input_edges", + "directed", + "renumber", + "input_memory_per_worker", + "peak_allocation_across_workers", + "input_to_peak_ratio", + "output_to_peak_ratio", + "g_creation_time", + ], + ) + stats_df.to_csv("cugraph_graph_creation_stats.csv") + print("-" * 40 + f"scale = {scale} completed" + "-" * 40) + # Cleanup Dask Cluster + stop_dask_client(client, cluster) diff --git a/benchmarks/pytest.ini b/benchmarks/pytest.ini index b61fa92d403..6af3aab27fe 100644 --- a/benchmarks/pytest.ini +++ b/benchmarks/pytest.ini @@ -14,7 +14,6 @@ markers = managedmem_off: RMM managed memory disabled poolallocator_on: RMM pool allocator enabled poolallocator_off: RMM pool allocator disabled - ETL: benchmarks for ETL steps small: small datasets tiny: tiny datasets directed: directed datasets @@ -50,6 +49,8 @@ markers = num_clients_32: start 32 cugraph-service clients fanout_10_25: fanout [10, 25] for sampling algos fanout_5_10_15: fanout [5, 10, 15] for sampling algos + rmat_data: RMAT-generated synthetic datasets + file_data: datasets from $RAPIDS_DATASET_ROOT_DIR python_classes = Bench* diff --git a/benchmarks/shared/python/cugraph_benchmarking/params.py b/benchmarks/shared/python/cugraph_benchmarking/params.py index 4cf749d0c21..ee63b8768a6 100644 --- a/benchmarks/shared/python/cugraph_benchmarking/params.py +++ b/benchmarks/shared/python/cugraph_benchmarking/params.py @@ -11,32 +11,68 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pathlib import Path - import pytest -from cugraph.testing import utils from pylibcugraph.testing.utils import gen_fixture_params +from cugraph.testing import RAPIDS_DATASET_ROOT_DIR_PATH +from cugraph.experimental.datasets import ( + Dataset, + karate, +) +# Create Dataset objects from .csv files. 
+# Once the cugraph.dataset package is updated to include the metadata files for +# these (like karate), these will no longer need to be explicitly instantiated. +hollywood = Dataset( + csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/undirected/hollywood.csv", + csv_col_names=["src", "dst"], + csv_col_dtypes=["int32", "int32"]) +europe_osm = Dataset( + csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/undirected/europe_osm.csv", + csv_col_names=["src", "dst"], + csv_col_dtypes=["int32", "int32"]) +cit_patents = Dataset( + csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/directed/cit-Patents.csv", + csv_col_names=["src", "dst"], + csv_col_dtypes=["int32", "int32"]) +soc_livejournal = Dataset( + csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/directed/soc-LiveJournal1.csv", + csv_col_names=["src", "dst"], + csv_col_dtypes=["int32", "int32"]) -# FIXME: omitting soc-twitter-2010.csv due to OOM error on some workstations. +# Assume all "file_data" (.csv file on disk) datasets are too small to be useful for MG. 
undirected_datasets = [ - pytest.param(Path(utils.RAPIDS_DATASET_ROOT_DIR) / "karate.csv", - marks=[pytest.mark.tiny, pytest.mark.undirected]), - pytest.param(Path(utils.RAPIDS_DATASET_ROOT_DIR) / "csv/undirected/hollywood.csv", - marks=[pytest.mark.small, pytest.mark.undirected]), - pytest.param(Path(utils.RAPIDS_DATASET_ROOT_DIR) / "csv/undirected/europe_osm.csv", - marks=[pytest.mark.undirected]), - # pytest.param("../datasets/csv/undirected/soc-twitter-2010.csv", - # marks=[pytest.mark.undirected]), + pytest.param(karate, + marks=[pytest.mark.tiny, + pytest.mark.undirected, + pytest.mark.file_data, + pytest.mark.sg, + ]), + pytest.param(hollywood, + marks=[pytest.mark.small, + pytest.mark.undirected, + pytest.mark.file_data, + pytest.mark.sg, + ]), + pytest.param(europe_osm, + marks=[pytest.mark.undirected, + pytest.mark.file_data, + pytest.mark.sg, + ]), ] directed_datasets = [ - pytest.param(Path(utils.RAPIDS_DATASET_ROOT_DIR) / "csv/directed/cit-Patents.csv", - marks=[pytest.mark.small, pytest.mark.directed]), - pytest.param(Path( - utils.RAPIDS_DATASET_ROOT_DIR) / "csv/directed/soc-LiveJournal1.csv", - marks=[pytest.mark.directed]), + pytest.param(cit_patents, + marks=[pytest.mark.small, + pytest.mark.directed, + pytest.mark.file_data, + pytest.mark.sg, + ]), + pytest.param(soc_livejournal, + marks=[pytest.mark.directed, + pytest.mark.file_data, + pytest.mark.sg, + ]), ] managed_memory = [ diff --git a/build.sh b/build.sh index 063f881020d..a8e97d924c6 100755 --- a/build.sh +++ b/build.sh @@ -272,7 +272,7 @@ if buildAll || hasArg libcugraph_etl; then CUGRAPH_CMAKE_CUDA_ARCHITECTURES="NATIVE" echo "Building for the architecture of the GPU in the system..." else - CUGRAPH_CMAKE_CUDA_ARCHITECTURES="ALL" + CUGRAPH_CMAKE_CUDA_ARCHITECTURES="RAPIDS" echo "Building for *ALL* supported GPU architectures..." 
fi mkdir -p ${LIBCUGRAPH_ETL_BUILD_DIR} diff --git a/ci/build_docs.sh b/ci/build_docs.sh index dc449437704..8dffbc1668c 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -19,7 +19,7 @@ rapids-print-env rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) -VERSION_NUMBER=$(rapids-get-rapids-version-from-git) +VERSION_NUMBER="23.06" rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ @@ -53,7 +53,7 @@ sphinx-build -b text source _text popd -if [[ "${RAPIDS_BUILD_TYPE}" == "branch" ]]; then +if [[ "${RAPIDS_BUILD_TYPE}" != "pull-request" ]]; then rapids-logger "Upload Docs to S3" aws s3 sync --no-progress --delete docs/cugraph/_html "s3://rapidsai-docs/cugraph/${VERSION_NUMBER}/html" aws s3 sync --no-progress --delete docs/cugraph/_text "s3://rapidsai-docs/cugraph/${VERSION_NUMBER}/txt" diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh index ed291077494..610a603cef8 100755 --- a/ci/release/apply_wheel_modifications.sh +++ b/ci/release/apply_wheel_modifications.sh @@ -29,3 +29,7 @@ sed -i "s/raft-dask/raft-dask${CUDA_SUFFIX}/g" python/cugraph/pyproject.toml sed -i "s/pylibcugraph/pylibcugraph${CUDA_SUFFIX}/g" python/cugraph/pyproject.toml sed -i "s/pylibraft/pylibraft${CUDA_SUFFIX}/g" python/cugraph/pyproject.toml sed -i "s/ucx-py/ucx-py${CUDA_SUFFIX}/g" python/cugraph/pyproject.toml + +if [[ $CUDA_SUFFIX == "-cu12" ]]; then + sed -i "s/cupy-cuda11x/cupy-cuda12x/g" python/cugraph/pyproject.toml +fi diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index a221cdea51e..59f39b4828f 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -72,17 +72,18 @@ sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pylibcugrap for FILE in conda/environments/*.yaml dependencies.yaml; do sed_runner 
"s/libcugraphops=${CURRENT_SHORT_TAG}/libcugraphops=${NEXT_SHORT_TAG}/g" ${FILE}; + sed_runner "s/pylibcugraphops=${CURRENT_SHORT_TAG}/pylibcugraphops=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/libraft-headers=${CURRENT_SHORT_TAG}/libraft-headers=${NEXT_SHORT_TAG}/g" ${FILE}; - sed_runner "s/libraft-distance=${CURRENT_SHORT_TAG}/libraft-distance=${NEXT_SHORT_TAG}/g" ${FILE}; + sed_runner "s/libraft=${CURRENT_SHORT_TAG}/libraft=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/pyraft=${CURRENT_SHORT_TAG}/pyraft=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/raft-dask=${CURRENT_SHORT_TAG}/raft-dask=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/pylibraft=${CURRENT_SHORT_TAG}/pylibraft=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cudf=${CURRENT_SHORT_TAG}/dask-cudf=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cuxfilter=${CURRENT_SHORT_TAG}/cuxfilter=${NEXT_SHORT_TAG}/g" ${FILE}; - sed_runner "s/ucx-py=.*/ucx-py=${NEXT_UCX_PY_VERSION}/g" ${FILE}; + sed_runner "s/ucx-py==.*/ucx-py==${NEXT_UCX_PY_VERSION}/g" ${FILE}; done # Doxyfile update @@ -93,13 +94,15 @@ sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}\"/}" conda/ sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}\"/}" conda/recipes/cugraph-service/conda_build_config.yaml sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}\"/}" conda/recipes/pylibcugraph/conda_build_config.yaml +# CI files for FILE in .github/workflows/*.yaml; do sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" # Wheel builds clone cugraph-ops, update its branch sed_runner "s/extra-repo-sha: branch-.*/extra-repo-sha: branch-${NEXT_SHORT_TAG}/g" "${FILE}" # Wheel builds install dask-cuda from source, update its branch - sed_runner 
"s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" "${FILE}" + sed_runner "s/dask-cuda.git@branch-[0-9][0-9].[0-9][0-9]/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done +sed_runner "s/VERSION_NUMBER=\".*/VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh # Need to distutils-normalize the original version diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index a6c4cdb4a4f..f02ac748f18 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -34,7 +34,7 @@ nvidia-smi # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh +./get_test_data.sh --subset popd EXITCODE=0 diff --git a/ci/test_python.sh b/ci/test_python.sh index 2a6be338819..3a23f521734 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -43,7 +43,7 @@ nvidia-smi # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh +./get_test_data.sh --benchmark popd EXITCODE=0 @@ -64,14 +64,17 @@ popd rapids-logger "pytest cugraph" pushd python/cugraph/cugraph +export DASK_WORKER_DEVICES="0" pytest \ - -m sg \ + -v \ + --benchmark-disable \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph.xml" \ --cov-config=../../.coveragerc \ --cov=cugraph \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-coverage.xml" \ --cov-report=term \ + -k "not test_property_graph_mg" \ tests popd @@ -80,7 +83,7 @@ pushd benchmarks pytest \ --capture=no \ --verbose \ - -m "managedmem_on and poolallocator_on and tiny" \ + -m tiny \ --benchmark-disable \ cugraph/pytest-based/bench_algos.py popd @@ -124,7 +127,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then pylibcugraphops \ cugraph \ cugraph-dgl \ - 'dgl>=1.0' \ + 'dgl>=1.1.0.cu*' \ 'pytorch>=2.0' \ 'pytorch-cuda>=11.8' @@ -179,6 +182,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then --channel "${PYTHON_CHANNEL}" \ libcugraph \ 
pylibcugraph \ + pylibcugraphops \ cugraph \ cugraph-pyg @@ -198,13 +202,13 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then --cov-report=term \ . popd - + # Reactivate the test environment back set +u conda deactivate conda activate test set -u - + else rapids-logger "skipping cugraph_pyg pytest on ARM64" fi diff --git a/ci/utils/is_pascal.py b/ci/utils/is_pascal.py index e55a3153a12..e716f59422f 100644 --- a/ci/utils/is_pascal.py +++ b/ci/utils/is_pascal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,9 +26,7 @@ pascal = False device = cuda.get_current_device() -# check for the attribute using both pre and post numba 0.53 names -cc = getattr(device, 'COMPUTE_CAPABILITY', None) or \ - getattr(device, 'compute_capability') +cc = device.compute_capability if (cc[0] < 7): pascal = True diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 2494d4c9c67..f3d2afd2e24 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,35 +10,34 @@ dependencies: - aiohttp - c-compiler - cmake>=3.23.1,!=3.25.0 -- cuda-python>=11.7.1,<12.0 - cudatoolkit=11.8 -- cudf==23.4.* -- cupy>=9.5.0,<12.0.0a0 +- cudf==23.6.* +- cupy>=12.0.0 - cxx-compiler - cython>=0.29,<0.30 - dask-core==2023.3.2 -- dask-cuda==23.4.* -- dask-cudf==23.4.* +- dask-cuda==23.6.* +- dask-cudf==23.6.* - dask==2023.3.2 - distributed==2023.3.2.1 - doxygen - fsspec[http]>=0.6.0 - gcc_linux-64=11.* -- gmock=1.10.0 +- gmock>=1.13.0 - graphviz -- gtest=1.10.0 +- gtest>=1.13.0 - ipython -- libcudf=23.04.* -- libcugraphops=23.04.* -- libraft-headers=23.04.* -- libraft=23.04.* -- librmm=23.04.* +- libcudf=23.6.* +- libcugraphops=23.6.* +- 
libraft-headers=23.6.* +- libraft=23.6.* +- librmm=23.6.* - nbsphinx - nccl>=2.9.9 - networkx>=2.5.1 - ninja - notebook>=0.5.0 -- numba>=0.56.2 +- numba>=0.57 - numpy>=1.21 - numpydoc - nvcc_linux-64=11.8 @@ -46,17 +45,18 @@ dependencies: - pandas - pre-commit - pydata-sphinx-theme -- pylibraft==23.4.* +- pylibcugraphops=23.6.* +- pylibraft==23.6.* - pytest - pytest-benchmark - pytest-cov - pytest-xdist - python-louvain -- raft-dask==23.4.* +- raft-dask==23.6.* - recommonmark - requests -- rmm==23.4.* -- scikit-build>=0.13.1 +- rmm==23.6.* +- scikit-build>=0.13.1,<0.17.2 - scikit-learn>=0.23.1 - scipy - sphinx-copybutton @@ -64,5 +64,5 @@ dependencies: - sphinx<6 - sphinxcontrib-websupport - ucx-proc=*=gpu -- ucx-py=0.31.* +- ucx-py==0.32.* name: all_cuda-118_arch-x86_64 diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index 240574b5cac..96d25da45fb 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -20,13 +20,13 @@ build: requirements: host: - - python x.x + - python run: - cugraph ={{ version }} - - dgl >=0.9.1 - - numba >=0.56.2 - - numpy - - python x.x + - dgl >=1.1.0.cu* + - numba >=0.57 + - numpy >=1.21 + - python - pytorch tests: diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 097f49bf527..71a64c771e2 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -23,14 +23,15 @@ requirements: - sysroot_{{ target_platform }} {{ sysroot_version }} host: - cython >=0.29,<0.30 - - python x.x + - python - scikit-build >=0.13.1 run: - distributed ==2023.3.2.1 - - numba >=0.56.2 - - numpy + - numba >=0.57 + - numpy >=1.21 + - python - pytorch >=2.0 - - cupy >=9.5.0,<12.0.0a0 + - cupy >=12.0.0 - cugraph ={{ version }} - pyg >=2.3,<2.4 diff --git a/conda/recipes/cugraph-service/conda_build_config.yaml b/conda/recipes/cugraph-service/conda_build_config.yaml index a47aacd6699..ab90a8af2a4 100644 --- 
a/conda/recipes/cugraph-service/conda_build_config.yaml +++ b/conda/recipes/cugraph-service/conda_build_config.yaml @@ -1,2 +1,2 @@ ucx_py_version: - - "0.31.*" + - "0.32.*" diff --git a/conda/recipes/cugraph-service/meta.yaml b/conda/recipes/cugraph-service/meta.yaml index 499e28e88fc..d0a27883010 100644 --- a/conda/recipes/cugraph-service/meta.yaml +++ b/conda/recipes/cugraph-service/meta.yaml @@ -27,9 +27,9 @@ outputs: requirements: host: - pip - - python x.x + - python run: - - python x.x + - python - thriftpy2 >=0.4.15 - name: cugraph-service-server @@ -47,19 +47,20 @@ outputs: requirements: host: - pip - - python x.x + - python - setuptools - wheel run: - {{ pin_subpackage('cugraph-service-client', exact=True) }} - cudf ={{ minor_version }} - cugraph ={{ minor_version }} - - cupy >=9.5.0,<12.0.0a0 + - cupy >=12.0.0 - dask-cuda ={{ minor_version }} - dask-cudf ={{ minor_version }} - distributed ==2023.3.2.1 - - numpy - - python x.x + - numba >=0.57 + - numpy >=1.21 + - python - thriftpy2 >=0.4.15 - ucx-py {{ ucx_py_version }} diff --git a/conda/recipes/cugraph/conda_build_config.yaml b/conda/recipes/cugraph/conda_build_config.yaml index 1bf2cf3f5d4..20194c031f4 100644 --- a/conda/recipes/cugraph/conda_build_config.yaml +++ b/conda/recipes/cugraph/conda_build_config.yaml @@ -14,7 +14,7 @@ sysroot_version: - "2.17" ucx_py_version: - - "0.31.*" + - "0.32.*" # The CTK libraries below are missing from the conda-forge::cudatoolkit # package. The "*_host_*" version specifiers correspond to `11.8` packages. 
diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 0e6946c54bd..10f29e13f11 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -32,6 +32,7 @@ build: - SCCACHE_S3_KEY_PREFIX=cugraph-aarch64 # [aarch64] - SCCACHE_S3_KEY_PREFIX=cugraph-linux64 # [linux64] - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS ignore_run_exports_from: - {{ compiler('cuda') }} @@ -59,7 +60,7 @@ requirements: - libraft ={{ minor_version }} - libraft-headers ={{ minor_version }} - pylibraft ={{ minor_version}} - - python x.x + - python - raft-dask ={{ minor_version }} - scikit-build >=0.13.1 - setuptools @@ -69,7 +70,7 @@ requirements: - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} - cuda-python >=11.7.1,<12.0 - cudf ={{ minor_version }} - - cupy >=9.5.0,<12.0.0a0 + - cupy >=12.0.0 - dask-cuda ={{ minor_version }} - dask-cudf ={{ minor_version }} - dask ==2023.3.2 @@ -80,7 +81,7 @@ requirements: - libraft-headers ={{ minor_version }} - pylibcugraph ={{ version }} - pylibraft ={{ minor_version }} - - python x.x + - python - raft-dask ={{ minor_version }} - ucx-proc=*=gpu - ucx-py {{ ucx_py_version }} diff --git a/conda/recipes/libcugraph/conda_build_config.yaml b/conda/recipes/libcugraph/conda_build_config.yaml index 83a383236a4..2fa26d99c09 100644 --- a/conda/recipes/libcugraph/conda_build_config.yaml +++ b/conda/recipes/libcugraph/conda_build_config.yaml @@ -17,7 +17,7 @@ nccl_version: - ">=2.9.9" gtest_version: - - "=1.10.0" + - ">=1.13.0" cuda_profiler_api_version: - ">=11.8.86,<12" diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 5d53d2640b6..f843aabba92 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -29,6 +29,7 @@ build: - SCCACHE_S3_KEY_PREFIX=libcugraph-aarch64 # [aarch64] - SCCACHE_S3_KEY_PREFIX=libcugraph-linux64 # [linux64] - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS requirements: build: diff --git 
a/conda/recipes/pylibcugraph/conda_build_config.yaml b/conda/recipes/pylibcugraph/conda_build_config.yaml index 1bf2cf3f5d4..20194c031f4 100644 --- a/conda/recipes/pylibcugraph/conda_build_config.yaml +++ b/conda/recipes/pylibcugraph/conda_build_config.yaml @@ -14,7 +14,7 @@ sysroot_version: - "2.17" ucx_py_version: - - "0.31.*" + - "0.32.*" # The CTK libraries below are missing from the conda-forge::cudatoolkit # package. The "*_host_*" version specifiers correspond to `11.8` packages. diff --git a/conda/recipes/pylibcugraph/meta.yaml b/conda/recipes/pylibcugraph/meta.yaml index a29231ad1df..de031a6fe94 100644 --- a/conda/recipes/pylibcugraph/meta.yaml +++ b/conda/recipes/pylibcugraph/meta.yaml @@ -32,6 +32,7 @@ build: - SCCACHE_S3_KEY_PREFIX=pylibcugraph-aarch64 # [aarch64] - SCCACHE_S3_KEY_PREFIX=pylibcugraph-linux64 # [linux64] - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS ignore_run_exports_from: - {{ compiler('cuda') }} @@ -59,7 +60,7 @@ requirements: - libraft ={{ minor_version }} - libraft-headers ={{ minor_version }} - pylibraft ={{ minor_version}} - - python x.x + - python - rmm ={{ minor_version }} - scikit-build >=0.13.1 - setuptools @@ -68,7 +69,7 @@ requirements: run: - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} - libcugraph ={{ version }} - - python x.x + - python tests: requirements: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 675637fd210..fe908fbd9bf 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -25,7 +25,7 @@ include(rapids-find) rapids_cuda_init_architectures(CUGRAPH) -project(CUGRAPH VERSION 23.04.01 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 23.06.00 LANGUAGES C CXX CUDA) if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0) @@ -213,10 +213,8 @@ set(CUGRAPH_SOURCES src/community/leiden_sg.cu src/community/leiden_mg.cu src/community/legacy/louvain.cu - src/community/legacy/leiden.cu src/community/legacy/ktruss.cu src/community/legacy/ecg.cu - 
src/community/legacy/extract_subgraph_by_vertex.cu src/community/egonet_sg.cu src/community/egonet_mg.cu src/sampling/random_walks.cu @@ -232,6 +230,7 @@ set(CUGRAPH_SOURCES src/components/legacy/connectivity.cu src/centrality/legacy/betweenness_centrality.cu src/generators/generate_rmat_edgelist.cu + src/generators/generate_bipartite_rmat_edgelist.cu src/generators/generator_tools.cu src/generators/simple_generators.cu src/generators/erdos_renyi_generator.cu @@ -404,7 +403,8 @@ add_library(cugraph_c src/c_api/capi_helper.cu src/c_api/legacy_spectral.cpp src/c_api/legacy_ecg.cpp - src/c_api/graph_helper.cu + src/c_api/graph_helper_sg.cu + src/c_api/graph_helper_mg.cu src/c_api/graph_generators.cpp src/c_api/induced_subgraph_result.cpp src/c_api/hits.cpp diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 3428562510f..5d04cd9b539 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "libcugraph" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER=23.04 +PROJECT_NUMBER=23.06 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 5eb347eb716..3bb98ce4150 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -677,47 +677,6 @@ void flatten_dendrogram(raft::handle_t const& handle, Dendrogram const& dendrogram, typename graph_view_t::vertex_type* clustering); -/** - * @brief Legacy Leiden implementation - * - * Compute a clustering of the graph by maximizing modularity using the Leiden improvements - * to the Louvain method. - * - * Computed using the Leiden method described in: - * - * Traag, V. A., Waltman, L., & van Eck, N. J. (2019). From Louvain to Leiden: - * guaranteeing well-connected communities. 
Scientific reports, 9(1), 5233. - * doi: 10.1038/s41598-019-41695-z - * - * @throws cugraph::logic_error when an error occurs. - * - * @tparam vertex_t Type of vertex identifiers. - * Supported value : int (signed, 32-bit) - * @tparam edge_t Type of edge identifiers. - * Supported value : int (signed, 32-bit) - * @tparam weight_t Type of edge weights. Supported values : float or double. - * - * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, - * @param[in] graph input graph object (CSR) - * @param[out] clustering Pointer to device array where the clustering should be stored - * @param[in] max_level (optional) maximum number of levels to run (default 100) - * @param[in] resolution (optional) The value of the resolution parameter to use. - * Called gamma in the modularity formula, this changes the size - * of the communities. Higher resolutions lead to more smaller - * communities, lower resolutions lead to fewer larger - * communities. (default 1) - * - * @return a pair containing: - * 1) number of levels of the returned clustering - * 2) modularity of the returned clustering - */ -template -std::pair leiden(raft::handle_t const& handle, - legacy::GraphCSRView const& graph, - vertex_t* clustering, - size_t max_level = 100, - weight_t resolution = weight_t{1}); - /** * @brief Leiden implementation * @@ -738,7 +697,9 @@ std::pair leiden(raft::handle_t const& handle, * Supported value : int (signed, 32-bit) * @tparam weight_t Type of edge weights. Supported values : float or double. * - * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param rng_state The RngState instance holding pseudo-random number generator state. * @param graph_view Graph view object. 
* @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p * edge_weight_view.has_value() == false, edge weights are assumed to be 1.0. @@ -748,6 +709,10 @@ std::pair leiden(raft::handle_t const& handle, * of the communities. Higher resolutions lead to more smaller * communities, lower resolutions lead to fewer larger * communities. (default 1) + * @param[in] theta (optional) The value of the parameter to scale modularity + * gain in Leiden refinement phase. It is used to compute + * the probability of joining a random leiden community. + * Called theta in the Leiden algorithm. * * @return a pair containing: * 1) unique pointer to dendrogram @@ -757,10 +722,12 @@ std::pair leiden(raft::handle_t const& handle, template std::pair>, weight_t> leiden( raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, size_t max_level = 100, - weight_t resolution = weight_t{1}); + weight_t resolution = weight_t{1}, + weight_t theta = weight_t{1}); /** * @brief Leiden implementation @@ -782,7 +749,9 @@ std::pair>, weight_t> leiden( * Supported value : int (signed, 32-bit) * @tparam weight_t Type of edge weights. Supported values : float or double. * - * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param rng_state The RngState instance holding pseudo-random number generator state. * @param graph_view Graph view object. * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p * edge_weight_view.has_value() == false, edge weights are assumed to be 1.0. @@ -792,6 +761,11 @@ std::pair>, weight_t> leiden( * of the communities. Higher resolutions lead to more smaller * communities, lower resolutions lead to fewer larger * communities. 
(default 1) + * @param[in] theta (optional) The value of the parameter to scale modularity + * gain in Leiden refinement phase. It is used to compute + * the probability of joining a random leiden community. + * Called theta in the Leiden algorithm. + * (default 1) * * @return a pair containing: * 1) number of levels of the returned clustering @@ -800,11 +774,13 @@ std::pair>, weight_t> leiden( template std::pair leiden( raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, vertex_t* clustering, // FIXME: Use (device_)span instead size_t max_level = 100, - weight_t resolution = weight_t{1}); + weight_t resolution = weight_t{1}, + weight_t theta = weight_t{1}); /** * @brief Computes the ecg clustering of the given graph. @@ -1667,7 +1643,7 @@ sample_neighbors_adjacency_list(raft::handle_t const& handle, vertex_t const* ptr_d_start, size_t num_start_vertices, size_t sampling_size, - ops::gnn::graph::SamplingAlgoT sampling_algo); + ops::graph::SamplingAlgoT sampling_algo); /** * @brief generate sub-sampled graph as an edge list (COO format) given input graph, @@ -1697,7 +1673,7 @@ std::tuple, rmm::device_uvector> sample_ vertex_t const* ptr_d_start, size_t num_start_vertices, size_t sampling_size, - ops::gnn::graph::SamplingAlgoT sampling_algo); + ops::graph::SamplingAlgoT sampling_algo); #endif /** @@ -2033,6 +2009,25 @@ std::tuple, rmm::device_uvector> k_hop_nbr size_t k, bool do_expensive_check = false); +/** + * @brief Find a Maximal Independent Set + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. 
+ * @param graph_view Graph view object. + * @param rng_state The RngState instance holding pseudo-random number generator state. + * @return A device vector containing vertices found in the maximal independent set + */ + +template +rmm::device_uvector maximal_independent_set( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state); + } // namespace cugraph /** diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index 2e18a71898f..02b931fbde6 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -265,7 +265,7 @@ class edge_partition_device_view_t +#include +#include +#include #include #include @@ -25,10 +28,18 @@ namespace cugraph { namespace detail { -template +template ::value_type> class edge_partition_edge_property_device_view_t { public: - using value_type = typename thrust::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type, value_t> || + cugraph::has_packed_bool_element()); + static_assert(cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); + + using edge_type = edge_t; + using value_type = value_t; edge_partition_edge_property_device_view_t() = default; @@ -41,9 +52,116 @@ class edge_partition_edge_property_device_view_t { __host__ __device__ ValueIterator value_first() { return value_first_; } - __device__ ValueIterator get_iter(edge_t offset) const { return value_first_ + offset; } + __device__ value_t get(edge_t offset) const + { + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = 
cugraph::packed_bool_mask(offset); + return static_cast(*(value_first_ + cugraph::packed_bool_offset(offset)) & mask); + } else { + return *(value_first_ + offset); + } + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>>, + void> + set(edge_t offset, value_t val) const + { + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = cugraph::packed_bool_mask(offset); + if (val) { + atomicOr(value_first_ + cugraph::packed_bool_offset(offset), mask); + } else { + atomicAnd(value_first_ + cugraph::packed_bool_offset(offset), ~mask); + } + } else { + *(value_first_ + offset) = val; + } + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>>, + value_t> + atomic_and(edge_t offset, value_t val) const + { + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = cugraph::packed_bool_mask(offset); + auto old = atomicAnd(value_first_ + cugraph::packed_bool_offset(offset), + val ? uint32_t{0xffffffff} : ~mask); + return static_cast(old & mask); + } else { + return cugraph::atomic_and(value_first_ + offset, val); + } + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>>, + value_t> + atomic_or(edge_t offset, value_t val) const + { + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = cugraph::packed_bool_mask(offset); + auto old = + atomicOr(value_first_ + cugraph::packed_bool_offset(offset), val ? 
mask : uint32_t{0}); + return static_cast(old & mask); + } else { + return cugraph::atomic_or(value_first_ + offset, val); + } + } - __device__ value_type get(edge_t offset) const { return *get_iter(offset); } + template + __device__ std::enable_if_t< + !std::is_const_v::reference>> && + !cugraph::has_packed_bool_element() /* add undefined for (packed-)bool */, + value_t> + atomic_add(edge_t offset, value_t val) const + { + cugraph::atomic_add(value_first_ + offset, val); + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>>, + value_t> + elementwise_atomic_cas(edge_t offset, value_t compare, value_t val) const + { + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = cugraph::packed_bool_mask(offset); + auto old = val ? atomicOr(value_first_ + cugraph::packed_bool_offset(offset), mask) + : atomicAnd(value_first_ + cugraph::packed_bool_offset(offset), ~mask); + return static_cast(old & mask); + } else { + return cugraph::elementwise_atomic_cas(value_first_ + offset, compare, val); + } + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>> && + !cugraph::has_packed_bool_element() /* min undefined for (packed-)bool */, + value_t> + elementwise_atomic_min(edge_t offset, value_t val) const + { + cugraph::elementwise_atomic_min(value_first_ + offset, val); + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>> && + !cugraph::has_packed_bool_element() /* max undefined for (packed-)bool */, + value_t> + elementwise_atomic_max(edge_t offset, value_t val) const + { + cugraph::elementwise_atomic_max(value_first_ + offset, val); + } private: ValueIterator value_first_{}; @@ -52,6 +170,7 @@ class edge_partition_edge_property_device_view_t { template class edge_partition_edge_dummy_property_device_view_t { public: + using edge_type = edge_t; using value_type = thrust::nullopt_t; 
edge_partition_edge_dummy_property_device_view_t() = default; diff --git a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh index 459547198a5..1ff279fbdca 100644 --- a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,9 @@ #pragma once #include +#include #include +#include #include @@ -32,15 +34,22 @@ namespace cugraph { namespace detail { -template +template ::value_type> class edge_partition_endpoint_property_device_view_t { public: - using value_type = typename thrust::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type, value_t> || + cugraph::has_packed_bool_element()); + + using vertex_type = vertex_t; + using value_type = value_t; edge_partition_endpoint_property_device_view_t() = default; edge_partition_endpoint_property_device_view_t( - edge_major_property_view_t const& view, size_t partition_idx) + edge_major_property_view_t const& view, size_t partition_idx) : value_first_(view.value_firsts()[partition_idx]), range_first_(view.major_range_firsts()[partition_idx]) { @@ -54,7 +63,7 @@ class edge_partition_endpoint_property_device_view_t { } edge_partition_endpoint_property_device_view_t( - edge_minor_property_view_t const& view) + edge_minor_property_view_t const& view) { if (view.keys()) { keys_ = *(view.keys()); @@ -65,25 +74,104 @@ class edge_partition_endpoint_property_device_view_t { range_first_ = view.minor_range_first(); } - __device__ ValueIterator get_iter(vertex_t offset) const + __device__ value_t get(vertex_t offset) const { - auto value_offset = offset; - if (keys_) { - auto 
chunk_idx = static_cast(offset) / (*key_chunk_size_); - auto it = thrust::lower_bound(thrust::seq, - (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx], - (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx + 1], - range_first_ + offset); - assert((it != (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx + 1]) && - (*it == (range_first_ + offset))); - value_offset = (*key_chunk_start_offsets_)[chunk_idx] + - static_cast(thrust::distance( - (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx], it)); + auto val_offset = value_offset(offset); + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = cugraph::packed_bool_mask(val_offset); + return static_cast(*(value_first_ + cugraph::packed_bool_offset(val_offset)) & mask); + } else { + return *(value_first_ + val_offset); } - return value_first_ + value_offset; } - __device__ value_type get(vertex_t offset) const { return *get_iter(offset); } + template + __device__ std::enable_if_t< + !std::is_const_v::reference>>, + value_t> + atomic_and(vertex_t offset, value_t val) const + { + auto val_offset = value_offset(offset); + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = cugraph::packed_bool_mask(val_offset); + auto old = atomicAnd(value_first_ + cugraph::packed_bool_offset(val_offset), + val ? 
cugraph::packed_bool_full_mask() : ~mask); + return static_cast(old & mask); + } else { + return cugraph::atomic_and(value_first_ + val_offset, val); + } + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>>, + value_t> + atomic_or(vertex_t offset, value_t val) const + { + auto val_offset = value_offset(offset); + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = cugraph::packed_bool_mask(val_offset); + auto old = atomicOr(value_first_ + cugraph::packed_bool_offset(val_offset), + val ? mask : cugraph::packed_bool_empty_mask()); + return static_cast(old & mask); + } else { + return cugraph::atomic_or(value_first_ + val_offset, val); + } + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>> && + !cugraph::has_packed_bool_element() /* add undefined for (packed-)bool */, + value_t> + atomic_add(vertex_t offset, value_t val) const + { + auto val_offset = value_offset(offset); + cugraph::atomic_add(value_first_ + val_offset, val); + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>>, + value_t> + elementwise_atomic_cas(vertex_t offset, value_t compare, value_t val) const + { + auto val_offset = value_offset(offset); + if constexpr (cugraph::has_packed_bool_element()) { + static_assert(std::is_arithmetic_v, "unimplemented for thrust::tuple types."); + auto mask = cugraph::packed_bool_mask(val_offset); + auto old = val ? 
atomicOr(value_first_ + cugraph::packed_bool_offset(val_offset), mask) + : atomicAnd(value_first_ + cugraph::packed_bool_offset(val_offset), ~mask); + return static_cast(old & mask); + } else { + return cugraph::elementwise_atomic_cas(value_first_ + val_offset, compare, val); + } + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>> && + !cugraph::has_packed_bool_element() /* min undefined for (packed-)bool */, + value_t> + elementwise_atomic_min(vertex_t offset, value_t val) const + { + auto val_offset = value_offset(offset); + cugraph::elementwise_atomic_min(value_first_ + val_offset, val); + } + + template + __device__ std::enable_if_t< + !std::is_const_v::reference>> && + !cugraph::has_packed_bool_element() /* max undefined for (packed-)bool */, + value_t> + elementwise_atomic_max(vertex_t offset, value_t val) const + { + auto val_offset = value_offset(offset); + cugraph::elementwise_atomic_max(value_first_ + val_offset, val); + } private: thrust::optional> keys_{thrust::nullopt}; @@ -92,12 +180,31 @@ class edge_partition_endpoint_property_device_view_t { ValueIterator value_first_{}; vertex_t range_first_{}; + + __device__ vertex_t value_offset(vertex_t offset) const + { + auto val_offset = offset; + if (keys_) { + auto chunk_idx = static_cast(offset) / (*key_chunk_size_); + auto it = thrust::lower_bound(thrust::seq, + (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx], + (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx + 1], + range_first_ + offset); + assert((it != (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx + 1]) && + (*it == (range_first_ + offset))); + val_offset = (*key_chunk_start_offsets_)[chunk_idx] + + static_cast(thrust::distance( + (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx], it)); + } + return val_offset; + } }; template class edge_partition_endpoint_dummy_property_device_view_t { public: - using value_type = thrust::nullopt_t; + using vertex_type = vertex_t; + using value_type = 
thrust::nullopt_t; edge_partition_endpoint_dummy_property_device_view_t() = default; diff --git a/cpp/include/cugraph/edge_property.hpp b/cpp/include/cugraph/edge_property.hpp index fdd28bc1eb6..8904006a2a2 100644 --- a/cpp/include/cugraph/edge_property.hpp +++ b/cpp/include/cugraph/edge_property.hpp @@ -17,21 +17,30 @@ #pragma once #include +#include #include #include #include +#include #include #include namespace cugraph { -template +template ::value_type> class edge_property_view_t { public: - using value_type = typename thrust::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type, value_t> || + cugraph::has_packed_bool_element()); + + using edge_type = edge_t; + using value_type = value_t; using value_iterator = ValueIterator; edge_property_view_t() = default; @@ -61,6 +70,8 @@ class edge_dummy_property_view_t { template class edge_property_t { public: + static_assert(cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); + using edge_type = typename GraphViewType::edge_type; using value_type = T; using buffer_type = decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); @@ -70,18 +81,39 @@ class edge_property_t { edge_property_t(raft::handle_t const& handle, GraphViewType const& graph_view) { buffers_.reserve(graph_view.number_of_local_edge_partitions()); + edge_counts_ = std::vector(graph_view.number_of_local_edge_partitions(), 0); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - buffers_.push_back(allocate_dataframe_buffer( - graph_view.local_edge_partition_view(i).number_of_edges(), handle.get_stream())); + auto num_edges = + static_cast(graph_view.local_edge_partition_view(i).number_of_edges()); + size_t buffer_size = + std::is_same_v ? 
cugraph::packed_bool_size(num_edges) : num_edges; + buffers_.push_back( + allocate_dataframe_buffer, uint32_t, T>>( + buffer_size, handle.get_stream())); + edge_counts_[i] = num_edges; + } + } + + template >> + edge_property_t(std::vector&& buffers) : buffers_(std::move(buffers)) + { + edge_counts_.resize(buffers_.size()); + for (size_t i = 0; i < edge_counts_.size(); ++i) { + edge_counts_[i] = size_dataframe_buffer(buffers_[i]); } } - edge_property_t(std::vector&& buffers) : buffers_(std::move(buffers)) {} + edge_property_t(std::vector&& buffers, std::vector&& edge_counts) + : buffers_(std::move(buffers)), edge_counts_(std::move(edge_counts)) + { + } void clear(raft::handle_t const& handle) { buffers_.clear(); buffers_.shrink_to_fit(); + edge_counts_.clear(); + edge_counts_.shrink_to_fit(); } auto view() const @@ -92,11 +124,11 @@ class edge_property_t { std::vector edge_partition_edge_counts(buffers_.size()); for (size_t i = 0; i < edge_partition_value_firsts.size(); ++i) { edge_partition_value_firsts[i] = get_dataframe_buffer_cbegin(buffers_[i]); - edge_partition_edge_counts[i] = size_dataframe_buffer(buffers_[i]); + edge_partition_edge_counts[i] = edge_counts_[i]; } - return edge_property_view_t(edge_partition_value_firsts, - edge_partition_edge_counts); + return edge_property_view_t(edge_partition_value_firsts, + edge_partition_edge_counts); } auto mutable_view() @@ -107,15 +139,16 @@ class edge_property_t { std::vector edge_partition_edge_counts(buffers_.size()); for (size_t i = 0; i < edge_partition_value_firsts.size(); ++i) { edge_partition_value_firsts[i] = get_dataframe_buffer_begin(buffers_[i]); - edge_partition_edge_counts[i] = size_dataframe_buffer(buffers_[i]); + edge_partition_edge_counts[i] = edge_counts_[i]; } - return edge_property_view_t(edge_partition_value_firsts, - edge_partition_edge_counts); + return edge_property_view_t(edge_partition_value_firsts, + edge_partition_edge_counts); } private: std::vector buffers_{}; + std::vector 
edge_counts_{}; }; class edge_dummy_property_t { @@ -125,11 +158,12 @@ class edge_dummy_property_t { auto view() const { return edge_dummy_property_view_t{}; } }; -template -auto view_concat(edge_property_view_t const&... views) +template +auto view_concat(edge_property_view_t const&... views) { using concat_value_iterator = decltype(thrust::make_zip_iterator( thrust_tuple_cat(to_thrust_iterator_tuple(views.value_firsts()[0])...))); + using concat_value_type = decltype(thrust_tuple_cat(to_thrust_tuple(Types{})...)); std::vector edge_partition_concat_value_firsts{}; auto first_view = get_first_of_pack(views...); @@ -139,8 +173,8 @@ auto view_concat(edge_property_view_t const&... views) thrust_tuple_cat(to_thrust_iterator_tuple(views.value_firsts()[i])...)); } - return edge_property_view_t(edge_partition_concat_value_firsts, - first_view.edge_counts()); + return edge_property_view_t( + edge_partition_concat_value_firsts, first_view.edge_counts()); } } // namespace cugraph diff --git a/cpp/include/cugraph/edge_src_dst_property.hpp b/cpp/include/cugraph/edge_src_dst_property.hpp index f894d3d1a60..d27f6856428 100644 --- a/cpp/include/cugraph/edge_src_dst_property.hpp +++ b/cpp/include/cugraph/edge_src_dst_property.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -25,6 +26,7 @@ #include #include +#include #include #include @@ -34,10 +36,17 @@ namespace cugraph { namespace detail { -template +template ::value_type> class edge_major_property_view_t { public: - using value_type = typename thrust::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type, value_t> || + cugraph::has_packed_bool_element()); + + using vertex_type = vertex_t; + using value_type = value_t; using value_iterator = ValueIterator; edge_major_property_view_t() = default; @@ -101,10 +110,17 @@ class edge_major_property_view_t { std::vector edge_partition_major_range_firsts_{}; }; -template +template ::value_type> class edge_minor_property_view_t { public: - 
using value_type = typename thrust::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type, value_t> || + cugraph::has_packed_bool_element()); + + using vertex_type = vertex_t; + using value_type = value_t; using value_iterator = ValueIterator; edge_minor_property_view_t() = default; @@ -152,7 +168,11 @@ class edge_minor_property_view_t { template class edge_major_property_t { public: - using buffer_type = decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); + static_assert(cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); + + using buffer_type = + decltype(allocate_dataframe_buffer, uint32_t, T>>( + size_t{0}, rmm::cuda_stream_view{})); edge_major_property_t(raft::handle_t const& handle) {} @@ -163,8 +183,12 @@ class edge_major_property_t { { buffers_.reserve(edge_partition_major_range_firsts_.size()); for (size_t i = 0; i < edge_partition_major_range_firsts_.size(); ++i) { + size_t buffer_size = std::is_same_v + ? cugraph::packed_bool_size(edge_partition_major_range_sizes[i]) + : edge_partition_major_range_sizes[i]; buffers_.push_back( - allocate_dataframe_buffer(edge_partition_major_range_sizes[i], handle.get_stream())); + allocate_dataframe_buffer, uint32_t, T>>( + buffer_size, handle.get_stream())); } } @@ -181,8 +205,12 @@ class edge_major_property_t { { buffers_.reserve(edge_partition_major_range_firsts_.size()); for (size_t i = 0; i < edge_partition_major_range_firsts_.size(); ++i) { + size_t buffer_size = std::is_same_v + ? 
cugraph::packed_bool_size(edge_partition_keys[i].size()) + : edge_partition_keys[i].size(); buffers_.push_back( - allocate_dataframe_buffer(edge_partition_keys[i].size(), handle.get_stream())); + allocate_dataframe_buffer, uint32_t, T>>( + buffer_size, handle.get_stream())); } } @@ -208,14 +236,14 @@ class edge_major_property_t { } if (edge_partition_keys_) { - return edge_major_property_view_t( + return edge_major_property_view_t( *edge_partition_keys_, *edge_partition_key_chunk_start_offsets_, *key_chunk_size_, edge_partition_value_firsts, edge_partition_major_range_firsts_); } else { - return edge_major_property_view_t( + return edge_major_property_view_t( edge_partition_value_firsts, edge_partition_major_range_firsts_); } } @@ -230,14 +258,14 @@ class edge_major_property_t { } if (edge_partition_keys_) { - return edge_major_property_view_t( + return edge_major_property_view_t( *edge_partition_keys_, *edge_partition_key_chunk_start_offsets_, *key_chunk_size_, edge_partition_value_firsts, edge_partition_major_range_firsts_); } else { - return edge_major_property_view_t( + return edge_major_property_view_t( edge_partition_value_firsts, edge_partition_major_range_firsts_); } } @@ -256,8 +284,11 @@ class edge_major_property_t { template class edge_minor_property_t { public: + static_assert(cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); + edge_minor_property_t(raft::handle_t const& handle) - : buffer_(allocate_dataframe_buffer(size_t{0}, handle.get_stream())), + : buffer_(allocate_dataframe_buffer, uint32_t, T>>( + size_t{0}, handle.get_stream())), minor_range_first_(vertex_t{0}) { } @@ -265,7 +296,9 @@ class edge_minor_property_t { edge_minor_property_t(raft::handle_t const& handle, vertex_t buffer_size, vertex_t minor_range_first) - : buffer_(allocate_dataframe_buffer(buffer_size, handle.get_stream())), + : buffer_(allocate_dataframe_buffer, uint32_t, T>>( + std::is_same_v ? 
cugraph::packed_bool_size(buffer_size) : buffer_size, + handle.get_stream())), minor_range_first_(minor_range_first) { } @@ -278,7 +311,9 @@ class edge_minor_property_t { : keys_(keys), key_chunk_start_offsets_(key_chunk_start_offsets), key_chunk_size_(key_chunk_size), - buffer_(allocate_dataframe_buffer(keys.size(), handle.get_stream())), + buffer_(allocate_dataframe_buffer, uint32_t, T>>( + std::is_same_v ? cugraph::packed_bool_size(keys.size()) : keys.size(), + handle.get_stream())), minor_range_first_(minor_range_first) { } @@ -298,11 +333,11 @@ class edge_minor_property_t { { auto value_first = get_dataframe_buffer_cbegin(buffer_); if (keys_) { - return edge_minor_property_view_t( + return edge_minor_property_view_t( *keys_, *key_chunk_start_offsets_, *key_chunk_size_, value_first, minor_range_first_); } else { - return edge_minor_property_view_t(value_first, - minor_range_first_); + return edge_minor_property_view_t(value_first, + minor_range_first_); } } @@ -310,11 +345,11 @@ class edge_minor_property_t { { auto value_first = get_dataframe_buffer_begin(buffer_); if (keys_) { - return edge_minor_property_view_t( + return edge_minor_property_view_t( *keys_, *key_chunk_start_offsets_, *key_chunk_size_, value_first, minor_range_first_); } else { - return edge_minor_property_view_t(value_first, - minor_range_first_); + return edge_minor_property_view_t(value_first, + minor_range_first_); } } @@ -323,7 +358,8 @@ class edge_minor_property_t { std::optional> key_chunk_start_offsets_{std::nullopt}; std::optional key_chunk_size_{std::nullopt}; - decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})) buffer_; + decltype(allocate_dataframe_buffer, uint32_t, T>>( + size_t{0}, rmm::cuda_stream_view{})) buffer_; vertex_t minor_range_first_{}; }; @@ -338,9 +374,10 @@ class edge_endpoint_dummy_property_view_t { template class edge_src_property_t { public: - using value_type = T; static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + using 
value_type = T; + edge_src_property_t(raft::handle_t const& handle) : property_(handle) {} edge_src_property_t(raft::handle_t const& handle, GraphViewType const& graph_view) @@ -429,10 +466,10 @@ class edge_src_property_t { template class edge_dst_property_t { public: - using value_type = T; - static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + using value_type = T; + edge_dst_property_t(raft::handle_t const& handle) : property_(handle) {} edge_dst_property_t(raft::handle_t const& handle, GraphViewType const& graph_view) @@ -532,11 +569,12 @@ class edge_dst_dummy_property_t { auto view() const { return detail::edge_endpoint_dummy_property_view_t{}; } }; -template -auto view_concat(detail::edge_major_property_view_t const&... views) +template +auto view_concat(detail::edge_major_property_view_t const&... views) { using concat_value_iterator = decltype(thrust::make_zip_iterator( thrust_tuple_cat(to_thrust_iterator_tuple(views.value_firsts()[0])...))); + using concat_value_type = decltype(thrust_tuple_cat(to_thrust_tuple(Types{})...)); std::vector edge_partition_concat_value_firsts{}; auto first_view = get_first_of_pack(views...); @@ -547,23 +585,24 @@ auto view_concat(detail::edge_major_property_view_t const&... view } if (first_view.key_chunk_size()) { - return detail::edge_major_property_view_t( + return detail::edge_major_property_view_t( *(first_view.keys()), *(first_view.key_chunk_start_offsets()), *(first_view.key_chunk_size()), edge_partition_concat_value_firsts, first_view.major_range_firsts()); } else { - return detail::edge_major_property_view_t( + return detail::edge_major_property_view_t( edge_partition_concat_value_firsts, first_view.major_range_firsts()); } } -template -auto view_concat(detail::edge_minor_property_view_t const&... views) +template +auto view_concat(detail::edge_minor_property_view_t const&... 
views) { - using concat_value_iterator = decltype( - thrust::make_zip_iterator(thrust_tuple_cat(to_thrust_iterator_tuple(views.value_first())...))); + using concat_value_iterator = decltype(thrust::make_zip_iterator( + thrust_tuple_cat(to_thrust_iterator_tuple(views.value_first())...))); + using concat_value_type = decltype(thrust_tuple_cat(to_thrust_tuple(Types{})...)); concat_value_iterator edge_partition_concat_value_first{}; @@ -573,14 +612,14 @@ auto view_concat(detail::edge_minor_property_view_t const&... view thrust::make_zip_iterator(thrust_tuple_cat(to_thrust_iterator_tuple(views.value_first())...)); if (first_view.key_chunk_size()) { - return detail::edge_minor_property_view_t( + return detail::edge_minor_property_view_t( *(first_view.keys()), *(first_view.key_chunk_start_offsets()), *(first_view.key_chunk_size()), edge_partition_concat_value_first, first_view.minor_range_first()); } else { - return detail::edge_minor_property_view_t( + return detail::edge_minor_property_view_t( edge_partition_concat_value_first, first_view.minor_range_first()); } } diff --git a/cpp/include/cugraph/graph.hpp b/cpp/include/cugraph/graph.hpp index 233824049f3..60b9f1a4054 100644 --- a/cpp/include/cugraph/graph.hpp +++ b/cpp/include/cugraph/graph.hpp @@ -310,27 +310,22 @@ template struct invalid_idx< T, typename std::enable_if_t::value && std::is_signed::value>> - : std::integral_constant { -}; + : std::integral_constant {}; template struct invalid_idx< T, typename std::enable_if_t::value && std::is_unsigned::value>> - : std::integral_constant::max()> { -}; + : std::integral_constant::max()> {}; template -struct invalid_vertex_id : invalid_idx { -}; +struct invalid_vertex_id : invalid_idx {}; template -struct invalid_edge_id : invalid_idx { -}; +struct invalid_edge_id : invalid_idx {}; template -struct invalid_component_id : invalid_idx { -}; +struct invalid_component_id : invalid_idx {}; template __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( diff --git 
a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 64a50b582b5..1c01568ae17 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -892,8 +892,9 @@ weight_t compute_total_edge_weight( * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Graph view object of the input graph to compute the maximum per-vertex outgoing - * edge weight sums. + * @param graph_view Graph view object of the input graph to select random vertices from. + * @param given_set Distributed set to sample from. If @p given_set is not specified, sample from + * the entire vertex range provided by @p graph_view. * @param rng_state The RngState instance holding pseudo-random number generator state. * @param select_count The number of vertices to select from the graph * @param with_replacement If true, select with replacement, if false select without replacement @@ -904,9 +905,11 @@ template select_random_vertices( raft::handle_t const& handle, graph_view_t const& graph_view, + std::optional> given_set, raft::random::RngState& rng_state, size_t select_count, bool with_replacement, - bool sort_vertices); + bool sort_vertices, + bool do_expensive_check = false); } // namespace cugraph diff --git a/cpp/include/cugraph/graph_generators.hpp b/cpp/include/cugraph/graph_generators.hpp index fab92259196..4944e0f4917 100644 --- a/cpp/include/cugraph/graph_generators.hpp +++ b/cpp/include/cugraph/graph_generators.hpp @@ -127,6 +127,45 @@ std::tuple, rmm::device_uvector> generat double c = 0.19, bool clip_and_flip = false); +/** + * @brief generate an edge list for a bipartite R-mat graph. + * + * The source vertex IDs will be in the range of [0, 2^src_scale) and the destination vertex IDs + * will be in the range of [0, 2^dst_scale). This function allows multi-edges. 
+ * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param rng_state RAFT RNG state, updated with each call + * @param src_scale Scale factor to set the range of source vertex IDs (or the first vertex set) in + * the bipartite graph. Vertex IDs have values in [0, V_src), where V_src = 1 << @p src_scale. + * @param dst_scale Scale factor to set the range of destination vertex IDs (or the second vertex + * set) in the bipartite graph. Vertex IDs have values in [0, V_dst), where V_dst = 1 << @p + * dst_scale. + * @param num_edges Number of edges to generate. + * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. 
+ */ +template +std::tuple, rmm::device_uvector> +generate_bipartite_rmat_edgelist(raft::handle_t const& handle, + raft::random::RngState& rng_state, + size_t src_scale, + size_t dst_scale, + size_t num_edges, + double a = 0.57, + double b = 0.19, + double c = 0.19); + enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; /** @@ -408,11 +447,30 @@ symmetrize_edgelist_from_triangular( std::optional>&& optional_d_weights_v, bool check_diagonal = false); +/** + * @brief scramble vertex IDs in a graph + * + * Given a vertex list for a graph, scramble the input vertex IDs. + * + * The scramble code here follows the algorithm in the Graph 500 reference + * implementation version 3.0.0. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices Vector of input vertices + * @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN). + * @return rmm::device_uvector object storing scrambled vertex IDs. + */ +template +rmm::device_uvector scramble_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + size_t lgN); + /** * @brief scramble vertex ids in a graph * - * Given an edgelist for a graph, scramble all vertex ids by the given offset. - * This translation is done in place. + * Given an edge list for a graph, scramble the input vertex IDs. * * The scramble code here follows the algorithm in the Graph 500 reference * implementation version 3.0.0. @@ -420,17 +478,18 @@ symmetrize_edgelist_from_triangular( * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. 
- * @param d_src_v Vector of source vertices - * @param d_dst_v Vector of destination vertices - * @param vertex_id_offset Offset to add to each vertex id - * @param seed Used to initialize random number generator + * @param srcs Vector of input source vertices + * @param dsts Vector of input destination vertices + * @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN). + * @return Tuple of two rmm::device_uvector objects storing scrambled source & destination vertex + * IDs, respectively. */ template -void scramble_vertex_ids(raft::handle_t const& handle, - rmm::device_uvector& d_src_v, - rmm::device_uvector& d_dst_v, - vertex_t vertex_id_offset, - uint64_t seed = 0); +std::tuple, rmm::device_uvector> scramble_vertex_ids( + raft::handle_t const& handle, + rmm::device_uvector&& srcs, + rmm::device_uvector&& dsts, + size_t lgN); /** * @brief Combine edgelists from multiple sources into a single edgelist diff --git a/cpp/include/cugraph/graph_mask.hpp b/cpp/include/cugraph/graph_mask.hpp index af5b9b01764..2048d3692c7 100644 --- a/cpp/include/cugraph/graph_mask.hpp +++ b/cpp/include/cugraph/graph_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -128,7 +128,7 @@ struct graph_mask_view_t { ~graph_mask_view_t() = default; graph_mask_view_t(graph_mask_view_t&&) noexcept = default; - graph_mask_view_t& operator=(graph_mask_view_t&&) noexcept = default; + graph_mask_view_t& operator=(graph_mask_view_t&&) noexcept = default; graph_mask_view_t& operator=(graph_mask_view_t const& other) = default; /** @@ -231,7 +231,7 @@ struct graph_mask_t { { } - graph_mask_t& operator=(graph_mask_t&&) noexcept = default; + graph_mask_t& operator=(graph_mask_t&&) noexcept = default; graph_mask_t& operator=(graph_mask_t const& other) = default; /** diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index 03bfc6c8045..2d10b435224 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -26,8 +27,6 @@ #include #include -#include - #include #include #include @@ -447,6 +446,7 @@ class graph_view_t local_edge_partition_view( size_t partition_idx) const { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); + vertex_t major_range_first{}; vertex_t major_range_last{}; vertex_t minor_range_first{}; @@ -737,6 +739,15 @@ class graph_view_t edge_mask_view) + { + edge_mask_view_ = edge_mask_view; + } + + void clear_edge_mask() { edge_mask_view_ = std::nullopt; } + + bool has_edge_mask() const { return edge_mask_view_.has_value(); } + private: std::vector edge_partition_offsets_{}; std::vector edge_partition_indices_{}; @@ -782,6 +793,8 @@ class graph_view_t>, std::optional /* dummy */> local_sorted_unique_edge_dst_vertex_partition_offsets_{std::nullopt}; + + std::optional> edge_mask_view_{std::nullopt}; }; // single-GPU version @@ -1008,12 +1021,23 @@ class graph_view_t edge_mask_view) + { + edge_mask_view_ = edge_mask_view; + } + + void clear_edge_mask() { edge_mask_view_ = std::nullopt; } + + bool has_edge_mask() const { return edge_mask_view_.has_value(); } + private: edge_t const* 
offsets_{nullptr}; vertex_t const* indices_{nullptr}; // segment offsets based on vertex degree, relevant only if vertex IDs are renumbered std::optional> segment_offsets_{std::nullopt}; + + std::optional> edge_mask_view_{std::nullopt}; }; } // namespace cugraph diff --git a/cpp/include/cugraph/legacy/graph.hpp b/cpp/include/cugraph/legacy/graph.hpp index d207a0a1603..8276853ce7e 100644 --- a/cpp/include/cugraph/legacy/graph.hpp +++ b/cpp/include/cugraph/legacy/graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -553,23 +553,19 @@ template struct invalid_idx< T, typename std::enable_if_t::value && std::is_signed::value>> - : std::integral_constant { -}; + : std::integral_constant {}; template struct invalid_idx< T, typename std::enable_if_t::value && std::is_unsigned::value>> - : std::integral_constant::max()> { -}; + : std::integral_constant::max()> {}; template -struct invalid_vertex_id : invalid_idx { -}; +struct invalid_vertex_id : invalid_idx {}; template -struct invalid_edge_id : invalid_idx { -}; +struct invalid_edge_id : invalid_idx {}; } // namespace legacy } // namespace cugraph diff --git a/cpp/include/cugraph/partition_manager.hpp b/cpp/include/cugraph/partition_manager.hpp index 433d99d4e01..309b169e646 100644 --- a/cpp/include/cugraph/partition_manager.hpp +++ b/cpp/include/cugraph/partition_manager.hpp @@ -16,6 +16,9 @@ #pragma once +#include +#include + #include #include @@ -93,6 +96,39 @@ class partition_manager { return std::string(map_major_comm_to_gpu_row_comm ? 
"gpu_col_comm" : "gpu_row_comm"); } + template + static std::vector compute_partition_range_lasts(raft::handle_t const& handle, + vertex_t local_partition_size) + { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto const major_comm_rank = major_comm.get_rank(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto const minor_comm_rank = minor_comm.get_rank(); + + auto vertex_counts = host_scalar_allgather(comm, local_partition_size, handle.get_stream()); + auto vertex_partition_ids = + host_scalar_allgather(comm, + partition_manager::compute_vertex_partition_id_from_graph_subcomm_ranks( + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank), + handle.get_stream()); + + std::vector vertex_partition_range_offsets(comm_size + 1, 0); + for (int i = 0; i < comm_size; ++i) { + vertex_partition_range_offsets[vertex_partition_ids[i]] = vertex_counts[i]; + } + std::exclusive_scan(vertex_partition_range_offsets.begin(), + vertex_partition_range_offsets.end(), + vertex_partition_range_offsets.begin(), + vertex_t{0}); + + return std::vector(vertex_partition_range_offsets.begin() + 1, + vertex_partition_range_offsets.end()); + } + static void init_subcomm(raft::handle_t& handle, int gpu_row_comm_size) { auto& comm = handle.get_comms(); diff --git a/cpp/include/cugraph/utilities/atomic_ops.cuh b/cpp/include/cugraph/utilities/atomic_ops.cuh new file mode 100644 index 00000000000..6af9841d71f --- /dev/null +++ b/cpp/include/cugraph/utilities/atomic_ops.cuh @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace cugraph { + +namespace detail { + +template +__device__ constexpr TupleType thrust_tuple_atomic_and(Iterator iter, + TupleType tup, + std::index_sequence) +{ + return thrust::make_tuple( + atomicAnd(&(thrust::raw_reference_cast(thrust::get(*iter))), thrust::get(tup))...); +} + +template +__device__ constexpr TupleType thrust_tuple_atomic_or(Iterator iter, + TupleType tup, + std::index_sequence) +{ + return thrust::make_tuple( + atomicOr(&(thrust::raw_reference_cast(thrust::get(*iter))), thrust::get(tup))...); +} + +template +__device__ constexpr TupleType thrust_tuple_atomic_add(Iterator iter, + TupleType tup, + std::index_sequence) +{ + return thrust::make_tuple( + atomicAdd(&(thrust::raw_reference_cast(thrust::get(*iter))), thrust::get(tup))...); +} + +template +__device__ constexpr TupleType thrust_tuple_elementwise_atomic_cas(Iterator iter, + TupleType comp_tup, + TupleType val_tup, + std::index_sequence) +{ + return thrust::make_tuple(atomicCAS(&(thrust::raw_reference_cast(thrust::get(*iter))), + thrust::get(comp_tup), + thrust::get(val_tup))...); +} + +template +__device__ constexpr TupleType thrust_tuple_elementwise_atomic_min(Iterator iter, + TupleType tup, + std::index_sequence) +{ + return thrust::make_tuple( + atomicMin(&(thrust::raw_reference_cast(thrust::get(*iter))), thrust::get(tup))...); +} + +template +__device__ constexpr TupleType thrust_tuple_elementwise_atomic_max(Iterator iter, + TupleType tup, + 
std::index_sequence) +{ + return thrust::make_tuple( + atomicMax(&(thrust::raw_reference_cast(thrust::get(*iter))), thrust::get(tup))...); +} + +} // namespace detail + +template +__device__ std::enable_if_t::value, void> atomic_and( + Iterator iter, T value) +{ + // no-op +} + +template +__device__ + std::enable_if_t && + std::is_same_v::value_type, T>, + T> + atomic_and(Iterator iter, T value) +{ + return atomicAnd(&(thrust::raw_reference_cast(*iter)), value); +} + +template +__device__ + std::enable_if_t::value && + std::is_same_v::value_type, T>, + T> + atomic_and(Iterator iter, T value) +{ + detail::thrust_tuple_atomic_and( + iter, value, std::make_index_sequence::value>{}); +} + +template +__device__ std::enable_if_t::value, void> atomic_or( + Iterator iter, T value) +{ + // no-op +} + +template +__device__ + std::enable_if_t && + std::is_same_v::value_type, T>, + T> + atomic_or(Iterator iter, T value) +{ + return atomicOr(&(thrust::raw_reference_cast(*iter)), value); +} + +template +__device__ + std::enable_if_t::value && + std::is_same_v::value_type, T>, + T> + atomic_or(Iterator iter, T value) +{ + detail::thrust_tuple_atomic_or( + iter, value, std::make_index_sequence::value>{}); +} + +template +__device__ std::enable_if_t::value, void> atomic_add( + Iterator iter, T value) +{ + // no-op +} + +template +__device__ + std::enable_if_t && + std::is_same_v::value_type, T>, + void> + atomic_add(Iterator iter, T value) +{ + atomicAdd(&(thrust::raw_reference_cast(*iter)), value); +} + +template +__device__ + std::enable_if_t::value_type>::value && + is_thrust_tuple::value, + void> + atomic_add(Iterator iter, T value) +{ + static_assert(thrust::tuple_size::value_type>::value == + thrust::tuple_size::value); + detail::thrust_tuple_atomic_add( + iter, value, std::make_index_sequence::value>{}); +} + +template +__device__ + std::enable_if_t && + std::is_same_v::value_type, T>, + T> + elementwise_atomic_cas(Iterator iter, T compare, T value) +{ + return 
atomicCAS(&(thrust::raw_reference_cast(*iter)), compare, value); +} + +template +__device__ + std::enable_if_t::value && + std::is_same_v::value_type, T>, + T> + elementwise_atomic_cas(Iterator iter, T compare, T value) +{ + detail::thrust_tuple_elementwise_atomic_cas( + iter, compare, value, std::make_index_sequence::value>{}); +} + +template +__device__ std::enable_if_t::value, void> +elementwise_atomic_min(Iterator iter, T const& value) +{ + // no-op +} + +template +__device__ + std::enable_if_t::value_type, T>::value && + std::is_arithmetic::value, + void> + elementwise_atomic_min(Iterator iter, T const& value) +{ + atomicMin(&(thrust::raw_reference_cast(*iter)), value); +} + +template +__device__ + std::enable_if_t::value_type>::value && + is_thrust_tuple::value, + void> + elementwise_atomic_min(Iterator iter, T const& value) +{ + static_assert(thrust::tuple_size::value_type>::value == + thrust::tuple_size::value); + detail::thrust_tuple_elementwise_atomic_min( + iter, value, std::make_index_sequence::value>{}); +} + +template +__device__ std::enable_if_t::value, void> +elementwise_atomic_max(Iterator iter, T const& value) +{ + // no-op +} + +template +__device__ + std::enable_if_t::value_type, T>::value && + std::is_arithmetic::value, + void> + elementwise_atomic_max(Iterator iter, T const& value) +{ + atomicMax(&(thrust::raw_reference_cast(*iter)), value); +} + +template +__device__ + std::enable_if_t::value_type>::value && + is_thrust_tuple::value, + void> + elementwise_atomic_max(Iterator iter, T const& value) +{ + static_assert(thrust::tuple_size::value_type>::value == + thrust::tuple_size::value); + detail::thrust_tuple_elementwise_atomic_max( + iter, value, std::make_index_sequence::value>{}); +} + +} // namespace cugraph diff --git a/cpp/include/cugraph/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp index 91dbe2c701e..2573752cb98 100644 --- a/cpp/include/cugraph/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,189 +25,11 @@ namespace cugraph { namespace cython { -enum class numberTypeEnum : int { int32Type, int64Type, floatType, doubleType }; - -// replacement for std::tuple<,,>, since std::tuple is not -// supported in cython -// -template -struct major_minor_weights_t { - explicit major_minor_weights_t(raft::handle_t const& handle) - : shuffled_major_vertices_(0, handle.get_stream()), - shuffled_minor_vertices_(0, handle.get_stream()), - shuffled_weights_(0, handle.get_stream()) - { - } - - rmm::device_uvector& get_major(void) { return shuffled_major_vertices_; } - - rmm::device_uvector& get_minor(void) { return shuffled_minor_vertices_; } - - rmm::device_uvector& get_weights(void) { return shuffled_weights_; } - - std::vector& get_edge_counts(void) { return edge_counts_; } - - std::pair, size_t> get_major_wrap( - void) // const: triggers errors in Cython autogen-ed C++ - { - return std::make_pair(std::make_unique(shuffled_major_vertices_.release()), - sizeof(vertex_t)); - } - - std::pair, size_t> get_minor_wrap(void) // const - { - return std::make_pair(std::make_unique(shuffled_minor_vertices_.release()), - sizeof(vertex_t)); - } - - std::pair, size_t> get_weights_wrap(void) // const - { - return std::make_pair(std::make_unique(shuffled_weights_.release()), - sizeof(weight_t)); - } - - std::unique_ptr> get_edge_counts_wrap(void) // const - { - return std::make_unique>(edge_counts_); - } - - private: - rmm::device_uvector shuffled_major_vertices_; - rmm::device_uvector shuffled_minor_vertices_; - rmm::device_uvector shuffled_weights_; - std::vector edge_counts_{}; -}; - struct graph_generator_t { std::unique_ptr d_source; std::unique_ptr d_destination; }; -// wrapper for renumber_edgelist() return -// (unrenumbering 
maps, etc.) -// -template -struct renum_tuple_t { - explicit renum_tuple_t(raft::handle_t const& handle) : dv_(0, handle.get_stream()), part_() {} - - rmm::device_uvector& get_dv(void) { return dv_; } - - std::pair, size_t> get_dv_wrap( - void) // const: see above explanation - { - return std::make_pair(std::make_unique(dv_.release()), sizeof(vertex_t)); - } - - cugraph::partition_t& get_partition(void) { return part_; } - vertex_t& get_num_vertices(void) { return nv_; } - edge_t& get_num_edges(void) { return ne_; } - - std::vector& get_segment_offsets(void) { return segment_offsets_; } - - std::unique_ptr> get_segment_offsets_wrap() - { // const - return std::make_unique>(segment_offsets_); - } - - // `partition_t` pass-through getters - // - int get_part_row_size() const { return part_.row_comm_size(); } - - int get_part_col_size() const { return part_.col_comm_size(); } - - int get_part_comm_rank() const { return part_.comm_rank(); } - - // FIXME: part_.vertex_partition_offsets() returns a std::vector - // - std::unique_ptr> get_partition_offsets_wrap(void) // const - { - return std::make_unique>(part_.vertex_partition_range_offsets()); - } - - std::pair get_part_local_vertex_range() const - { - auto tpl_v = part_.local_vertex_partition_range(); - return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); - } - - vertex_t get_part_local_vertex_first() const - { - return part_.local_vertex_partition_range_first(); - } - - vertex_t get_part_local_vertex_last() const { return part_.local_vertex_partition_range_last(); } - - std::pair get_part_vertex_partition_range(size_t vertex_partition_idx) const - { - auto tpl_v = part_.vertex_partition_range(vertex_partition_idx); - return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); - } - - vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) const - { - return part_.vertex_partition_range_first(vertex_partition_idx); - } - - vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) 
const - { - return part_.vertex_partition_range_last(vertex_partition_idx); - } - - vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) const - { - return part_.vertex_partition_range_size(vertex_partition_idx); - } - - size_t get_part_number_of_matrix_partitions() const - { - return part_.number_of_local_edgex_partitions(); - } - - std::pair get_part_matrix_partition_major_range(size_t partition_idx) const - { - auto tpl_v = part_.local_edgex_partition_major_range(partition_idx); - return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); - } - - vertex_t get_part_matrix_partition_major_first(size_t partition_idx) const - { - return part_.local_edge_partition_major_first(partition_idx); - } - - vertex_t get_part_matrix_partition_major_last(size_t partition_idx) const - { - return part_.local_edge_partition_major_range_last(partition_idx); - } - - vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) const - { - return part_.local_edge_partition_major_value_start_offset(partition_idx); - } - - std::pair get_part_matrix_partition_minor_range() const - { - auto tpl_v = part_.local_edge_partition_minor_range(); - return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); - } - - vertex_t get_part_matrix_partition_minor_first() const - { - return part_.local_edge_partition_minor_range_first(); - } - - vertex_t get_part_matrix_partition_minor_last() const - { - return part_.local_edge_partition_minor_range_last(); - } - - private: - rmm::device_uvector dv_; - cugraph::partition_t part_; - vertex_t nv_{0}; - edge_t ne_{0}; - std::vector segment_offsets_; -}; - // Wrapper for calling graph generator template std::unique_ptr call_generate_rmat_edgelist(raft::handle_t const& handle, @@ -232,30 +54,6 @@ call_generate_rmat_edgelists(raft::handle_t const& handle, bool clip_and_flip, bool scramble_vertex_ids); -// wrapper for shuffling: -// -template -std::unique_ptr> call_shuffle( - raft::handle_t const& handle, - vertex_t* 
- edgelist_major_vertices, // [IN / OUT]: groupby_gpu_id_and_shuffle_values() sorts in-place - vertex_t* edgelist_minor_vertices, // [IN / OUT] - weight_t* edgelist_weights, // [IN / OUT] - edge_t num_edgelist_edges, - bool is_weighted); - -// Wrapper for calling renumber_edgelist() inplace: -// -template -std::unique_ptr> call_renumber( - raft::handle_t const& handle, - vertex_t* shuffled_edgelist_src_vertices /* [INOUT] */, - vertex_t* shuffled_edgelist_dst_vertices /* [INOUT] */, - std::vector const& edge_counts, - bool store_transposed, - bool do_expensive_check, - bool multi_gpu); - // Helper for setting up subcommunicators, typically called as part of the // user-initiated comms initialization in Python. // diff --git a/cpp/include/cugraph/utilities/dataframe_buffer.hpp b/cpp/include/cugraph/utilities/dataframe_buffer.hpp index 055391895d1..49898f6c855 100644 --- a/cpp/include/cugraph/utilities/dataframe_buffer.hpp +++ b/cpp/include/cugraph/utilities/dataframe_buffer.hpp @@ -72,9 +72,9 @@ struct dataframe_element { using type = void; }; -template -struct dataframe_element...>> { - using type = thrust::tuple; +template +struct dataframe_element...>> { + using type = thrust::tuple; }; template diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index 1b1cf29057c..d29e7c47d14 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,6 +37,26 @@ struct typecast_t { __device__ output_t operator()(input_t val) const { return static_cast(val); } }; +template +struct pack_bool_t { + BoolIterator bool_first{}; + size_t num_bools{}; + + __device__ uint32_t operator()(size_t i) const + { + auto first = i * (sizeof(uint32_t) * 8); + auto last = std::min((i + 1) * (sizeof(uint32_t) * 8), num_bools); + uint32_t ret{0}; + for (auto j = first; j < last; ++j) { + if (*(bool_first + j)) { + auto mask = uint32_t{1} << (j % (sizeof(uint32_t) * 8)); + ret |= mask; + } + } + return ret; + } +}; + template struct indirection_t { Iterator first{}; diff --git a/cpp/include/cugraph/utilities/packed_bool_utils.hpp b/cpp/include/cugraph/utilities/packed_bool_utils.hpp new file mode 100644 index 00000000000..9557b11e8e0 --- /dev/null +++ b/cpp/include/cugraph/utilities/packed_bool_utils.hpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include + +namespace cugraph { + +namespace detail { + +template +constexpr std::enable_if_t::value_type>::value && + cugraph::is_thrust_tuple_of_arithmetic::value, + bool> +has_packed_bool_element(std::index_sequence) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value); + return (... 
|| + (std::is_same_v::value_type>::type, + uint32_t> && + std::is_same_v::type, bool>)); +} + +} // namespace detail + +// sizeof(uint32_t) * 8 packed Boolean values are stored using one uint32_t +template +constexpr bool has_packed_bool_element() +{ + static_assert( + (std::is_arithmetic_v::value_type> && + std::is_arithmetic_v) || + (cugraph::is_thrust_tuple_of_arithmetic< + typename thrust::iterator_traits::value_type>::value && + cugraph::is_thrust_tuple_of_arithmetic::value)); + if constexpr (std::is_arithmetic_v::value_type> && + std::is_arithmetic_v) { + return std::is_same_v::value_type, uint32_t> && + std::is_same_v; + } else { + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value); + return detail::has_packed_bool_element( + std::make_index_sequence::value>()); + } +} + +constexpr size_t packed_bools_per_word() { return sizeof(uint32_t) * size_t{8}; } + +constexpr size_t packed_bool_size(size_t bool_size) +{ + return (bool_size + (sizeof(uint32_t) * 8 - 1)) / (sizeof(uint32_t) * 8); +} + +template +constexpr uint32_t packed_bool_mask(T bool_offset) +{ + return uint32_t{1} << (bool_offset % (sizeof(uint32_t) * 8)); +} + +constexpr uint32_t packed_bool_full_mask() { return uint32_t{0xffffffff}; } + +constexpr uint32_t packed_bool_empty_mask() { return uint32_t{0x0}; } + +template +constexpr T packed_bool_offset(T bool_offset) +{ + return bool_offset / (sizeof(uint32_t) * 8); +} + +} // namespace cugraph diff --git a/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp b/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp index cb3b8146153..d98754f51d1 100644 --- a/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp +++ b/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp @@ -78,16 +78,14 @@ auto std_tuple_to_thrust_tuple(TupleType tup, std::index_sequence) } template -constexpr TupleType thrust_tuple_of_arithmetic_numeric_limits_lowest(TupleType t, - std::index_sequence) +constexpr TupleType 
thrust_tuple_of_arithmetic_numeric_limits_lowest(std::index_sequence) { return thrust::make_tuple( std::numeric_limits::type>::lowest()...); } template -constexpr TupleType thrust_tuple_of_arithmetic_numeric_limits_max(TupleType t, - std::index_sequence) +constexpr TupleType thrust_tuple_of_arithmetic_numeric_limits_max(std::index_sequence) { return thrust::make_tuple( std::numeric_limits::type>::max()...); @@ -96,71 +94,59 @@ constexpr TupleType thrust_tuple_of_arithmetic_numeric_limits_max(TupleType t, } // namespace detail template -struct is_thrust_tuple : std::false_type { -}; +struct is_thrust_tuple : std::false_type {}; template -struct is_thrust_tuple> : std::true_type { -}; +struct is_thrust_tuple> : std::true_type {}; template -struct is_thrust_tuple_of_arithmetic : std::false_type { -}; +struct is_thrust_tuple_of_arithmetic : std::false_type {}; -template -struct is_thrust_tuple_of_arithmetic> { +template +struct is_thrust_tuple_of_arithmetic> { private: template static constexpr bool is_valid = std::is_arithmetic_v || std::is_same_v; public: - static constexpr bool value = (... && is_valid); + static constexpr bool value = (... && is_valid); }; template -struct is_std_tuple : std::false_type { -}; +struct is_std_tuple : std::false_type {}; template -struct is_std_tuple> : std::true_type { -}; +struct is_std_tuple> : std::true_type {}; template typename Vector> -struct is_arithmetic_vector : std::false_type { -}; +struct is_arithmetic_vector : std::false_type {}; template