Merge branch 'branch-24.10' into pyg-linkpred

rapidsai · Sep 30, 2024 · 8a2234f · 8a2234f
2 parents 0c129eb + 0f4fe8f
commit 8a2234f
Show file tree

Hide file tree

Showing 74 changed files with 1,477 additions and 493 deletions.
diff --git a/benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh b/benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh
@@ -30,7 +30,7 @@ algos="
 datasets="
    netscience
    email_Eu_core
-   cit_patents
+   cit-patents
    hollywood
    soc-livejournal
 "

diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
@@ -17,7 +17,7 @@ cd "${package_dir}"
 
 python -m pip wheel \
     -w dist \
-    -vvv \
+    -v \
     --no-deps \
     --disable-pip-version-check \
     --extra-index-url https://pypi.nvidia.com \

diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
@@ -45,8 +45,8 @@ function sed_runner() {
 echo "${NEXT_FULL_TAG}" > VERSION
 
 # Need to distutils-normalize the original version
-NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
-NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_UCXX_SHORT_TAG}'))")
+NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))")
+NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_UCXX_SHORT_TAG}'))")
 
 DEPENDENCIES=(
   cudf

diff --git a/ci/run_nx_cugraph_pytests.sh b/ci/run_nx_cugraph_pytests.sh
@@ -6,4 +6,5 @@ set -euo pipefail
 # Support invoking run_nx_cugraph_pytests.sh outside the script directory
 cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/nx-cugraph/nx_cugraph
 
-pytest --capture=no --cache-clear --benchmark-disable "$@" tests
+NX_CUGRAPH_USE_COMPAT_GRAPHS=False pytest --capture=no --cache-clear --benchmark-disable "$@" tests
+NX_CUGRAPH_USE_COMPAT_GRAPHS=True pytest --capture=no --cache-clear --benchmark-disable "$@" tests
diff --git a/ci/test_python.sh b/ci/test_python.sh
@@ -108,7 +108,7 @@ echo "nx-cugraph coverage from networkx tests: $_coverage"
 echo $_coverage | awk '{ if ($NF == "0.0%") exit 1 }'
 # Ensure all algorithms were called by comparing covered lines to function lines.
 # Run our tests again (they're fast enough) to add their coverage, then create coverage.json
-pytest \
+NX_CUGRAPH_USE_COMPAT_GRAPHS=False pytest \
   --pyargs nx_cugraph \
   --config-file=../pyproject.toml \
   --cov-config=../pyproject.toml \
@@ -159,7 +159,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
       cugraph \
       cugraph-dgl \
       'dgl>=1.1.0.cu*,<=2.0.0.cu*' \
-      'pytorch>=2.0' \
+      'pytorch>=2.3,<2.4' \
       'cuda-version=11.8'
 
     rapids-print-env
@@ -198,10 +198,10 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
     # TODO re-enable logic once CUDA 12 is testable
     #if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
     CONDA_CUDA_VERSION="11.8"
-    PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu118.html"
+    PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu118.html"
     #else
     #  CONDA_CUDA_VERSION="12.1"
-    #  PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu121.html"
+    #  PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu121.html"
     #fi
 
     # Will automatically install built dependencies of cuGraph-PyG

diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
@@ -37,6 +37,7 @@ else
     DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL="1000s" \
     DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \
     DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \
+    NX_CUGRAPH_USE_COMPAT_GRAPHS=False \
     python -m pytest \
        -v \
        --import-mode=append \

diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh
@@ -32,18 +32,8 @@ fi
 PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}"
 DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html"
 
-# Starting from 2.2, PyTorch wheels depend on nvidia-nccl-cuxx>=2.19 wheel and
-# dynamically link to NCCL. RAPIDS CUDA 11 CI images have an older NCCL version that
-# might shadow the newer NCCL required by PyTorch during import (when importing
-# `cupy` before `torch`).
-if [[ "${NCCL_VERSION}" < "2.19" ]]; then
-  PYTORCH_VER="2.1.0"
-else
-  PYTORCH_VER="2.3.0"
-fi
-
 rapids-logger "Installing PyTorch and DGL"
-rapids-retry python -m pip install "torch==${PYTORCH_VER}" --index-url ${PYTORCH_URL}
+rapids-retry python -m pip install torch==2.3.0 --index-url ${PYTORCH_URL}
 rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL}
 
 python -m pytest python/cugraph-dgl/tests
diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh
@@ -29,13 +29,13 @@ export CI_RUN=1
 
 if [[ "${CUDA_VERSION}" == "11.8.0" ]]; then
   PYTORCH_URL="https://download.pytorch.org/whl/cu118"
-  PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu118.html"
+  PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu118.html"
 else
   PYTORCH_URL="https://download.pytorch.org/whl/cu121"
-  PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu121.html"
+  PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu121.html"
 fi
 rapids-logger "Installing PyTorch and PyG dependencies"
-rapids-retry python -m pip install torch==2.1.0 --index-url ${PYTORCH_URL}
+rapids-retry python -m pip install torch==2.3.0 --index-url ${PYTORCH_URL}
 rapids-retry python -m pip install "torch-geometric>=2.5,<2.6"
 rapids-retry python -m pip install \
   ogb \

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -33,18 +33,17 @@ dependencies:
 - libraft==24.10.*,>=0.0.0a0
 - librmm==24.10.*,>=0.0.0a0
 - nbsphinx
-- nccl>=2.9.9
+- nccl>=2.19
 - networkx>=2.5.1
 - networkx>=3.0
 - ninja
 - notebook>=0.5.0
 - numba>=0.57
-- numpy>=1.23,<2.0a0
+- numpy>=1.23,<3.0a0
 - numpydoc
 - nvcc_linux-64=11.8
 - ogb
 - openmpi
-- packaging>=21
 - pandas
 - pre-commit
 - pydantic
@@ -58,7 +57,7 @@ dependencies:
 - pytest-mpl
 - pytest-xdist
 - python-louvain
-- pytorch>=2.0,<2.2.0a0
+- pytorch>=2.3,<2.4.0a0
 - raft-dask==24.10.*,>=0.0.0a0
 - rapids-build-backend>=0.3.1,<0.4.0.dev0
 - rapids-dask-dependency==24.10.*,>=0.0.0a0

diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -39,17 +39,16 @@ dependencies:
 - libraft==24.10.*,>=0.0.0a0
 - librmm==24.10.*,>=0.0.0a0
 - nbsphinx
-- nccl>=2.9.9
+- nccl>=2.19
 - networkx>=2.5.1
 - networkx>=3.0
 - ninja
 - notebook>=0.5.0
 - numba>=0.57
-- numpy>=1.23,<2.0a0
+- numpy>=1.23,<3.0a0
 - numpydoc
 - ogb
 - openmpi
-- packaging>=21
 - pandas
 - pre-commit
 - pydantic
@@ -63,7 +62,7 @@ dependencies:
 - pytest-mpl
 - pytest-xdist
 - python-louvain
-- pytorch>=2.0,<2.2.0a0
+- pytorch>=2.3,<2.4.0a0
 - raft-dask==24.10.*,>=0.0.0a0
 - rapids-build-backend>=0.3.1,<0.4.0.dev0
 - rapids-dask-dependency==24.10.*,>=0.0.0a0

diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml
@@ -27,11 +27,11 @@ requirements:
     - cugraph ={{ version }}
     - dgl >=1.1.0.cu*
     - numba >=0.57
-    - numpy >=1.23,<2.0a0
+    - numpy >=1.23,<3.0a0
     - pylibcugraphops ={{ minor_version }}
     - tensordict >=0.1.2
     - python
-    - pytorch >=2.0
+    - pytorch >=2.3,<2.4.0a0
     - cupy >=12.0.0
 
 tests:

diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml
@@ -29,9 +29,9 @@ requirements:
   run:
     - rapids-dask-dependency ={{ minor_version }}
     - numba >=0.57
-    - numpy >=1.23,<2.0a0
+    - numpy >=1.23,<3.0a0
     - python
-    - pytorch >=2.0
+    - pytorch >=2.3,<2.4.0a0
     - cupy >=12.0.0
     - cugraph ={{ version }}
     - pylibcugraphops ={{ minor_version }}

diff --git a/conda/recipes/cugraph-service/meta.yaml b/conda/recipes/cugraph-service/meta.yaml
@@ -63,7 +63,7 @@ outputs:
         - dask-cuda ={{ minor_version }}
         - dask-cudf ={{ minor_version }}
         - numba >=0.57
-        - numpy >=1.23,<2.0a0
+        - numpy >=1.23,<3.0a0
         - python
         - rapids-dask-dependency ={{ minor_version }}
         - thriftpy2 >=0.4.15,!=0.5.0,!=0.5.1

diff --git a/conda/recipes/libcugraph/conda_build_config.yaml b/conda/recipes/libcugraph/conda_build_config.yaml
@@ -17,7 +17,7 @@ doxygen_version:
   - ">=1.8.11"
 
 nccl_version:
-  - ">=2.9.9"
+  - ">=2.19"
 
 c_stdlib:
   - sysroot

diff --git a/cpp/include/cugraph/mtmg/instance_manager.hpp b/cpp/include/cugraph/mtmg/instance_manager.hpp
@@ -20,6 +20,8 @@
 
 #include <raft/comms/std_comms.hpp>
 
+#include <nccl.h>
+
 #include <vector>
 
 namespace cugraph {

diff --git a/cpp/include/cugraph/mtmg/resource_manager.hpp b/cpp/include/cugraph/mtmg/resource_manager.hpp
@@ -27,6 +27,8 @@
 #include <rmm/mr/device/owning_wrapper.hpp>
 #include <rmm/mr/device/pool_memory_resource.hpp>
 
+#include <nccl.h>
+
 #include <execution>
 
 namespace cugraph {

diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
@@ -220,6 +220,7 @@ function(ConfigureTestMG CMAKE_TEST_NAME)
         GPUS ${GPU_COUNT}
         PERCENT 100
         INSTALL_COMPONENT_SET testing_mg
+        INSTALL_TARGET ${CMAKE_TEST_NAME}
     )
     set_tests_properties(${CMAKE_TEST_NAME} PROPERTIES LABELS "CUGRAPH_MG")
 
@@ -302,6 +303,7 @@ function(ConfigureCTestMG CMAKE_TEST_NAME)
         GPUS ${GPU_COUNT}
         PERCENT 100
         INSTALL_COMPONENT_SET testing_mg
+        INSTALL_TARGET ${CMAKE_TEST_NAME}
     )
     set_tests_properties(${CMAKE_TEST_NAME} PROPERTIES LABELS "CUGRAPH_C_MG")
 

diff --git a/cpp/tests/mtmg/multi_node_threaded_test.cu b/cpp/tests/mtmg/multi_node_threaded_test.cu
@@ -39,6 +39,7 @@
 #include <thrust/unique.h>
 
 #include <gtest/gtest.h>
+#include <nccl.h>
 
 #include <filesystem>
 #include <fstream>

diff --git a/dependencies.yaml b/dependencies.yaml
@@ -524,13 +524,13 @@ dependencies:
           - &dask rapids-dask-dependency==24.10.*,>=0.0.0a0
           - &dask_cuda dask-cuda==24.10.*,>=0.0.0a0
           - &numba numba>=0.57
-          - &numpy numpy>=1.23,<2.0a0
+          - &numpy numpy>=1.23,<3.0a0
       - output_types: conda
         packages:
           - aiohttp
           - fsspec>=0.6.0
           - requests
-          - nccl>=2.9.9
+          - nccl>=2.19
           - ucx-proc=*=gpu
           - &ucx_py_unsuffixed ucx-py==0.40.*,>=0.0.0a0
       - output_types: pyproject
@@ -688,15 +688,16 @@ dependencies:
     common:
       - output_types: [conda, pyproject]
         packages:
-          - packaging>=21
             # not needed by nx-cugraph tests, but is required for running networkx tests
           - pytest-mpl
   cugraph_dgl_dev:
     common:
       - output_types: [conda]
         packages:
           - *cugraph_unsuffixed
-          - pytorch>=2.0
+          # ceiling could be removed when this is fixed:
+          # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/254
+          - &pytorch_conda pytorch>=2.3,<2.4.0a0
           - pytorch-cuda==11.8
           - &tensordict tensordict>=0.1.2
           - dgl>=1.1.0.cu*
@@ -705,7 +706,7 @@ dependencies:
       - output_types: [conda]
         packages:
           - *cugraph_unsuffixed
-          - pytorch>=2.0
+          - *pytorch_conda
           - pytorch-cuda==11.8
           - *tensordict
           - pyg>=2.5,<2.6
@@ -714,7 +715,7 @@ dependencies:
     common:
       - output_types: [conda]
         packages:
-          - &pytorch_unsuffixed pytorch>=2.0,<2.2.0a0
+          - *pytorch_conda
           - torchdata
           - pydantic
           - ogb
@@ -734,7 +735,7 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages:
-              - &pytorch_pip torch>=2.0,<2.2.0a0
+              - &pytorch_pip torch>=2.3,<2.4.0a0
               - *tensordict
           - matrix: {cuda: "11.*"}
             packages:

diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml
@@ -19,7 +19,7 @@ dependencies:
 - pytest-cov
 - pytest-xdist
 - pytorch-cuda==11.8
-- pytorch>=2.0
+- pytorch>=2.3,<2.4.0a0
 - scipy
 - tensordict>=0.1.2
 name: cugraph_dgl_dev_cuda-118
diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
@@ -18,7 +18,7 @@
 
 from typing import Sequence, Optional, Union, List, Tuple, Iterator
 
-from cugraph.gnn import UniformNeighborSampler, DistSampleWriter
+from cugraph.gnn import UniformNeighborSampler, BiasedNeighborSampler, DistSampleWriter
 from cugraph.utilities.utils import import_optional
 
 import cugraph_dgl
@@ -93,7 +93,6 @@ def __init__(
             If provided, the probability of each neighbor being
             sampled is proportional to the edge feature
             with the given name.  Mutually exclusive with mask.
-            Currently unsupported.
         mask: str
             Optional.
             If provided, only neighbors where the edge mask
@@ -133,10 +132,6 @@ def __init__(
             raise NotImplementedError(
                 "Edge masking is currently unsupported by cuGraph-DGL"
             )
-        if prob:
-            raise NotImplementedError(
-                "Edge masking is currently unsupported by cuGraph-DGL"
-            )
         if prefetch_edge_feats:
             warnings.warn("'prefetch_edge_feats' is ignored by cuGraph-DGL")
         if prefetch_node_feats:
@@ -146,6 +141,8 @@ def __init__(
         if fused:
             warnings.warn("'fused' is ignored by cuGraph-DGL")
 
+        self.__prob_attr = prob
+
         self.fanouts = fanouts_per_layer
         reverse_fanouts = fanouts_per_layer.copy()
         reverse_fanouts.reverse()
@@ -180,8 +177,14 @@ def sample(
             format=kwargs.pop("format", "parquet"),
         )
 
-        ds = UniformNeighborSampler(
-            g._graph(self.edge_dir),
+        sampling_clx = (
+            UniformNeighborSampler
+            if self.__prob_attr is None
+            else BiasedNeighborSampler
+        )
+
+        ds = sampling_clx(
+            g._graph(self.edge_dir, prob_attr=self.__prob_attr),
             writer,
             compression="CSR",
             fanout=self._reversed_fanout_vals,