From 76190e2a18b0b0d2765397f1a7729e0f1477bec1 Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:28:48 +0200 Subject: [PATCH 1/3] [FIX] Rename `cugraph-ops` symbols (refactoring) and update GHA workflows to call pytest via `python -m pytest` (#3688) This PR: - renames `cugraph-ops` symbols and updates tests in `cugraph-dgl` and `-pyg` based on cugraph-ops refactoring - updates GHA workflows to call pytest via `python -m pytest`. This is to fix the `pytest not found error` in [log](https://github.com/rapidsai/cugraph/actions/runs/5420960384/jobs/9855784044#step:9:260). Authors: - Naim (https://github.com/naimnv) - Matt Joux (https://github.com/MatthiasKohl) - Tingyu Wang (https://github.com/tingyu66) Approvers: - Matt Joux (https://github.com/MatthiasKohl) - Ray Douglass (https://github.com/raydouglass) - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3688 --- .github/workflows/pr.yaml | 4 +- .github/workflows/test.yaml | 4 +- cpp/src/sampling/neighborhood.cu | 22 ++++-- cpp/src/utilities/cugraph_ops_utils.hpp | 24 +++---- .../cugraph_dgl/nn/conv/gatconv.py | 71 +++++++------------ .../cugraph_dgl/nn/conv/transformerconv.py | 43 +++++------ .../tests/nn/test_transformerconv.py | 6 +- .../cugraph-pyg/cugraph_pyg/nn/conv/base.py | 56 +++++++-------- .../cugraph_pyg/nn/conv/gat_conv.py | 35 +++------ .../cugraph_pyg/nn/conv/gatv2_conv.py | 52 +++++--------- .../cugraph_pyg/nn/conv/transformer_conv.py | 8 +-- 11 files changed, 138 insertions(+), 187 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 1518d7ba432..4d52cd26de4 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -94,7 +94,7 @@ jobs: with: build_type: pull-request package-name: pylibcugraph - test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest ./python/pylibcugraph/pylibcugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets python -m pytest ./python/pylibcugraph/pylibcugraph/tests" test-smoketest: "python ci/wheel_smoke_test_pylibcugraph.py" wheel-build-cugraph: needs: wheel-tests-pylibcugraph @@ -120,5 +120,5 @@ jobs: test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && RAPIDS_PY_WHEEL_NAME=pylibcugraph_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" # Skip dataset downloads on arm to save CI time -- arm only runs smoke tests. 
test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -m sg ./python/cugraph/cugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets python -m pytest -m sg ./python/cugraph/cugraph/tests" test-smoketest: "python ci/wheel_smoke_test_cugraph.py" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 33cc3f27825..d697b8f1649 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -39,7 +39,7 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: pylibcugraph - test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest ./python/pylibcugraph/pylibcugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets python -m pytest ./python/pylibcugraph/pylibcugraph/tests" wheel-tests-cugraph: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.08 @@ -52,4 +52,4 @@ jobs: # Always want to test against latest dask/distributed. test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" test-before-arm64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -m sg ./python/cugraph/cugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets python -m pytest -m sg ./python/cugraph/cugraph/tests" diff --git a/cpp/src/sampling/neighborhood.cu b/cpp/src/sampling/neighborhood.cu index 0c0beb8d8b0..2f7b203a319 100644 --- a/cpp/src/sampling/neighborhood.cu +++ b/cpp/src/sampling/neighborhood.cu @@ -22,6 +22,8 @@ #include +#include + namespace cugraph { template @@ -34,14 +36,19 @@ sample_neighbors_adjacency_list(raft::handle_t const& handle, size_t sampling_size, ops::graph::SamplingAlgoT sampling_algo) { - const auto [ops_graph, max_degree] = detail::get_graph_and_max_degree(graph_view); - return ops::graph::uniform_sample_csr(rng_state, + using base_vertex_t = std::decay_t; + using base_edge_t = std::decay_t; + static_assert(std::is_same_v, + "cugraph-ops sampling not yet implemented for different node and edge types"); + + const auto ops_graph = detail::get_graph(graph_view); + return ops::graph::uniform_sample_csc(rng_state, ops_graph, ptr_d_start, num_start_vertices, sampling_size, sampling_algo, - max_degree, + ops_graph.dst_max_in_degree, handle.get_stream()); } @@ -55,14 +62,19 @@ std::tuple, rmm::device_uvector> sample_ size_t sampling_size, ops::graph::SamplingAlgoT sampling_algo) { - const auto [ops_graph, max_degree] = detail::get_graph_and_max_degree(graph_view); + using base_vertex_t = std::decay_t; + using base_edge_t = std::decay_t; + static_assert(std::is_same_v, + "cugraph-ops sampling not yet implemented for different node and edge types"); + + const auto ops_graph = detail::get_graph(graph_view); return 
ops::graph::uniform_sample_coo(rng_state, ops_graph, ptr_d_start, num_start_vertices, sampling_size, sampling_algo, - max_degree, + ops_graph.dst_max_in_degree, handle.get_stream()); } diff --git a/cpp/src/utilities/cugraph_ops_utils.hpp b/cpp/src/utilities/cugraph_ops_utils.hpp index 1dbe930e4c9..9aea4183866 100644 --- a/cpp/src/utilities/cugraph_ops_utils.hpp +++ b/cpp/src/utilities/cugraph_ops_utils.hpp @@ -20,18 +20,20 @@ #include -#include - namespace cugraph { namespace detail { template -ops::graph::fg_csr get_graph( +ops::graph::csc get_graph( graph_view_t const& gview) { - ops::graph::fg_csr graph; - graph.n_nodes = gview.number_of_vertices(); - graph.n_indices = gview.number_of_edges(); + ops::graph::csc graph; + graph.n_src_nodes = gview.number_of_vertices(); + graph.n_dst_nodes = gview.number_of_vertices(); + graph.n_indices = gview.number_of_edges(); + // FIXME this is sufficient for now, but if there is a fast (cached) way + // of getting max degree, use that instead + graph.dst_max_in_degree = std::numeric_limits::max(); // FIXME: this is evil and is just temporary until we have a matching type in cugraph-ops // or we change the type accepted by the functions calling into cugraph-ops graph.offsets = const_cast(gview.local_edge_partition_view().offsets().data()); @@ -39,15 +41,5 @@ ops::graph::fg_csr get_graph( return graph; } -template -std::tuple, NodeTypeT> get_graph_and_max_degree( - graph_view_t const& gview) -{ - // FIXME this is sufficient for now, but if there is a fast (cached) way - // of getting max degree, use that instead - auto max_degree = std::numeric_limits::max(); - return std::make_tuple(get_graph(gview), max_degree); -} - } // namespace detail } // namespace cugraph diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py index e70f2d0c6d1..7825febc24b 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py @@ -19,8 +19,8 @@ from cugraph_dgl.nn.conv.base import BaseConv from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch import BipartiteCSC, SampledCSC, StaticCSC -from pylibcugraphops.pytorch.operators import mha_gat_n2n, mha_gat_n2n_bipartite +from pylibcugraphops.pytorch import CSC +from pylibcugraphops.pytorch.operators import mha_gat_n2n dgl = import_optional("dgl") torch = import_optional("torch") @@ -173,9 +173,20 @@ def forward( :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. """ + if max_in_degree is None: + max_in_degree = -1 + bipartite = not isinstance(nfeat, torch.Tensor) offsets, indices, _ = g.adj_tensors("csc") + graph = CSC( + offsets=offsets, + indices=indices, + num_src_nodes=g.num_src_nodes(), + dst_max_in_degree=max_in_degree, + is_bipartite=bipartite, + ) + if efeat is not None: if self.fc_edge is None: raise RuntimeError( @@ -191,23 +202,8 @@ def forward( f"integers to allow bipartite node features, but got " f"{self.in_feats}." 
) - _graph = BipartiteCSC( - offsets=offsets, indices=indices, num_src_nodes=g.num_src_nodes() - ) nfeat_src = self.fc_src(nfeat[0]) nfeat_dst = self.fc_dst(nfeat[1]) - - out = mha_gat_n2n_bipartite( - src_feat=nfeat_src, - dst_feat=nfeat_dst, - attn_weights=self.attn_weights, - graph=_graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - ) else: if not hasattr(self, "fc"): raise RuntimeError( @@ -215,36 +211,17 @@ def forward( f"integer, but got {self.in_feats}." ) nfeat = self.fc(nfeat) - # Sampled primitive does not support edge features - if g.is_block and efeat is None: - if max_in_degree is None: - max_in_degree = g.in_degrees().max().item() - - if max_in_degree < self.MAX_IN_DEGREE_MFG: - _graph = SampledCSC( - offsets=offsets, - indices=indices, - max_num_neighbors=max_in_degree, - num_src_nodes=g.num_src_nodes(), - ) - else: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - else: - if g.is_block: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - - out = mha_gat_n2n( - feat=nfeat, - attn_weights=self.attn_weights, - graph=_graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - )[: g.num_dst_nodes()] + + out = mha_gat_n2n( + (nfeat_src, nfeat_dst) if bipartite else nfeat, + self.attn_weights, + graph, + num_heads=self.num_heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=efeat, + )[: g.num_dst_nodes()] if self.concat: out = out.view(-1, self.num_heads, self.out_feats) diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py index 1898f5159b1..141adc86069 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py @@ -15,7 +15,7 @@ from cugraph_dgl.nn.conv.base import BaseConv from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch import BipartiteCSC, StaticCSC +from pylibcugraphops.pytorch import CSC from pylibcugraphops.pytorch.operators import mha_simple_n2n dgl = import_optional("dgl") @@ -132,31 +132,34 @@ def forward( efeat: torch.Tensor, optional Edge feature tensor. Default: ``None``. """ - bipartite = not isinstance(nfeat, torch.Tensor) offsets, indices, _ = g.adj_tensors("csc") - - if bipartite: - src_feats, dst_feats = nfeat - _graph = BipartiteCSC( - offsets=offsets, indices=indices, num_src_nodes=g.num_src_nodes() - ) - else: - src_feats = dst_feats = nfeat - if g.is_block: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - - query = self.lin_query(dst_feats) - key = self.lin_key(src_feats) - value = self.lin_value(src_feats) - if self.lin_edge is not None: + graph = CSC( + offsets=offsets, + indices=indices, + num_src_nodes=g.num_src_nodes(), + is_bipartite=True, + ) + + if isinstance(nfeat, torch.Tensor): + nfeat = (nfeat, nfeat) + + query = self.lin_query(nfeat[1][: g.num_dst_nodes()]) + key = self.lin_key(nfeat[0]) + value = self.lin_value(nfeat[0]) + + if efeat is not None: + if self.lin_edge is None: + raise RuntimeError( + f"{self.__class__.__name__}.edge_feats must be set to allow " + f"edge features." 
+ ) efeat = self.lin_edge(efeat) out = mha_simple_n2n( key_emb=key, query_emb=query, value_emb=value, - graph=_graph, + graph=graph, num_heads=self.num_heads, concat_heads=self.concat, edge_emb=efeat, @@ -165,7 +168,7 @@ def forward( )[: g.num_dst_nodes()] if self.root_weight: - res = self.lin_skip(dst_feats[: g.num_dst_nodes()]) + res = self.lin_skip(nfeat[1][: g.num_dst_nodes()]) if self.lin_beta is not None: beta = self.lin_beta(torch.cat([out, res, out - res], dim=-1)) beta = beta.sigmoid() diff --git a/python/cugraph-dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/tests/nn/test_transformerconv.py index 64af795231c..00476b9f0bb 100644 --- a/python/cugraph-dgl/tests/nn/test_transformerconv.py +++ b/python/cugraph-dgl/tests/nn/test_transformerconv.py @@ -26,14 +26,14 @@ @pytest.mark.parametrize("beta", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) +@pytest.mark.parametrize("bipartite_node_feats", [False, True]) @pytest.mark.parametrize("concat", [False, True]) @pytest.mark.parametrize("idtype_int", [False, True]) @pytest.mark.parametrize("num_heads", [1, 2, 3, 4]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("use_edge_feats", [False, True]) def test_TransformerConv( - beta, bipartite, concat, idtype_int, num_heads, to_block, use_edge_feats + beta, bipartite_node_feats, concat, idtype_int, num_heads, to_block, use_edge_feats ): device = "cuda" g = create_graph1().to(device) @@ -44,7 +44,7 @@ def test_TransformerConv( if to_block: g = dgl.to_block(g) - if bipartite: + if bipartite_node_feats: in_node_feats = (5, 3) nfeat = ( torch.rand(g.num_src_nodes(), in_node_feats[0], device=device), diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py index bec50792131..207efcdace4 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py @@ -12,7 +12,7 @@ # limitations under the License. import warnings -from typing import Any, Optional, Tuple, Union +from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional @@ -20,13 +20,7 @@ torch_geometric = import_optional("torch_geometric") try: # pragma: no cover - from pylibcugraphops.pytorch import ( - BipartiteCSC, - SampledCSC, - SampledHeteroCSC, - StaticCSC, - StaticHeteroCSC, - ) + from pylibcugraphops.pytorch import CSC, HeteroCSC HAS_PYLIBCUGRAPHOPS = True except ImportError: @@ -94,7 +88,7 @@ def get_cugraph( csc: Tuple[torch.Tensor, torch.Tensor, int], bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> Any: + ) -> CSC: r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. Supports both bipartite and non-bipartite graphs. 
@@ -119,16 +113,16 @@ def get_cugraph( f"based processing (got CPU tensor)" ) - if bipartite: - return BipartiteCSC(colptr, row, num_src_nodes) + if max_num_neighbors is None: + max_num_neighbors = -1 - if num_src_nodes != colptr.numel() - 1: - if max_num_neighbors is None: - max_num_neighbors = int((colptr[1:] - colptr[:-1]).max()) - - return SampledCSC(colptr, row, max_num_neighbors, num_src_nodes) - - return StaticCSC(colptr, row) + return CSC( + offsets=colptr, + indices=row, + num_src_nodes=num_src_nodes, + dst_max_in_degree=max_num_neighbors, + is_bipartite=bipartite, + ) def get_typed_cugraph( self, @@ -137,7 +131,7 @@ def get_typed_cugraph( num_edge_types: Optional[int] = None, bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> Any: + ) -> HeteroCSC: r"""Constructs a typed :obj:`cugraph` graph object from a CSC representation where each edge corresponds to a given edge type. Supports both bipartite and non-bipartite graphs. @@ -162,21 +156,21 @@ def get_typed_cugraph( if num_edge_types is None: num_edge_types = int(edge_type.max()) + 1 + if max_num_neighbors is None: + max_num_neighbors = -1 + row, colptr, num_src_nodes = csc edge_type = edge_type.int() - if bipartite: - raise NotImplementedError - - if num_src_nodes != colptr.numel() - 1: - if max_num_neighbors is None: - max_num_neighbors = int((colptr[1:] - colptr[:-1]).max()) - - return SampledHeteroCSC( - colptr, row, edge_type, max_num_neighbors, num_src_nodes, num_edge_types - ) - - return StaticHeteroCSC(colptr, row, edge_type, num_edge_types) + return HeteroCSC( + offsets=colptr, + indices=row, + edge_types=edge_type, + num_src_nodes=num_src_nodes, + num_edge_types=num_edge_types, + dst_max_in_degree=max_num_neighbors, + is_bipartite=bipartite, + ) def forward( self, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py index 4bf37cf3e72..23b7d50ba96 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py @@ -12,7 +12,7 @@ # limitations under the License. from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_gat_n2n, mha_gat_n2n_bipartite +from pylibcugraphops.pytorch.operators import mha_gat_n2n from cugraph.utilities.utils import import_optional @@ -203,19 +203,6 @@ def forward( ) x_src = self.lin_src(x[0]) x_dst = self.lin_dst(x[1]) - - out = mha_gat_n2n_bipartite( - x_src, - x_dst, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) - else: if not hasattr(self, "lin"): raise RuntimeError( @@ -224,16 +211,16 @@ def forward( ) x = self.lin(x) - out = mha_gat_n2n( - x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) + out = mha_gat_n2n( + (x_src, x_dst) if bipartite else x, + self.att, + graph, + num_heads=self.heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=edge_attr, + ) if self.bias is not None: out = out + self.bias diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py index 66d962b3f86..d4c947b952a 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py @@ -12,7 +12,7 @@ # limitations under the License. 
from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n, mha_gat_v2_n2n_bipartite +from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n from cugraph.utilities.utils import import_optional @@ -187,8 +187,8 @@ def forward( representation to the desired format. edge_attr: (torch.Tensor, optional) The edge features. """ - bipartite = not isinstance(x, torch.Tensor) - graph = self.get_cugraph(csc, bipartite=bipartite or not self.share_weights) + bipartite = not isinstance(x, torch.Tensor) or not self.share_weights + graph = self.get_cugraph(csc, bipartite=bipartite) if edge_attr is not None: if self.lin_edge is None: @@ -200,38 +200,24 @@ def forward( edge_attr = edge_attr.view(-1, 1) edge_attr = self.lin_edge(edge_attr) - if not bipartite and self.share_weights: + if bipartite: + if isinstance(x, torch.Tensor): + x = (x, x) + x_src = self.lin_src(x[0]) + x_dst = self.lin_dst(x[1]) + else: x = self.lin_src(x) - out = mha_gat_v2_n2n( - x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) - else: - if bipartite: - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - x_src = self.lin_src(x) - x_dst = self.lin_dst(x) - - out = mha_gat_v2_n2n_bipartite( - x_src, - x_dst, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) + out = mha_gat_v2_n2n( + (x_src, x_dst) if bipartite else x, + self.att, + graph, + num_heads=self.heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=edge_attr, + ) if self.bias is not None: out = out + self.bias diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py index aeb51c028ae..f67756eb3fe 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py @@ -12,7 +12,7 @@ # limitations under the License. from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_simple_n2n as TransformerConvAgg +from pylibcugraphops.pytorch.operators import mha_simple_n2n from cugraph.utilities.utils import import_optional @@ -168,10 +168,10 @@ def forward( representation to the desired format. edge_attr: (torch.Tensor, optional) The edge features. """ - bipartite = not isinstance(x, torch.Tensor) + bipartite = True graph = self.get_cugraph(csc, bipartite=bipartite) - if not bipartite: + if isinstance(x, torch.Tensor): x = (x, x) query = self.lin_query(x[1]) @@ -186,7 +186,7 @@ def forward( ) edge_attr = self.lin_edge(edge_attr) - out = TransformerConvAgg( + out = mha_simple_n2n( key, query, value, From 3d1539b198ec63b4c77e1208f134a0985577cc0b Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 6 Jul 2023 08:27:12 -0700 Subject: [PATCH 2/3] Include cuCollection public header for hash functions (#3694) Currently we are including `cuco/detail/hash_functions.cuh` but cuCollection now has `cuco/hash_functions.cuh`. Include the public one instead. 
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) - Joseph Nke (https://github.com/jnke2016) URL: https://github.com/rapidsai/cugraph/pull/3694 --- cpp/src/detail/graph_partition_utils.cuh | 2 +- cpp/tests/prims/mg_count_if_e.cu | 2 +- cpp/tests/prims/mg_count_if_v.cu | 2 +- cpp/tests/prims/mg_extract_transform_e.cu | 2 +- cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu | 2 +- .../prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu | 2 +- cpp/tests/prims/mg_reduce_v.cu | 2 +- cpp/tests/prims/mg_transform_e.cu | 2 +- cpp/tests/prims/mg_transform_reduce_e.cu | 2 +- cpp/tests/prims/mg_transform_reduce_v.cu | 2 +- .../prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu | 2 +- cpp/tests/prims/property_generator.cuh | 2 +- cpp/tests/sampling/detail/nbr_sampling_utils.cuh | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/src/detail/graph_partition_utils.cuh b/cpp/src/detail/graph_partition_utils.cuh index 88e9623e043..67574719b45 100644 --- a/cpp/src/detail/graph_partition_utils.cuh +++ b/cpp/src/detail/graph_partition_utils.cuh @@ -19,7 +19,7 @@ #include -#include +#include #include #include #include diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index bebb21bd720..449aa728d87 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_count_if_v.cu b/cpp/tests/prims/mg_count_if_v.cu index f90f788cfae..3d745708401 100644 --- a/cpp/tests/prims/mg_count_if_v.cu +++ b/cpp/tests/prims/mg_count_if_v.cu @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index 1c85b55e4be..b71fe5ddb5e 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index 3cd6bd243e1..4d9435dd344 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index 97d52c04114..677d6ce5022 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_reduce_v.cu b/cpp/tests/prims/mg_reduce_v.cu index 7080eb12da6..b6f8da48ef4 100644 --- a/cpp/tests/prims/mg_reduce_v.cu +++ b/cpp/tests/prims/mg_reduce_v.cu @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_e.cu b/cpp/tests/prims/mg_transform_e.cu index ed29fb7c5e3..127eddd43c7 100644 --- a/cpp/tests/prims/mg_transform_e.cu +++ b/cpp/tests/prims/mg_transform_e.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index 
8dba488f23d..79aa3da54df 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_reduce_v.cu b/cpp/tests/prims/mg_transform_reduce_v.cu index 3ea7636a718..c9fc138ae1b 100644 --- a/cpp/tests/prims/mg_transform_reduce_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_v.cu @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index 2eb270973f2..d0b97065da7 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/property_generator.cuh b/cpp/tests/prims/property_generator.cuh index 24a21c1cb01..e7264cd276f 100644 --- a/cpp/tests/prims/property_generator.cuh +++ b/cpp/tests/prims/property_generator.cuh @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/sampling/detail/nbr_sampling_utils.cuh b/cpp/tests/sampling/detail/nbr_sampling_utils.cuh index 00c14009e86..8221073f556 100644 --- a/cpp/tests/sampling/detail/nbr_sampling_utils.cuh +++ b/cpp/tests/sampling/detail/nbr_sampling_utils.cuh @@ -31,7 +31,7 @@ #include #include -#include +#include #include From 037239686052ee4e07286dcf235d0aa1b0da0ff0 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Thu, 6 Jul 2023 17:54:29 -0400 Subject: [PATCH 3/3] [FIX] Fix the hang in cuGraph Python Uniform Neighbor Sample, Add Logging to Bulk Sampler (#3669) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some dask operations were not being done correctly, and time was being lost in broadcasting the rank and label arrays to all workers. This PR resolves those issues. Also pulls in the previously-experimental changes that add logging to the bulk sampler. Credit to @VibhuJawa for isolating and fixing the issues with the column merge in `uniform_neighbor_sample` and the new sampling notebook and shell script. This PR does modify the sampling APIs so it is breaking. The API changes are necessary to avoid unnecessary shuffling, and eventually, to improve batch id assignment. **Dataset:** `ogbn_papers100M x 2`; **Fanout:** `[25, 25]`; **Batch Size:** `512`; **Seeds Per Call:** `524288` Current runtime: 2.69 s ± 0 ns per loop (mean ± std. dev. of 1 run, 10 loops each) Previous runtime: 4.51 s ± 0 ns per loop (mean ± std. dev. of 1 run, 10 loops each) Speedup: 1.7x **Dataset:** `ogbn_papers100M x 4`; **Fanout:** `[25, 25]`; **Batch Size:** `512`; **Seeds Per Call:** `524288` Current runtime: 6.32 s ± 0 ns per loop (mean ± std. dev. of 1 run, 10 loops each) Previous runtime: 10.7 s ± 0 ns per loop (mean ± std. dev. 
of 1 run, 10 loops each) Speedup: 1.7x Authors: - Alex Barghi (https://github.com/alexbarghi-nv) - Vibhu Jawa (https://github.com/VibhuJawa) Approvers: - Vibhu Jawa (https://github.com/VibhuJawa) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3669 --- .../bulk_sampling/benchmarking_script.ipynb | 1860 +++++++++++++++++ .../standalone/bulk_sampling/bulk_sampling.sh | 50 + .../bulk_sampling/cugraph_bulk_sampling.py | 16 +- mg_utils/run-dask-process.sh | 1 + .../dask/sampling/uniform_neighbor_sample.py | 425 +++- .../cugraph/gnn/data_loading/bulk_sampler.py | 106 +- .../gnn/data_loading/bulk_sampler_io.py | 10 +- .../sampling/uniform_neighbor_sample.py | 190 +- .../sampling/test_uniform_neighbor_sample.py | 33 +- .../test_uniform_neighbor_sample_mg.py | 142 +- 10 files changed, 2689 insertions(+), 144 deletions(-) create mode 100644 benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb create mode 100755 benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh diff --git a/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb b/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb new file mode 100644 index 00000000000..3ea158d1f61 --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb @@ -0,0 +1,1860 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9b8d43d5-3005-4b0b-b418-b84af104bc3b", + "metadata": {}, + "outputs": [], + "source": [ + "!export RAPIDS_NO_INITIALIZE=\"1\"\n", + "!export CUDF_SPILL=\"1\"\n", + "!export LIBCUDF_CUFILE_POLICY=\"OFF\"\n", + "\n", + "from cugraph_bulk_sampling import start_dask_client, benchmark_cugraph_bulk_sampling, load_disk_dataset, construct_graph\n", + "from cugraph_bulk_sampling import sample_graph\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "id": "f3f04da7-c937-4dab-b432-fc569522f411", + "metadata": {}, + "source": [ + "# Setup Cluster" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "26324a75-1b34-4c7b-8a26-23bac23e91b4", + "metadata": {}, + "outputs": [], + "source": [ + "dask_worker_devices='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fc8d56ef-4036-4105-9764-1c6cbb2bdb15", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dask client/cluster created using LocalCUDACluster\n" + ] + } + ], + "source": [ + "client, cluster = start_dask_client(dask_worker_devices=dask_worker_devices,\n", + " jit_unspill=False,\n", + " rmm_pool_size=28e9,\n", + " rmm_async=True)" + ] + }, + { + "cell_type": "markdown", + "id": "5335b115-eeb0-470d-9884-79990506ead7", + "metadata": {}, + "source": [ + "# Setup Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c9c8fb66-6bdd-45d7-8564-cc28e383d966", + "metadata": {}, + "outputs": [], + "source": [ + "dataset='ogbn_papers100M'\n", + "dataset_root=\".\"\n", + "output_root=\".\"\n", + "reverse_edges=True\n", + "add_edge_types=False\n", + "batch_size=512\n", + "seeds_per_call=524288\n", + "fanout=[25,25]\n", + "replication_factor=4\n", + "seed=123\n", + "\n", + "dataset_dir=dataset_root\n", + "output_path=output_root\n", + "persist=False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "37ed06f6-ad06-443a-be12-61800d59d221", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading edge index for edge type 
paper__cites__paper\n", + "Loading node labels for node type paper (offset=0)\n", + "Number of input edges = 6,462,743,488\n", + "constructed graph\n" + ] + } + ], + "source": [ + "dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes = \\\n", + " load_disk_dataset(\n", + " dataset,\n", + " dataset_dir=dataset_dir,\n", + " reverse_edges=reverse_edges,\n", + " replication_factor=replication_factor,\n", + " persist=False,\n", + " add_edge_types=add_edge_types\n", + " )\n", + "num_input_edges = len(dask_edgelist_df)\n", + "print(\n", + "f\"Number of input edges = {num_input_edges:,}\"\n", + ")\n", + "\n", + "G = construct_graph(\n", + "dask_edgelist_df\n", + ")\n", + "del dask_edgelist_df\n", + "print('constructed graph')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f71cf5a3-7e4b-4497-9c14-a342cc5abbcd", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/index.py:3139: FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead.\n", + " warnings.warn(\n", + "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/index.py:3139: FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input memory: 103403895808\n" + ] + } + ], + "source": [ + "input_memory = G.edgelist.edgelist_df.memory_usage().sum().compute()\n", + "print(f'input memory: {input_memory}')\n", + "\n", + "output_subdir = os.path.join(output_path, f'{dataset}[{replication_factor}]_b{batch_size}_f{fanout}')\n", + "os.makedirs(output_subdir, exist_ok=True)\n", + "\n", + "output_sample_path = os.path.join(output_subdir, 'samples')\n", + "os.makedirs(output_sample_path, exist_ok=True)\n", + "\n", + "batches_per_partition = 200_000 // batch_size" + ] + }, + { + "cell_type": "markdown", + "id": "3d276c5c-65d6-4191-b2a5-37b30d2cd44b", + "metadata": {}, + "source": [ + "# Benchmarking Sample Graph" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "675b580c-6a7a-4571-88dd-0d4429f9e5ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 9.981931209564209\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '51.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '58.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '59.0MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '149.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '82.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '98.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': 
'61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '73.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '310.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '267.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '80.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '131.8MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '205.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '288.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '303.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '130.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.299846172332764\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '252.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '278.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '243.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '256.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '330.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '239.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '254.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '239.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '278.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '68.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '397.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '79.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '127.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '90.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '303.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 
'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '53.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.2623701095581055\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '73.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '179.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '253.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '366.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '98.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '130.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '152.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '435.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '463.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '151.5MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '379.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '192.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '150.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '277.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '416.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.487639665603638\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '176.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '292.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '118.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '60.2MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 
'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '204.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '208.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '185.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '254.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '175.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '102.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '83.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '197.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '142.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '262.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '227.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.208818197250366\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '261.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '360.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '225.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '428.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '288.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '202.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '128.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '258.3MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '203.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '278.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '102.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '596.0MB',\n", + " 'peak_bytes': 
'2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '301.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '203.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.344887971878052\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '171.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '225.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '245.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '315.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '248.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '147.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '64.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '290.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '301.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '152.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '276.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '252.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '625.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '117.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '192.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.830034255981445\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '202.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '224.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '101.8MB',\n", + " 'peak_bytes': 
[... per-loop output condensed: each of the 30 timeit loops prints the same report: "created batches", "flushed all batches", the sample_graph call writing to '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples' with kwargs {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}, an execution_time between roughly 5.7 s and 7.3 s, and allocation_counts for the 16 Dask workers (peak_bytes roughly 2.3-2.9 GB and total_bytes roughly 59-62 GB per worker) ...]
+      "6.32 s ± 0 ns per loop (mean ± std. dev. of 1 run, 30 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit -n30 -r1\n",
+    "\n",
+    "\n",
+    "execution_time, allocation_counts = sample_graph(\n",
+    "    G,\n",
+    "    dask_label_df,\n",
+    "    output_sample_path,\n",
+    "    seed=seed,\n",
+    "    batch_size=batch_size,\n",
+    "    seeds_per_call=seeds_per_call,\n",
+    "    batches_per_partition=batches_per_partition,\n",
+    "    fanout=fanout,\n",
+    "    persist=persist,\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "27066cf3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.11 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh b/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh
new file mode 100755
index 00000000000..e62cb3cda29
--- /dev/null
+++ b/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh
@@ -0,0 +1,50 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
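+
+# Usage sketch: the script reads six positional arguments, assigned below in this order:
+#   dataset_name dataset_root output_root batch_sizes fanouts reverse_edges
+# A hypothetical invocation (values are illustrative only; see cugraph_bulk_sampling.py
+# for the exact formats it accepts for --batch_sizes and --fanouts) might look like:
+#   bash bulk_sampling.sh ogbn_papers100M /path/to/dataset_root /path/to/output_root 512 25_25 True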
+
+export RAPIDS_NO_INITIALIZE="1"
+export CUDF_SPILL="1"
+export LIBCUDF_CUFILE_POLICY=OFF
+
+
+dataset_name=$1
+dataset_root=$2
+output_root=$3
+batch_sizes=$4
+fanouts=$5
+reverse_edges=$6
+
+rm -rf $output_root
+mkdir -p $output_root
+
+# Change to 2 in Selene
+gpu_per_replica=4
+#--add_edge_ids \
+
+# Expand to 1, 4, 8 in Selene
+for i in 1 2 3 4
+do
+    for replication in 2;
+    do
+        dataset_name_with_replication="${dataset_name}[${replication}]"
+        dask_worker_devices=$(seq -s, 0 $((gpu_per_replica*replication-1)))
+        echo "Sampling dataset = $dataset_name_with_replication on devices = $dask_worker_devices"
+        python3 cugraph_bulk_sampling.py --datasets $dataset_name_with_replication \
+                --dataset_root $dataset_root \
+                --batch_sizes $batch_sizes \
+                --output_root $output_root \
+                --dask_worker_devices $dask_worker_devices \
+                --fanouts $fanouts \
+                --reverse_edges
+    done
+done
\ No newline at end of file
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py
index 3cfd39afc98..d2a3716da8a 100644
--- a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py
+++ b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py
@@ -33,8 +33,6 @@ import cugraph
-from datetime import datetime
-
 import json
 import re
 import os
@@ -50,6 +48,7 @@ import dask_cudf
 import dask.dataframe as ddf
 from dask.distributed import default_client
+from cugraph.dask import get_n_workers
 
 from typing import Optional, Union, Dict
@@ -173,6 +172,7 @@ def sample_graph(G, label_df, output_path,seed=42, batch_size=500, seeds_per_cal
         random_state=seed,
         seeds_per_call=seeds_per_call,
         batches_per_partition=batches_per_partition,
+        log_level = logging.INFO
     )
     n_workers = len(default_client().scheduler_info()['workers'])
@@ -182,10 +182,10 @@ def sample_graph(G, label_df, output_path,seed=42, batch_size=500, seeds_per_cal
         'batch': cudf.Series(dtype='int32')
     })
 
-    batch_df = label_df.map_partitions(_make_batch_ids, batch_size, n_workers, meta=meta)
+
+    #batch_df = batch_df.sort_values(by='node')
 
-    # should always persist the batch dataframe or performace may be suboptimal
+    # should always persist the batch dataframe or performance may be suboptimal
     batch_df = batch_df.persist()
 
     del label_df
@@ -278,6 +278,8 @@ def load_disk_dataset(dataset, dataset_dir='.', reverse_edges=True, replication_
     path = Path(dataset_dir) / dataset
     parquet_path = path / 'parquet'
 
+    n_workers = get_n_workers()
+
     with open(os.path.join(path, 'meta.json')) as meta_file:
         meta = json.load(meta_file)
 
@@ -289,7 +291,9 @@ def load_disk_dataset(dataset, dataset_dir='.', reverse_edges=True, replication_
         print(f'Loading edge index for edge type {edge_type}')
         can_edge_type = tuple(edge_type.split('__'))
-        edge_index_dict[can_edge_type] = dask_cudf.read_parquet(os.path.join(os.path.join(parquet_path, edge_type), 'edge_index.parquet'))
+        edge_index_dict[can_edge_type] = dask_cudf.read_parquet(
+            Path(parquet_path) / edge_type / 'edge_index.parquet'
+        ).repartition(n_workers*2)
 
         edge_index_dict[can_edge_type]['src'] += node_offsets_replicated[can_edge_type[0]]
         edge_index_dict[can_edge_type]['dst'] += node_offsets_replicated[can_edge_type[-1]]
@@ -344,7 +348,7 @@ def load_disk_dataset(dataset, dataset_dir='.', reverse_edges=True, replication_
         print(f'Loading node labels for node type {node_type} (offset={offset})')
         node_label_path = os.path.join(os.path.join(parquet_path, node_type), 'node_label.parquet')
        if
os.path.exists(node_label_path): - node_labels[node_type] = dask_cudf.read_parquet(node_label_path).drop('label',axis=1).persist() + node_labels[node_type] = dask_cudf.read_parquet(node_label_path).repartition(n_workers).drop('label',axis=1).persist() node_labels[node_type]['node'] += offset node_labels[node_type] = node_labels[node_type].persist() diff --git a/mg_utils/run-dask-process.sh b/mg_utils/run-dask-process.sh index e5fa8fab332..b88abb685ec 100755 --- a/mg_utils/run-dask-process.sh +++ b/mg_utils/run-dask-process.sh @@ -102,6 +102,7 @@ function buildTcpArgs { " WORKER_ARGS="--rmm-pool-size=$WORKER_RMM_POOL_SIZE + --rmm-async --local-directory=/tmp/$LOGNAME --scheduler-file=$SCHEDULER_FILE --memory-limit=$DASK_HOST_MEMORY_LIMIT diff --git a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py index 7d8972a7385..d74a8df14eb 100644 --- a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py @@ -14,10 +14,11 @@ from __future__ import annotations +import warnings + import numpy from dask import delayed -from dask.distributed import wait, Lock, get_client -from cugraph.dask.common.input_utils import get_distributed_data +from dask.distributed import Lock, get_client, wait import dask_cudf import cudf @@ -26,12 +27,20 @@ from pylibcugraph import ResourceHandle from pylibcugraph import uniform_neighbor_sample as pylibcugraph_uniform_neighbor_sample +from pylibcugraph.utilities.api_tools import deprecated_warning_wrapper from cugraph.dask.comms import comms as Comms +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask import get_n_workers from typing import Sequence, List, Union, Tuple from typing import TYPE_CHECKING +from cugraph.dask.common.part_utils import ( + get_persisted_df_worker_map, + persist_dask_df_equal_parts_per_worker, +) + if TYPE_CHECKING: from cugraph import Graph @@ -150,7 +159,63 @@ def convert_to_cudf(cp_arrays, weight_t, with_edge_properties, return_offsets=Fa return df +def __get_label_to_output_comm_rank(min_batch_id, max_batch_id, n_workers): + num_batches = max_batch_id - min_batch_id + 1 + num_batches = int(num_batches) + z = cp.zeros(num_batches, dtype="int32") + s = cp.array_split(cp.arange(num_batches), n_workers) + for i, t in enumerate(s): + z[t] = i + + return z + + def _call_plc_uniform_neighbor_sample( + sID, + mg_graph_x, + st_x, + keep_batches_together, + n_workers, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t, + with_edge_properties, + random_state=None, + return_offsets=False, +): + st_x = st_x[0] + start_list_x = st_x[start_col_name] + batch_id_list_x = st_x[batch_col_name] if batch_col_name in st_x else None + + label_list = None + label_to_output_comm_rank = None + if keep_batches_together: + label_list = cp.arange(min_batch_id, max_batch_id + 1, dtype="int32") + label_to_output_comm_rank = __get_label_to_output_comm_rank( + min_batch_id, max_batch_id, n_workers + ) + + cp_arrays = pylibcugraph_uniform_neighbor_sample( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + input_graph=mg_graph_x, + start_list=start_list_x, + label_list=label_list, + label_to_output_comm_rank=label_to_output_comm_rank, + h_fan_out=fanout_vals, + with_replacement=with_replacement, + do_expensive_check=False, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list_x, + random_state=random_state, + ) + return 
convert_to_cudf( + cp_arrays, weight_t, with_edge_properties, return_offsets=return_offsets + ) + + +def _call_plc_uniform_neighbor_sample_legacy( sID, mg_graph_x, st_x, @@ -183,7 +248,7 @@ def _call_plc_uniform_neighbor_sample( ) -def _mg_call_plc_uniform_neighbor_sample( +def _mg_call_plc_uniform_neighbor_sample_legacy( client, session_id, input_graph, @@ -200,7 +265,7 @@ def _mg_call_plc_uniform_neighbor_sample( ): result = [ client.submit( - _call_plc_uniform_neighbor_sample, + _call_plc_uniform_neighbor_sample_legacy, session_id, input_graph._plc_graph[w], ddf[w][0], @@ -247,7 +312,92 @@ def _mg_call_plc_uniform_neighbor_sample( return ddf -def uniform_neighbor_sample( +def _mg_call_plc_uniform_neighbor_sample( + client, + session_id, + input_graph, + ddf, + keep_batches_together, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t, + indices_t, + with_edge_properties, + random_state, + return_offsets=False, +): + n_workers = None + if keep_batches_together: + n_workers = get_n_workers() + + if hasattr(min_batch_id, "compute"): + min_batch_id = min_batch_id.compute() + if hasattr(max_batch_id, "compute"): + max_batch_id = max_batch_id.compute() + + result = [ + client.submit( + _call_plc_uniform_neighbor_sample, + session_id, + input_graph._plc_graph[w], + starts, + keep_batches_together, + n_workers, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t=weight_t, + with_edge_properties=with_edge_properties, + # FIXME accept and properly transmute a numpy/cupy random state. + random_state=hash((random_state, w)), + return_offsets=return_offsets, + allow_other_workers=False, + pure=False, + ) + for w, starts in ddf.items() + ] + del ddf + + empty_df = ( + create_empty_df_with_edge_props( + indices_t, weight_t, return_offsets=return_offsets + ) + if with_edge_properties + else create_empty_df(indices_t, weight_t) + ) + + wait(result) + + if return_offsets: + result_split = [delayed(lambda x: x, nout=2)(r) for r in result] + ddf = dask_cudf.from_delayed( + [r[0] for r in result_split], meta=empty_df[0], verify_meta=False + ).persist() + ddf_offsets = dask_cudf.from_delayed( + [r[1] for r in result_split], meta=empty_df[1], verify_meta=False + ).persist() + + wait([ddf, ddf_offsets]) + wait([r.release() for r in result_split]) + wait([r.release() for r in result]) + + del result + + return ddf, ddf_offsets + else: + ddf = dask_cudf.from_delayed(result, meta=empty_df, verify_meta=False).persist() + + wait(ddf) + wait([r.release() for r in result]) + del result + + return ddf + + +def _uniform_neighbor_sample_legacy( input_graph: Graph, start_list: Sequence, fanout_vals: List[int], @@ -259,6 +409,162 @@ def uniform_neighbor_sample( random_state: int = None, return_offsets: bool = False, _multiple_clients: bool = False, +) -> Union[dask_cudf.DataFrame, Tuple[dask_cudf.DataFrame, dask_cudf.DataFrame]]: + warnings.warn( + "The batch_id_list, label_list, and label_to_output_comm_rank " + "parameters are deprecated. Consider using with_batch_ids, " + "keep_batches_together, min_batch_id, and max_batch_id instead." + ) + + if isinstance(start_list, int): + start_list = [start_list] + + if isinstance(start_list, list): + start_list = cudf.Series( + start_list, + dtype=input_graph.edgelist.edgelist_df[ + input_graph.renumber_map.renumbered_src_col_name + ].dtype, + ) + + elif with_edge_properties and batch_id_list is None: + batch_id_list = cudf.Series(cp.zeros(len(start_list), dtype="int32")) + + # fanout_vals must be a host array! 
+ # FIXME: ensure other sequence types (eg. cudf Series) can be handled. + if isinstance(fanout_vals, list): + fanout_vals = numpy.asarray(fanout_vals, dtype="int32") + else: + raise TypeError("fanout_vals must be a list, " f"got: {type(fanout_vals)}") + + if "value" in input_graph.edgelist.edgelist_df: + weight_t = input_graph.edgelist.edgelist_df["value"].dtype + else: + weight_t = "float32" + + if "_SRC_" in input_graph.edgelist.edgelist_df: + indices_t = input_graph.edgelist.edgelist_df["_SRC_"].dtype + elif src_n in input_graph.edgelist.edgelist_df: + indices_t = input_graph.edgelist.edgelist_df[src_n].dtype + else: + indices_t = numpy.int32 + + start_list = start_list.rename(start_col_name) + if batch_id_list is not None: + batch_id_list = batch_id_list.rename(batch_col_name) + if hasattr(start_list, "compute"): + # mg input + start_list = start_list.to_frame() + batch_id_list = batch_id_list.to_frame() + ddf = start_list.merge( + batch_id_list, + how="left", + left_index=True, + right_index=True, + ) + else: + # sg input + ddf = cudf.concat( + [ + start_list, + batch_id_list, + ], + axis=1, + ) + else: + ddf = start_list.to_frame() + + if input_graph.renumbered: + ddf = input_graph.lookup_internal_vertex_id(ddf, column_name=start_col_name) + + if hasattr(ddf, "compute"): + ddf = get_distributed_data(ddf) + wait(ddf) + ddf = ddf.worker_to_parts + else: + splits = cp.array_split(cp.arange(len(ddf)), len(Comms.get_workers())) + ddf = {w: [ddf.iloc[splits[i]]] for i, w in enumerate(Comms.get_workers())} + + client = get_client() + session_id = Comms.get_session_id() + if _multiple_clients: + # Distributed centralized lock to allow + # two disconnected processes (clients) to coordinate a lock + # https://docs.dask.org/en/stable/futures.html?highlight=lock#distributed.Lock + lock = Lock("plc_graph_access") + if lock.acquire(timeout=100): + try: + ddf = _mg_call_plc_uniform_neighbor_sample_legacy( + client=client, + session_id=session_id, + input_graph=input_graph, + ddf=ddf, + label_list=label_list, + label_to_output_comm_rank=label_to_output_comm_rank, + fanout_vals=fanout_vals, + with_replacement=with_replacement, + weight_t=weight_t, + indices_t=indices_t, + with_edge_properties=with_edge_properties, + random_state=random_state, + return_offsets=return_offsets, + ) + finally: + lock.release() + else: + raise RuntimeError( + "Failed to acquire lock(plc_graph_access) while trying to sampling" + ) + else: + ddf = _mg_call_plc_uniform_neighbor_sample_legacy( + client=client, + session_id=session_id, + input_graph=input_graph, + ddf=ddf, + label_list=label_list, + label_to_output_comm_rank=label_to_output_comm_rank, + fanout_vals=fanout_vals, + with_replacement=with_replacement, + weight_t=weight_t, + indices_t=indices_t, + with_edge_properties=with_edge_properties, + random_state=random_state, + return_offsets=return_offsets, + ) + + if return_offsets: + ddf, offsets_ddf = ddf + if input_graph.renumbered: + ddf = input_graph.unrenumber(ddf, "sources", preserve_order=True) + ddf = input_graph.unrenumber(ddf, "destinations", preserve_order=True) + + if return_offsets: + return ddf, offsets_ddf + + return ddf + + +uniform_neighbor_sample_legacy = deprecated_warning_wrapper( + _uniform_neighbor_sample_legacy +) + + +def uniform_neighbor_sample( + input_graph: Graph, + start_list: Sequence, + fanout_vals: List[int], + with_replacement: bool = True, + with_edge_properties: bool = False, + batch_id_list: Sequence = None, # deprecated + label_list: Sequence = None, # deprecated + 
label_to_output_comm_rank: bool = None, # deprecated + with_batch_ids: bool = False, + keep_batches_together=False, + min_batch_id=None, + max_batch_id=None, + random_state: int = None, + return_offsets: bool = False, + _multiple_clients: bool = False, ) -> Union[dask_cudf.DataFrame, Tuple[dask_cudf.DataFrame, dask_cudf.DataFrame]]: """ Does neighborhood sampling, which samples nodes from a graph based on the @@ -285,20 +591,36 @@ def uniform_neighbor_sample( edge type, batch id, hop id) with the sampled edges. batch_id_list: cudf.Series or dask_cudf.Series (int32), optional (default=None) + Deprecated. List of batch ids that will be returned with the sampled edges if with_edge_properties is set to True. label_list: cudf.Series or dask_cudf.Series (int32), optional (default=None) + Deprecated. List of unique batch id labels. Used along with label_to_output_comm_rank to assign batch ids to GPUs. label_to_out_comm_rank: cudf.Series or dask_cudf.Series (int32), optional (default=None) + Deprecated. List of output GPUs (by rank) corresponding to batch id labels in the label list. Used to assign each batch id to a GPU. Must be in ascending order (i.e. [0, 0, 1, 2]). + with_batch_ids: bool, optional (default=False) + Flag to specify whether batch ids are present in the start_list + + keep_batches_together: bool (optional, default=False) + If True, will ensure that the returned samples for each batch are on the + same partition. + + min_batch_id: int (optional, default=None) + Required for the keep_batches_together option. The minimum batch id. + + max_batch_id: int (optional, default=None) + Required for the keep_batches_together option. The maximum batch id. + random_state: int, optional Random seed to use when making sampling calls. @@ -363,6 +685,25 @@ def uniform_neighbor_sample( Contains the offsets of each batch in the sampling result """ + if ( + batch_id_list is not None + or label_list is not None + or label_to_output_comm_rank is not None + ): + return uniform_neighbor_sample_legacy( + input_graph, + start_list, + fanout_vals, + with_replacement=with_replacement, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list, + label_list=label_list, + label_to_output_comm_rank=label_to_output_comm_rank, + random_state=random_state, + return_offsets=return_offsets, + _multiple_clients=_multiple_clients, + ) + if isinstance(start_list, int): start_list = [start_list] @@ -373,9 +714,21 @@ def uniform_neighbor_sample( input_graph.renumber_map.renumbered_src_col_name ].dtype, ) + elif with_edge_properties and not with_batch_ids: + if isinstance(start_list, (cudf.DataFrame, dask_cudf.DataFrame)): + raise ValueError("expected 1d input for start list without batch ids") - elif with_edge_properties and batch_id_list is None: - batch_id_list = cudf.Series(cp.zeros(len(start_list), dtype="int32")) + start_list = start_list.to_frame() + start_list[batch_id_n] = cudf.Series(cp.zeros(len(start_list), dtype="int32")) + + if keep_batches_together and min_batch_id is None: + raise ValueError( + "must provide min_batch_id if using keep_batches_together option" + ) + if keep_batches_together and max_batch_id is None: + raise ValueError( + "must provide max_batch_id if using keep_batches_together option" + ) # fanout_vals must be a host array! # FIXME: ensure other sequence types (eg. cudf Series) can be handled. 
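
For context, a minimal usage sketch of the batching parameters added above (illustrative only, not part of this patch). It mirrors the MG test updates later in this diff; the graph `G`, the dask cluster setup, and the "start"/"batch" column names are assumptions:

    import cudf
    import dask_cudf
    import cugraph.dask

    # Start vertices go in the first column, batch ids in the last column;
    # with_batch_ids=True replaces the deprecated batch_id_list argument.
    start_df = dask_cudf.from_cudf(
        cudf.DataFrame(
            {
                "start": cudf.Series([0, 4], dtype="int64"),
                "batch": cudf.Series([0, 1], dtype="int32"),
            }
        ),
        npartitions=2,
    )

    samples, offsets = cugraph.dask.uniform_neighbor_sample(
        G,
        start_list=start_df,
        fanout_vals=[2, 2],
        with_replacement=False,
        with_edge_properties=True,
        with_batch_ids=True,
        # keep_batches_together requires explicit min/max batch ids and keeps
        # each batch's samples on one partition (replaces label_list and
        # label_to_output_comm_rank).
        keep_batches_together=True,
        min_batch_id=0,
        max_batch_id=1,
        return_offsets=True,
    )
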
@@ -396,44 +749,30 @@ def uniform_neighbor_sample( else: indices_t = numpy.int32 - start_list = start_list.rename(start_col_name) - if batch_id_list is not None: - batch_id_list = batch_id_list.rename(batch_col_name) - if hasattr(start_list, "compute"): - # mg input - start_list = start_list.to_frame() - batch_id_list = batch_id_list.to_frame() - ddf = start_list.merge( - batch_id_list, - how="left", - left_index=True, - right_index=True, - ) - else: - # sg input - ddf = cudf.concat( - [ - start_list, - batch_id_list, - ], - axis=1, - ) - else: + if isinstance(start_list, (cudf.Series, dask_cudf.Series)): + start_list = start_list.rename(start_col_name) ddf = start_list.to_frame() + else: + ddf = start_list + columns = ddf.columns + ddf = ddf.rename( + columns={columns[0]: start_col_name, columns[-1]: batch_col_name} + ) if input_graph.renumbered: ddf = input_graph.lookup_internal_vertex_id(ddf, column_name=start_col_name) - if hasattr(ddf, "compute"): - ddf = get_distributed_data(ddf) - wait(ddf) - ddf = ddf.worker_to_parts - else: - splits = cp.array_split(cp.arange(len(ddf)), len(Comms.get_workers())) - ddf = {w: [ddf.iloc[splits[i]]] for i, w in enumerate(Comms.get_workers())} - client = get_client() session_id = Comms.get_session_id() + n_workers = get_n_workers() + + if isinstance(ddf, cudf.DataFrame): + ddf = dask_cudf.from_cudf(ddf, npartitions=n_workers) + + ddf = ddf.repartition(npartitions=n_workers) + ddf = persist_dask_df_equal_parts_per_worker(ddf, client) + ddf = get_persisted_df_worker_map(ddf, client) + if _multiple_clients: # Distributed centralized lock to allow # two disconnected processes (clients) to coordinate a lock @@ -446,8 +785,9 @@ def uniform_neighbor_sample( session_id=session_id, input_graph=input_graph, ddf=ddf, - label_list=label_list, - label_to_output_comm_rank=label_to_output_comm_rank, + keep_batches_together=keep_batches_together, + min_batch_id=min_batch_id, + max_batch_id=max_batch_id, fanout_vals=fanout_vals, with_replacement=with_replacement, weight_t=weight_t, @@ -468,8 +808,9 @@ def uniform_neighbor_sample( session_id=session_id, input_graph=input_graph, ddf=ddf, - label_list=label_list, - label_to_output_comm_rank=label_to_output_comm_rank, + keep_batches_together=keep_batches_together, + min_batch_id=min_batch_id, + max_batch_id=max_batch_id, fanout_vals=fanout_vals, with_replacement=with_replacement, weight_t=weight_t, diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py index 33de5fdc185..a2b0a367d1d 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py @@ -15,16 +15,21 @@ from typing import Union -import cupy import cudf import dask_cudf -import cugraph.dask as dask_cugraph + +from dask.distributed import wait +from dask.distributed import futures_of import cugraph import pylibcugraph from cugraph.gnn.data_loading.bulk_sampler_io import write_samples +import warnings +import logging +import time + class EXPERIMENTAL__BulkSampler: start_col_name = "_START_" @@ -36,7 +41,8 @@ def __init__( output_path: str, graph, seeds_per_call: int = 200_000, - batches_per_partition=100, + batches_per_partition: int = 100, + log_level: int = None, **kwargs, ): """ @@ -55,13 +61,19 @@ def __init__( a single sampling call. batches_per_partition: int (optional, default=100) The number of batches outputted to a single parquet partition. 
+ log_level: int (optional, default=None) + Whether to enable logging for this sampler. Supports 3 levels + of logging if enabled (INFO, WARNING, ERROR). If not provided, + defaults to WARNING. kwargs: kwargs Keyword arguments to be passed to the sampler (i.e. fanout). """ + self.__logger = logging.getLogger(__name__) + self.__logger.setLevel(log_level or logging.WARNING) + max_batches_per_partition = seeds_per_call // batch_size if batches_per_partition > max_batches_per_partition: - import warnings warnings.warn( f"batches_per_partition ({batches_per_partition}) is >" @@ -140,7 +152,7 @@ def add_batches( ... start_col_name="start_vid", ... batch_col_name="start_batch") """ - df = df.rename( + df = df[[start_col_name, batch_col_name]].rename( columns={ start_col_name: self.start_col_name, batch_col_name: self.batch_col_name, @@ -163,6 +175,11 @@ def add_batches( ) if self.size >= self.seeds_per_call: + self.__logger.info( + f"Number of input seeds ({self.size})" + f" is >= seeds per call ({self.seeds_per_call})." + " Calling flush() to compute and write minibatches." + ) self.flush() def flush(self) -> None: @@ -171,14 +188,16 @@ def flush(self) -> None: """ if self.size == 0: return - self.__batches.reset_index(drop=True) + + start_time_calc_batches = time.perf_counter() if isinstance(self.__batches, dask_cudf.DataFrame): self.__batches = self.__batches.persist() min_batch_id = self.__batches[self.batch_col_name].min() if isinstance(self.__batches, dask_cudf.DataFrame): - min_batch_id = min_batch_id.compute() - min_batch_id = int(min_batch_id) + min_batch_id = min_batch_id.persist() + else: + min_batch_id = int(min_batch_id) partition_size = self.batches_per_partition * self.batch_size partitions_per_call = ( @@ -187,7 +206,19 @@ def flush(self) -> None: npartitions = partitions_per_call max_batch_id = min_batch_id + npartitions * self.batches_per_partition - 1 + if isinstance(self.__batches, dask_cudf.DataFrame): + max_batch_id = max_batch_id.persist() + batch_id_filter = self.__batches[self.batch_col_name] <= max_batch_id + if isinstance(batch_id_filter, dask_cudf.Series): + batch_id_filter = batch_id_filter.persist() + + end_time_calc_batches = time.perf_counter() + self.__logger.info( + f"Calculated batches to sample; min = {min_batch_id}" + f" and max = {max_batch_id};" + f" took {end_time_calc_batches - start_time_calc_batches:.4f} s" + ) if isinstance(self.__graph._plc_graph, pylibcugraph.graphs.SGGraph): sample_fn = cugraph.uniform_neighbor_sample @@ -196,31 +227,62 @@ def flush(self) -> None: self.__sample_call_args.update( { "_multiple_clients": True, - "label_to_output_comm_rank": self.__get_label_to_output_comm_rank( - min_batch_id, max_batch_id - ), - "label_list": cupy.arange( - min_batch_id, max_batch_id + 1, dtype="int32" - ), + "keep_batches_together": True, + "min_batch_id": min_batch_id, + "max_batch_id": max_batch_id, } ) + start_time_sample_call = time.perf_counter() + + # Call uniform neighbor sample samples, offsets = sample_fn( self.__graph, **self.__sample_call_args, - start_list=self.__batches[self.start_col_name][batch_id_filter], - batch_id_list=self.__batches[self.batch_col_name][batch_id_filter], + start_list=self.__batches[[self.start_col_name, self.batch_col_name]][ + batch_id_filter + ], + with_batch_ids=True, with_edge_properties=True, return_offsets=True, ) + end_time_sample_call = time.perf_counter() + sample_runtime = end_time_sample_call - start_time_sample_call + + self.__logger.info( + f"Called uniform neighbor sample, took {sample_runtime:.4f} s" 
+ ) + + # Filter batches to remove those already processed self.__batches = self.__batches[~batch_id_filter] + del batch_id_filter if isinstance(self.__batches, dask_cudf.DataFrame): self.__batches = self.__batches.persist() + start_time_write = time.perf_counter() + + # Write batches to parquet self.__write(samples, offsets) + if isinstance(self.__batches, dask_cudf.DataFrame): + wait( + [f.release() for f in futures_of(samples)] + + [f.release() for f in futures_of(offsets)] + ) - if self.size > 0: + del samples + del offsets + + end_time_write = time.perf_counter() + write_runtime = end_time_write - start_time_write + self.__logger.info(f"Wrote samples to parquet, took {write_runtime} seconds") + + current_size = self.size + if current_size > 0: + self.__logger.info( + f"There are still {current_size} samples remaining, " + "calling flush() again..." + ) self.flush() def __write( @@ -232,13 +294,3 @@ def __write( write_samples( samples, offsets, self.__batches_per_partition, self.__output_path ) - - def __get_label_to_output_comm_rank(self, min_batch_id, max_batch_id): - num_workers = dask_cugraph.get_n_workers() - num_batches = max_batch_id - min_batch_id + 1 - z = cupy.zeros(num_batches, dtype="int32") - s = cupy.array_split(cupy.arange(num_batches), num_workers) - for i, t in enumerate(s): - z[t] = i - - return cudf.Series(z) diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py index d7f1c136484..44c1185bbf1 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py @@ -24,7 +24,7 @@ def _write_samples_to_parquet( batches_per_partition: int, output_path: str, partition_info: Optional[Union[dict, str]] = None, -) -> None: +) -> cudf.Series: """ Writes the samples to parquet. results: cudf.DataFrame @@ -40,11 +40,13 @@ def _write_samples_to_parquet( Either a dictionary containing partition data from dask, the string 'sg' indicating that this is a single GPU write, or None indicating that this function should perform a no-op (required by dask). + + Returns an empty cudf series. """ # Required by dask; need to skip dummy partitions. 
if partition_info is None or len(results) == 0: - return + return cudf.Series(dtype="int64") if partition_info != "sg" and (not isinstance(partition_info, dict)): raise ValueError("Invalid value of partition_info") @@ -71,6 +73,8 @@ def _write_samples_to_parquet( ).values results_p.to_parquet(full_output_path, compression=None, index=False) + return cudf.Series(dtype="int64") + def write_samples( results: cudf.DataFrame, @@ -97,7 +101,9 @@ def write_samples( batches_per_partition, output_path, align_dataframes=False, + meta=cudf.Series(dtype="int64"), ).compute() + else: _write_samples_to_parquet( results, offsets, batches_per_partition, output_path, partition_info="sg" diff --git a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py index d6acaa550eb..d239f92d485 100644 --- a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py @@ -15,6 +15,7 @@ from pylibcugraph import ResourceHandle from pylibcugraph import uniform_neighbor_sample as pylibcugraph_uniform_neighbor_sample +from pylibcugraph.utilities.api_tools import deprecated_warning_wrapper import numpy @@ -29,6 +30,10 @@ from cugraph import Graph +start_col_name = "_START_" +batch_col_name = "_BATCH_" + + # FIXME: Move this function to the utility module so that it can be # shared by other algos def ensure_valid_dtype(input_graph, start_list): @@ -50,7 +55,7 @@ def ensure_valid_dtype(input_graph, start_list): return start_list -def uniform_neighbor_sample( +def _uniform_neighbor_sample_legacy( G: Graph, start_list: Sequence, fanout_vals: List[int], @@ -60,6 +65,135 @@ def uniform_neighbor_sample( random_state: int = None, return_offsets: bool = False, ) -> Union[cudf.DataFrame, Tuple[cudf.DataFrame, cudf.DataFrame]]: + + warnings.warn( + "The batch_id_list parameter is deprecated. " + "Consider passing a DataFrame where the last column " + "is the batch ids and setting with_batch_ids=True" + ) + + if isinstance(start_list, int): + start_list = [start_list] + + if isinstance(start_list, list): + start_list = cudf.Series( + start_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype + ) + + if with_edge_properties and batch_id_list is None: + batch_id_list = cp.zeros(len(start_list), dtype="int32") + + # fanout_vals must be a host array! + # FIXME: ensure other sequence types (eg. cudf Series) can be handled. 
+ if isinstance(fanout_vals, list): + fanout_vals = numpy.asarray(fanout_vals, dtype="int32") + else: + raise TypeError("fanout_vals must be a list, " f"got: {type(fanout_vals)}") + + if "weights" in G.edgelist.edgelist_df: + weight_t = G.edgelist.edgelist_df["weights"].dtype + else: + weight_t = "float32" + + start_list = ensure_valid_dtype(G, start_list) + + if G.renumbered is True: + if isinstance(start_list, cudf.DataFrame): + start_list = G.lookup_internal_vertex_id(start_list, start_list.columns) + else: + start_list = G.lookup_internal_vertex_id(start_list) + + sampling_result = pylibcugraph_uniform_neighbor_sample( + resource_handle=ResourceHandle(), + input_graph=G._plc_graph, + start_list=start_list, + h_fan_out=fanout_vals, + with_replacement=with_replacement, + do_expensive_check=False, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list, + random_state=random_state, + ) + + df = cudf.DataFrame() + + if with_edge_properties: + ( + sources, + destinations, + weights, + edge_ids, + edge_types, + batch_ids, + offsets, + hop_ids, + ) = sampling_result + + df["sources"] = sources + df["destinations"] = destinations + df["weight"] = weights + df["edge_id"] = edge_ids + df["edge_type"] = edge_types + df["hop_id"] = hop_ids + + if return_offsets: + offsets_df = cudf.DataFrame( + { + "batch_id": batch_ids, + "offsets": offsets[:-1], + } + ) + + else: + if len(batch_ids) > 0: + batch_ids = cudf.Series(batch_ids).repeat(cp.diff(offsets)) + batch_ids.reset_index(drop=True, inplace=True) + + df["batch_id"] = batch_ids + + else: + sources, destinations, indices = sampling_result + + df["sources"] = sources + df["destinations"] = destinations + + if indices is None: + df["indices"] = None + else: + df["indices"] = indices + if weight_t == "int32": + df["indices"] = indices.astype("int32") + elif weight_t == "int64": + df["indices"] = indices.astype("int64") + else: + df["indices"] = indices + + if G.renumbered: + df = G.unrenumber(df, "sources", preserve_order=True) + df = G.unrenumber(df, "destinations", preserve_order=True) + + if return_offsets: + return df, offsets_df + + return df + + +uniform_neighbor_sample_legacy = deprecated_warning_wrapper( + _uniform_neighbor_sample_legacy +) + + +def uniform_neighbor_sample( + G: Graph, + start_list: Sequence, + fanout_vals: List[int], + with_replacement: bool = True, + with_edge_properties: bool = False, + batch_id_list: Sequence = None, # deprecated + with_batch_ids: bool = False, + random_state: int = None, + return_offsets: bool = False, +) -> Union[cudf.DataFrame, Tuple[cudf.DataFrame, cudf.DataFrame]]: """ Does neighborhood sampling, which samples nodes from a graph based on the current node's neighbors, with a corresponding fanout value at each hop. @@ -85,9 +219,14 @@ def uniform_neighbor_sample( edge type, batch id, hop id) with the sampled edges. batch_id_list: list (int32) + Deprecated. List of batch ids that will be returned with the sampled edges if with_edge_properties is set to True. + with_batch_ids: bool, optional (default=False) + Flag to specify whether batch ids are present in the start_list + Assumes they are the last column in the start_list dataframe + random_state: int, optional Random seed to use when making sampling calls. 
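
Similarly, a minimal single-GPU sketch of the replacement for the deprecated batch_id_list argument (illustrative only, not part of this patch; a `cugraph.Graph` `G` built elsewhere and the "start"/"batch" column names are assumptions):

    import cudf
    import cugraph

    # When with_batch_ids=True, the first column of start_list is taken as the
    # start vertex and the last column as the batch id.
    start_df = cudf.DataFrame(
        {
            "start": cudf.Series([0, 1, 2], dtype="int64"),
            "batch": cudf.Series([0, 0, 1], dtype="int32"),
        }
    )

    df = cugraph.uniform_neighbor_sample(
        G,
        start_list=start_df,
        fanout_vals=[2, 2],
        with_replacement=False,
        with_edge_properties=True,
        with_batch_ids=True,
    )
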
@@ -148,6 +287,18 @@ def uniform_neighbor_sample( Contains the offsets of each batch in the sampling result """ + if batch_id_list is not None: + return uniform_neighbor_sample_legacy( + G, + start_list, + fanout_vals, + with_replacement=with_replacement, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list, + random_state=random_state, + return_offsets=return_offsets, + ) + if isinstance(start_list, int): start_list = [start_list] @@ -156,8 +307,13 @@ def uniform_neighbor_sample( start_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype ) - if with_edge_properties and batch_id_list is None: - batch_id_list = cp.zeros(len(start_list), dtype="int32") + if with_edge_properties and not with_batch_ids: + if isinstance(start_list, cudf.Series): + start_list = start_list.to_frame() + + start_list[batch_col_name] = cudf.Series( + cp.zeros(len(start_list), dtype="int32") + ) # fanout_vals must be a host array! # FIXME: ensure other sequence types (eg. cudf Series) can be handled. @@ -173,21 +329,37 @@ def uniform_neighbor_sample( start_list = ensure_valid_dtype(G, start_list) - if G.renumbered is True: - if isinstance(start_list, cudf.DataFrame): - start_list = G.lookup_internal_vertex_id(start_list, start_list.columns) + if isinstance(start_list, cudf.Series): + start_list = start_list.rename(start_col_name) + start_list = start_list.to_frame() + + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, start_col_name) + else: + columns = start_list.columns + + if with_batch_ids: + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, columns[:-1]) + start_list = start_list.rename( + columns={columns[0]: start_col_name, columns[-1]: batch_col_name} + ) else: - start_list = G.lookup_internal_vertex_id(start_list) + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, columns) + start_list = start_list.rename(columns={columns[0]: start_col_name}) sampling_result = pylibcugraph_uniform_neighbor_sample( resource_handle=ResourceHandle(), input_graph=G._plc_graph, - start_list=start_list, + start_list=start_list[start_col_name], + batch_id_list=start_list[batch_col_name] + if batch_col_name in start_list + else None, h_fan_out=fanout_vals, with_replacement=with_replacement, do_expensive_check=False, with_edge_properties=with_edge_properties, - batch_id_list=batch_id_list, random_state=random_state, ) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 5d2f050bce9..39d2fbea7dd 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -285,7 +285,7 @@ def test_uniform_neighbor_sample_unweighted(simple_unweighted_input_expected_out sampling_results = uniform_neighbor_sample( test_data["Graph"], - test_data["start_list"], + test_data["start_list"].astype("int64"), test_data["fanout_vals"], test_data["with_replacement"], ) @@ -330,11 +330,11 @@ def test_uniform_neighbor_sample_edge_properties(return_offsets): sampling_results = uniform_neighbor_sample( G, - start_list=start_df["seed"], + start_list=start_df, fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, - batch_id_list=start_df["batch"], + with_batch_ids=True, return_offsets=return_offsets, ) if return_offsets: @@ -389,11 +389,16 @@ def test_uniform_neighbor_sample_edge_properties_self_loops(): sampling_results = cugraph.uniform_neighbor_sample( G, - 
start_list=cudf.Series([0, 1, 2]), - batch_id_list=cudf.Series([1, 1, 1], dtype="int32"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 1, 2]), + "batch": cudf.Series([1, 1, 1], dtype="int32"), + } + ), fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, random_state=80, ) @@ -460,11 +465,16 @@ def test_uniform_neighbor_sample_hop_id_order_multi_batch(): sampling_results = cugraph.uniform_neighbor_sample( G, - cudf.Series([0, 1], dtype="int64"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 1], dtype="int64"), + "batch": cudf.Series([0, 1], dtype="int32"), + } + ), fanout_vals=[2, 2, 2], - batch_id_list=cudf.Series([0, 1], dtype="int32"), with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ) for b in range(2): @@ -502,11 +512,16 @@ def test_uniform_neighbor_sample_empty_start_list(): sampling_results = cugraph.uniform_neighbor_sample( G, - start_list=cudf.Series([], dtype="int64"), - batch_id_list=cudf.Series([], dtype="int32"), + start_list=cudf.DataFrame( + { + "start_list": cudf.Series(dtype="int64"), + "batch_id_list": cudf.Series(dtype="int32"), + } + ), fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, random_state=32, ) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 033b96487c4..4da3f3cf950 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -327,7 +327,8 @@ def test_mg_uniform_neighbor_sample_ensure_no_duplicates(dask_client): @pytest.mark.cugraph_ops @pytest.mark.parametrize("return_offsets", [True, False]) def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets): - if len(dask_client.scheduler_info()["workers"]) <= 1: + n_workers = len(dask_client.scheduler_info()["workers"]) + if n_workers <= 1: pytest.skip("Test only valid for MG environments") edgelist_df = dask_cudf.from_cudf( cudf.DataFrame( @@ -352,43 +353,58 @@ def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets): edge_attr=["w", "eid", "etp"], ) - dest_rank = [0, 1] sampling_results = cugraph.dask.uniform_neighbor_sample( G, - start_list=cudf.Series([0, 4], dtype="int64"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 4], dtype="int64"), + "batch": cudf.Series([0, 1], dtype="int32"), + } + ), fanout_vals=[-1, -1], with_replacement=False, with_edge_properties=True, - batch_id_list=cudf.Series([0, 1], dtype="int32"), - label_list=cudf.Series([0, 1], dtype="int32") if return_offsets else None, - label_to_output_comm_rank=cudf.Series(dest_rank, dtype="int32") - if return_offsets - else None, + with_batch_ids=True, + keep_batches_together=True, + min_batch_id=0, + max_batch_id=1, return_offsets=return_offsets, ) if return_offsets: sampling_results, sampling_offsets = sampling_results - df_p0 = sampling_results.get_partition(0).compute() - assert sorted(df_p0.sources.values_host.tolist()) == ( - [0, 0, 0, 1, 1, 2, 2, 2, 4, 4] - ) - assert sorted(df_p0.destinations.values_host.tolist()) == ( - [1, 1, 1, 2, 2, 3, 3, 4, 4, 4] - ) - - df_p1 = sampling_results.get_partition(1).compute() - assert sorted(df_p1.sources.values_host.tolist()) == ([1, 1, 3, 3, 4, 4]) - assert sorted(df_p1.destinations.values_host.tolist()) == ([1, 2, 2, 3, 3, 4]) - - offsets_p0 = sampling_offsets.get_partition(0).compute() - assert 
offsets_p0.batch_id.values_host.tolist() == [0] - assert offsets_p0.offsets.values_host.tolist() == [0] - - offsets_p1 = sampling_offsets.get_partition(1).compute() - assert offsets_p1.batch_id.values_host.tolist() == [1] - assert offsets_p1.offsets.values_host.tolist() == [0] + batches_found = {0: 0, 1: 0} + for i in range(n_workers): + dfp = sampling_results.get_partition(i).compute() + if len(dfp) > 0: + offsets_p = sampling_offsets.get_partition(i).compute() + assert len(offsets_p) > 0 + + if offsets_p.batch_id.iloc[0] == 1: + batches_found[1] += 1 + + assert offsets_p.batch_id.values_host.tolist() == [1] + assert offsets_p.offsets.values_host.tolist() == [0] + + assert sorted(dfp.sources.values_host.tolist()) == ( + [1, 1, 3, 3, 4, 4] + ) + assert sorted(dfp.destinations.values_host.tolist()) == ( + [1, 2, 2, 3, 3, 4] + ) + elif offsets_p.batch_id.iloc[0] == 0: + batches_found[0] += 1 + + assert offsets_p.batch_id.values_host.tolist() == [0] + assert offsets_p.offsets.values_host.tolist() == [0] + + assert sorted(dfp.sources.values_host.tolist()) == ( + [0, 0, 0, 1, 1, 2, 2, 2, 4, 4] + ) + assert sorted(dfp.destinations.values_host.tolist()) == ( + [1, 1, 1, 2, 2, 3, 3, 4, 4, 4] + ) mdf = cudf.merge( sampling_results.compute(), @@ -446,13 +462,19 @@ def test_uniform_neighbor_sample_edge_properties_self_loops(dask_client): sampling_results = cugraph.dask.uniform_neighbor_sample( G, - start_list=dask_cudf.from_cudf(cudf.Series([0, 1, 2]), npartitions=2), - batch_id_list=dask_cudf.from_cudf( - cudf.Series([1, 1, 1], dtype="int32"), npartitions=2 + start_list=dask_cudf.from_cudf( + cudf.DataFrame( + { + "start": cudf.Series([0, 1, 2], dtype="int64"), + "batch": cudf.Series([1, 1, 1], dtype="int32"), + } + ), + npartitions=2, ), fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ).compute() assert sorted(sampling_results.sources.values_host.tolist()) == [0, 0, 1, 1, 2, 2] @@ -526,23 +548,32 @@ def test_uniform_neighbor_sample_hop_id_order_multi_batch(): sampling_results = cugraph.dask.uniform_neighbor_sample( G, - cudf.Series([0, 1], dtype="int64"), + dask_cudf.from_cudf( + cudf.DataFrame( + { + "start": cudf.Series([0, 1], dtype="int64"), + "batch": cudf.Series([0, 1], dtype="int32"), + } + ), + npartitions=2, + ), fanout_vals=[2, 2, 2], - batch_id_list=cudf.Series([0, 1], dtype="int32"), with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ) for p in range(sampling_results.npartitions): sampling_results_p = sampling_results.get_partition(p) - for b in range(2): - sampling_results_pb = sampling_results_p[ - sampling_results_p.batch_id == b - ].compute() - assert ( - sorted(sampling_results_pb.hop_id.values_host.tolist()) - == sampling_results_pb.hop_id.values_host.tolist() - ) + if len(sampling_results_p) > 0: + for b in range(2): + sampling_results_pb = sampling_results_p[ + sampling_results_p.batch_id == b + ].compute() + assert ( + sorted(sampling_results_pb.hop_id.values_host.tolist()) + == sampling_results_pb.hop_id.values_host.tolist() + ) @pytest.mark.mg @@ -577,11 +608,19 @@ def test_uniform_neighbor_edge_properties_sample_small_start_list( cugraph.dask.uniform_neighbor_sample( G, - start_list=cudf.Series([0]), + start_list=dask_cudf.from_cudf( + cudf.Series( + { + "start": cudf.Series([0]), + "batch": cudf.Series([10], dtype="int32"), + } + ), + npartitions=1, + ), fanout_vals=[10, 25], with_replacement=with_replacement, with_edge_properties=True, - batch_id_list=cudf.Series([10], dtype="int32"), + 
with_batch_ids=True, ) @@ -610,11 +649,16 @@ def test_uniform_neighbor_sample_without_dask_inputs(dask_client): sampling_results = cugraph.dask.uniform_neighbor_sample( G, - start_list=cudf.Series([0, 1, 2]), - batch_id_list=cudf.Series([1, 1, 1], dtype="int32"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 1, 2]), + "batch": cudf.Series([1, 1, 1], dtype="int32"), + } + ), fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ).compute() assert sorted(sampling_results.sources.values_host.tolist()) == [0, 0, 1, 1, 2, 2] @@ -664,24 +708,24 @@ def test_uniform_neighbor_sample_batched(dask_client, dataset, input_df, max_bat input_vertices = dask_cudf.concat([df.src, df.dst]).unique().compute() assert isinstance(input_vertices, cudf.Series) + input_vertices.name = "start" input_vertices.index = cupy.random.permutation(len(input_vertices)) + input_vertices = input_vertices.to_frame().reset_index(drop=True) - input_batch = cudf.Series( + input_vertices["batch"] = cudf.Series( cupy.random.randint(0, max_batches, len(input_vertices)), dtype="int32" ) - input_batch.index = cupy.random.permutation(len(input_vertices)) if input_df == dask_cudf.DataFrame: - input_batch = dask_cudf.from_cudf(input_batch, npartitions=num_workers) input_vertices = dask_cudf.from_cudf(input_vertices, npartitions=num_workers) sampling_results = cugraph.dask.uniform_neighbor_sample( G, start_list=input_vertices, - batch_id_list=input_batch, fanout_vals=[5, 5], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ) for batch_id in range(max_batches): @@ -693,7 +737,7 @@ def test_uniform_neighbor_sample_batched(dask_client, dataset, input_df, max_bat .compute() ) - input_starts_per_batch = len(input_batch[input_batch == batch_id]) + input_starts_per_batch = len(input_vertices[input_vertices.batch == batch_id]) # Should be <= to account for starts without outgoing edges assert output_starts_per_batch <= input_starts_per_batch
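
Finally, a sketch of how the BulkSampler changes in this patch fit together (illustrative only, not part of this patch; the import path follows the module touched above, `G` is an existing SG or MG graph, and the batch size, fanout, and output path are placeholder values):

    import logging

    import cudf
    from cugraph.gnn.data_loading.bulk_sampler import EXPERIMENTAL__BulkSampler

    sampler = EXPERIMENTAL__BulkSampler(
        batch_size=500,
        output_path="/tmp/samples",
        graph=G,
        seeds_per_call=200_000,
        batches_per_partition=100,
        log_level=logging.INFO,  # new in this patch; defaults to WARNING
        fanout_vals=[2, 2],      # extra kwargs are forwarded to uniform_neighbor_sample
    )

    # Seeds and batch ids are handed over as named columns; flush() runs
    # automatically once seeds_per_call is reached, or manually at the end.
    sampler.add_batches(
        cudf.DataFrame(
            {
                "start_vid": cudf.Series([0, 1, 2], dtype="int64"),
                "start_batch": cudf.Series([0, 0, 1], dtype="int32"),
            }
        ),
        start_col_name="start_vid",
        batch_col_name="start_batch",
    )
    sampler.flush()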