
[WIP] Improve multi-GPU BFS performance #4619

Draft · wants to merge 95 commits into base: branch-24.10

Commits (95)
a0d1f01
add a create_graph_from_edgelist function that takes edge list in mul…
seunghwak Jul 15, 2024
55513ae
update R-mat graph generators to generate edge list in multiple chunks
seunghwak Jul 15, 2024
2163fd8
Merge branch 'branch-24.08' of https://github.com/rapidsai/cugraph in…
seunghwak Jul 15, 2024
a9dfb92
fix build error
seunghwak Jul 15, 2024
e7b33ca
delete unused functions
seunghwak Jul 15, 2024
27ea550
fix build errors
seunghwak Jul 16, 2024
e5e8257
add temporary performance measurement code
seunghwak Jul 19, 2024
7ec5b08
add code to broadcast frontier using a bitmap
seunghwak Jul 19, 2024
d6123ba
resolve merge conflicts
seunghwak Jul 19, 2024
81f51c1
fix build error
seunghwak Jul 19, 2024
69cb4f9
update dataframe buffer utilities
seunghwak Jul 21, 2024
6adcccb
reduce # resizes
seunghwak Jul 21, 2024
bfe21fc
remove debug statement
seunghwak Jul 22, 2024
446435b
rename VertexFrontierBucketType to KeyBucketType
seunghwak Jul 22, 2024
df463ce
update per_v_transform_reduce_incoming|outgoing_e to support reduce_o…
seunghwak Jul 25, 2024
222148d
update kernels to take KeyIterator key_first & key_last
seunghwak Jul 26, 2024
effc69c
update per_v_transform_reduce_incoming_outgoing_e to support key list
seunghwak Jul 28, 2024
d537290
remove pred_op.cuh
seunghwak Jul 29, 2024
cf92885
update per_v_transform_reduce_incoming|outgoing_e to take a predicate
seunghwak Jul 29, 2024
b8d846c
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 9, 2024
50ffc33
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 9, 2024
a6476b9
split per_v_transform_reduce_incoming_outgoing_e implementation to tw…
seunghwak Aug 9, 2024
ec24758
implement per_v_transform_reduce_if_incoming|outgoing_e
seunghwak Aug 10, 2024
df751e7
update BFS to use per_v_transform_reduce_if_outgoing_e
seunghwak Aug 13, 2024
4661b9b
file rename
seunghwak Aug 13, 2024
0951741
remove transform_reduce_v_frontier_outgoing_e_by_src (this can be bet…
seunghwak Aug 13, 2024
796b928
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 13, 2024
7b98e3a
code cleanup, add few FIXMEs to improve performance, and add performa…
seunghwak Aug 15, 2024
3f77ee1
performance tuning for BFS
seunghwak Aug 16, 2024
bb75771
add a utility to find iterator type in dataframe buffer
seunghwak Aug 17, 2024
cfce7bc
minor performance tuning
seunghwak Aug 17, 2024
75d6151
delete unused code
seunghwak Aug 18, 2024
0f88988
add an option to skip edge shuffling in R-mat edge list generation
seunghwak Aug 18, 2024
180ece1
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 18, 2024
2efb51e
fix build error
seunghwak Aug 19, 2024
106a6ad
fix documentation error
seunghwak Aug 19, 2024
98419cb
add a query function
seunghwak Aug 19, 2024
8193a91
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Aug 20, 2024
9625e0c
bug fix
seunghwak Aug 20, 2024
03cfe0d
bug fix
seunghwak Aug 20, 2024
c15305f
bug fixes
seunghwak Aug 21, 2024
4a1f150
bug fix
seunghwak Aug 21, 2024
29b6834
replace offset vector communication with local computing
seunghwak Aug 21, 2024
fcc75e0
add tmp perf measurement code
seunghwak Aug 21, 2024
710eb88
map GPUs on minor_comm to consecutive GPUs
seunghwak Aug 21, 2024
d040110
additional performance tuning
seunghwak Aug 22, 2024
ca816dd
add a utility function
seunghwak Aug 22, 2024
7712c38
fix build error
seunghwak Aug 22, 2024
31a5955
fix build error
seunghwak Aug 22, 2024
ac33784
bug fix
seunghwak Aug 23, 2024
6d8c7ef
perf experiment
seunghwak Aug 23, 2024
6bcdbe7
perf measurement code update
seunghwak Aug 23, 2024
3a950a5
rename [vertex_first, vertex_last) in fill|update_edge_src|dst_proper…
seunghwak Aug 23, 2024
d27a5e3
update fill|update_edge_minor_property to optionally use bitmap to br…
seunghwak Aug 24, 2024
97022f5
add missing includes
seunghwak Aug 24, 2024
ecf76f8
specialization for bool
seunghwak Aug 24, 2024
350f17e
add asynchronous copy_if
seunghwak Aug 27, 2024
93f726f
fix implicit synchronization in multi-stream execution
seunghwak Aug 27, 2024
d022c30
fix implicit synchronizations for multi-stream execution
seunghwak Aug 28, 2024
e53b3b8
delete debug prints
seunghwak Aug 28, 2024
b6e4f28
fix erroneous comments (numbering)
seunghwak Aug 29, 2024
be504cc
reduce memory footprint in graph creation
seunghwak Aug 30, 2024
3b151e0
undo temporary change for benchmarking
seunghwak Aug 30, 2024
ad0c879
update comments
seunghwak Aug 30, 2024
ce4ea93
cosmetic updates
seunghwak Aug 30, 2024
743ebf1
resolve merge conflicts
seunghwak Aug 30, 2024
9445027
update renumbering to use binning in more places
seunghwak Sep 3, 2024
bea1498
Merge branch 'branch-24.10' of https://github.com/rapidsai/cugraph in…
seunghwak Sep 3, 2024
70b1108
Merge branch 'upstream_pr4642' into enh_bfs_mg
seunghwak Sep 3, 2024
28641a6
update detail::extract_transform_v_frontier_e to use multiple CUDA st…
seunghwak Sep 3, 2024
05df778
exec_policy=>exec_policy_nosync
seunghwak Sep 3, 2024
5c4e3bd
performance-tweak detail::extract_transform_v_frontier_e
seunghwak Sep 4, 2024
dc44a7d
update comments
seunghwak Sep 4, 2024
ebcbfb7
improve stream concurrency
seunghwak Sep 4, 2024
3652c33
update copy_if_nosync to take a pointer to store the counter
seunghwak Sep 5, 2024
c689e35
temporary parameter setting for benchmarking
seunghwak Sep 9, 2024
f7b061b
bug fix
seunghwak Sep 9, 2024
20e1c74
add sun_nosync for multi stream execution
seunghwak Sep 9, 2024
9fa4fb4
pre-filter keys
seunghwak Sep 9, 2024
b6a1fb0
multi-stream execution
seunghwak Sep 9, 2024
3f71304
more performance logs
seunghwak Sep 9, 2024
3577699
update logging
seunghwak Sep 10, 2024
42c4d0b
use global comm to shuffle in compute_renumber_map (to avoid P2P buff…
seunghwak Sep 10, 2024
2557668
reduce small memory allocations
seunghwak Sep 11, 2024
0381f22
bug fix
seunghwak Sep 11, 2024
eb822da
temporarily store vertex IDs in 48 bit to cut peak memory usage
seunghwak Sep 12, 2024
a067f08
update v_list bitmap bcast
seunghwak Sep 13, 2024
6c9118e
undo a flag
seunghwak Sep 15, 2024
20721e6
peak memory usage
seunghwak Sep 19, 2024
9d002c5
use approximation in switching between topdown & bottomup
seunghwak Sep 23, 2024
9e3574e
update logging
seunghwak Sep 23, 2024
07749f4
peak memory usage
seunghwak Sep 25, 2024
4ddd0a1
improve logging
seunghwak Sep 25, 2024
3bb6602
NCCL bug workaround
seunghwak Sep 25, 2024
8be2a3f
temporary parameter tweaks for testing
seunghwak Sep 25, 2024
24 changes: 12 additions & 12 deletions cpp/include/cugraph/edge_partition_device_view.cuh
@@ -255,7 +255,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
if (dcs_nzd_vertices_) {
assert(major_hypersparse_first_);
thrust::transform(rmm::exec_policy(stream),
thrust::transform(rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator(this->major_range_first()),
thrust::make_counting_iterator(this->major_range_last()),
local_degrees.begin(),
@@ -266,7 +266,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
major_hypersparse_first_.value_or(vertex_t{0})});
} else {
thrust::transform(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator(this->major_range_first()),
thrust::make_counting_iterator(this->major_range_last()),
local_degrees.begin(),
@@ -284,7 +284,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
if (dcs_nzd_vertices_) {
assert(major_hypersparse_first_);
thrust::transform(rmm::exec_policy(stream),
thrust::transform(rmm::exec_policy_nosync(stream),
major_first,
major_last,
local_degrees.begin(),
@@ -295,7 +295,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
major_hypersparse_first_.value_or(vertex_t{0})});
} else {
thrust::transform(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
major_first,
major_last,
local_degrees.begin(),
@@ -355,7 +355,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
if (dcs_nzd_vertices_) {
assert(major_hypersparse_first_);
thrust::transform(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator(this->major_range_first()),
thrust::make_counting_iterator(this->major_range_last()),
local_degrees.begin(),
@@ -368,7 +368,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
mask_first});
} else {
thrust::transform(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator(this->major_range_first()),
thrust::make_counting_iterator(this->major_range_last()),
local_degrees.begin(),
@@ -394,7 +394,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
if (dcs_nzd_vertices_) {
assert(major_hypersparse_first_);
thrust::transform(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
major_first,
major_last,
local_degrees.begin(),
@@ -407,7 +407,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
mask_first});
} else {
thrust::transform(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
major_first,
major_last,
local_degrees.begin(),
@@ -577,7 +577,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
__host__ rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
thrust::transform(rmm::exec_policy(stream),
thrust::transform(rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator(this->major_range_first()),
thrust::make_counting_iterator(this->major_range_last()),
local_degrees.begin(),
@@ -595,7 +595,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
thrust::transform(rmm::exec_policy(stream),
thrust::transform(rmm::exec_policy_nosync(stream),
major_first,
major_last,
local_degrees.begin(),
@@ -638,7 +638,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
{
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
thrust::transform(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator(this->major_range_first()),
thrust::make_counting_iterator(this->major_range_last()),
local_degrees.begin(),
@@ -660,7 +660,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
{
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
thrust::transform(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
major_first,
major_last,
local_degrees.begin(),
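Every change in this file swaps `rmm::exec_policy` for `rmm::exec_policy_nosync`, which skips the implicit stream synchronization after each Thrust algorithm so that independent work on multiple streams can overlap. A minimal host-side analogue of the pattern (assumption: this is not the rmm API, only the synchronization idea — enqueue several independent operations, then wait once at a single explicit sync point):

```cpp
#include <cassert>
#include <future>
#include <numeric>
#include <vector>

// Each call returns a future immediately (the "nosync" enqueue) instead of
// blocking until the work finishes (the implicit-sync behavior).
std::future<long long> enqueue_sum(std::vector<int> const& v)
{
  return std::async(std::launch::async,
                    [&v] { return std::accumulate(v.begin(), v.end(), 0LL); });
}

long long overlapped_sums(std::vector<int> const& a, std::vector<int> const& b)
{
  auto fa = enqueue_sum(a);    // in flight
  auto fb = enqueue_sum(b);    // in flight, overlaps with fa
  return fa.get() + fb.get();  // single explicit sync point
}
```

With `exec_policy_nosync` the caller takes on the same responsibility: synchronize the stream before reading results produced by the enqueued algorithms.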
3 changes: 2 additions & 1 deletion cpp/include/cugraph/graph_functions.hpp
@@ -1114,7 +1114,8 @@ shuffle_external_vertex_value_pairs(raft::handle_t const& handle,
* @param edge_ids Optional list of edge ids
* @param edge_types Optional list of edge types
* @return Tuple of vectors storing edge sources, destinations, optional weights,
* optional edge ids, optional edge types mapped to this GPU.
* optional edge ids, optional edge types mapped to this GPU and a vector storing the
* number of edges received from each GPU.
*/
template <typename vertex_t, typename edge_t, typename weight_t, typename edge_type_t>
std::tuple<rmm::device_uvector<vertex_t>,
2 changes: 1 addition & 1 deletion cpp/include/cugraph/graph_view.hpp
@@ -243,7 +243,7 @@ namespace detail {
// use (key, value) pairs to store source/destination properties if (unique edge
// sources/destinations) over (V / major_comm_size|minor_comm_size) is smaller than the threshold
// value
double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold = 0.1;
double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold = 0.0; // FIXME: just for benchmarking

// FIXME: threshold values require tuning
// use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller
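The threshold above gates whether edge-partition source/destination property values are stored as (key, value) pairs; lowering it to 0.0 for benchmarking disables that representation entirely. A sketch of the decision (assumption: `use_kv_pairs` is a hypothetical helper for illustration — the real check lives in the property-fill code paths):

```cpp
#include <cassert>

// Upstream default; the diff temporarily forces this to 0.0 for benchmarking.
constexpr double kv_pair_fill_ratio_threshold = 0.1;

// Use (key, value) pairs when the number of unique edge sources/destinations
// divided by the per-GPU vertex-range size (V / comm_size) falls below the
// threshold, i.e. when the dense per-vertex array would be mostly unused.
bool use_kv_pairs(double num_unique, double num_vertices, int comm_size)
{
  double fill_ratio = num_unique / (num_vertices / comm_size);
  return fill_ratio < kv_pair_fill_ratio_threshold;
}
```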
24 changes: 24 additions & 0 deletions cpp/include/cugraph/partition_manager.hpp
@@ -71,6 +71,30 @@ class partition_manager {
: (major_comm_rank * minor_comm_size + minor_comm_rank);
}

#ifdef __CUDACC__
__host__ __device__
#endif
static int
compute_major_comm_rank_from_global_comm_rank(int major_comm_size,
int minor_comm_size,
int comm_rank)
{
return map_major_comm_to_gpu_row_comm ? comm_rank % major_comm_size
: comm_rank / minor_comm_size;
}

#ifdef __CUDACC__
__host__ __device__
#endif
static int
compute_minor_comm_rank_from_global_comm_rank(int major_comm_size,
int minor_comm_size,
int comm_rank)
{
return map_major_comm_to_gpu_row_comm ? comm_rank / major_comm_size
: comm_rank % minor_comm_size;
}

#ifdef __CUDACC__
__host__ __device__
#endif
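The two new functions invert the global-rank mapping with local arithmetic instead of communication. A standalone sketch of the `map_major_comm_to_gpu_row_comm == true` branch (assumption: in that layout a global rank decomposes as `comm_rank = minor_comm_rank * major_comm_size + major_comm_rank`, matching the modulo/division pair above):

```cpp
#include <cassert>

// major-comm rank: position within a GPU row
int major_comm_rank_from_global(int major_comm_size, int comm_rank)
{
  return comm_rank % major_comm_size;
}

// minor-comm rank: which GPU row the global rank falls in
int minor_comm_rank_from_global(int major_comm_size, int comm_rank)
{
  return comm_rank / major_comm_size;
}
```

Because each GPU can evaluate this for any peer's global rank, kernels and host code can derive communicator coordinates without exchanging offset vectors — the motivation behind the "replace offset vector communication with local computing" commit.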
73 changes: 47 additions & 26 deletions cpp/include/cugraph/utilities/dataframe_buffer.hpp
@@ -82,6 +82,53 @@ auto allocate_dataframe_buffer(size_t buffer_size, rmm::cuda_stream_view stream_
std::make_index_sequence<tuple_size>(), buffer_size, stream_view);
}

template <typename T>
struct dataframe_buffer_type {
using type = decltype(allocate_dataframe_buffer<T>(size_t{0}, rmm::cuda_stream_view{}));
};

template <typename T>
using dataframe_buffer_type_t = typename dataframe_buffer_type<T>::type;

template <typename T>
std::optional<dataframe_buffer_type_t<T>> try_allocate_dataframe_buffer(
size_t buffer_size, rmm::cuda_stream_view stream_view)
{
try {
return allocate_dataframe_buffer<T>(buffer_size, stream_view);
} catch (std::exception const& e) {
return std::nullopt;
}
}

template <typename T>
struct dataframe_buffer_iterator_type {
using type = typename rmm::device_uvector<T>::iterator;
};

template <typename... Ts>
struct dataframe_buffer_iterator_type<thrust::tuple<Ts...>> {
using type = thrust::zip_iterator<thrust::tuple<typename rmm::device_uvector<Ts>::iterator...>>;
};

template <typename T>
using dataframe_buffer_iterator_type_t = typename dataframe_buffer_iterator_type<T>::type;

template <typename T>
struct dataframe_buffer_const_iterator_type {
using type = typename rmm::device_uvector<T>::const_iterator;
};

template <typename... Ts>
struct dataframe_buffer_const_iterator_type<thrust::tuple<Ts...>> {
using type =
thrust::zip_iterator<thrust::tuple<typename rmm::device_uvector<Ts>::const_iterator...>>;
};

template <typename T>
using dataframe_buffer_const_iterator_type_t =
typename dataframe_buffer_const_iterator_type<T>::type;

template <typename BufferType>
void reserve_dataframe_buffer(BufferType& buffer,
size_t new_buffer_capacity,
@@ -206,30 +253,4 @@ auto get_dataframe_buffer_cend(BufferType& buffer)
std::make_index_sequence<std::tuple_size<BufferType>::value>(), buffer);
}

template <typename T>
struct dataframe_buffer_value_type {
using type = void;
};

template <typename T>
struct dataframe_buffer_value_type<rmm::device_uvector<T>> {
using type = T;
};

template <typename... Ts>
struct dataframe_buffer_value_type<std::tuple<rmm::device_uvector<Ts>...>> {
using type = thrust::tuple<Ts...>;
};

template <typename BufferType>
using dataframe_buffer_value_type_t = typename dataframe_buffer_value_type<BufferType>::type;

template <typename T>
struct dataframe_buffer_type {
using type = decltype(allocate_dataframe_buffer<T>(size_t{0}, rmm::cuda_stream_view{}));
};

template <typename T>
using dataframe_buffer_type_t = typename dataframe_buffer_type<T>::type;

} // namespace cugraph
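The new `dataframe_buffer_iterator_type` traits dispatch on scalar versus `thrust::tuple` element types: a scalar buffer yields its `device_uvector` iterator, while a tuple buffer yields a zip of per-column iterators. A host-only analogue of the same specialization pattern, with `std::vector` standing in for `rmm::device_uvector` and `std::tuple` for `thrust::tuple` (assumption: the names here are illustrative, not the cugraph API):

```cpp
#include <tuple>
#include <type_traits>
#include <vector>

template <typename T>
struct buffer_iterator_type {  // scalar case: one column, its iterator
  using type = typename std::vector<T>::iterator;
};

template <typename... Ts>
struct buffer_iterator_type<std::tuple<Ts...>> {  // tuple case: zip of column iterators
  using type = std::tuple<typename std::vector<Ts>::iterator...>;
};

template <typename T>
using buffer_iterator_type_t = typename buffer_iterator_type<T>::type;

static_assert(
    std::is_same_v<buffer_iterator_type_t<int>, std::vector<int>::iterator>);
static_assert(
    std::is_same_v<buffer_iterator_type_t<std::tuple<int, float>>,
                   std::tuple<std::vector<int>::iterator,
                              std::vector<float>::iterator>>);
```

This lets callers name a buffer's iterator type at compile time (e.g. in function signatures) without first materializing a buffer, which is what `detail::extract_transform_v_frontier_e` and the BFS kernels need when taking `key_first`/`key_last` iterator pairs.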