From 0adb2fdb31e3dc24c00df069348bb8017020aef8 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 13 Aug 2024 08:49:42 -0700 Subject: [PATCH 01/68] support heterogenous fanout type --- cpp/include/cugraph/sampling_functions.hpp | 223 +++++++++++++++++- cpp/include/cugraph_c/sampling_algorithms.h | 47 ++++ cpp/src/c_api/array.hpp | 12 + cpp/src/c_api/graph_functions.cpp | 2 +- cpp/src/c_api/neighbor_sampling.cpp | 150 +++++++++++- cpp/src/c_api/neighbor_sampling.hpp | 30 +++ cpp/src/sampling/neighbor_sampling_impl.hpp | 47 ++-- .../sampling/neighbor_sampling_mg_v32_e32.cpp | 8 +- .../sampling/neighbor_sampling_mg_v32_e64.cpp | 8 +- .../sampling/neighbor_sampling_mg_v64_e64.cpp | 8 +- .../sampling/neighbor_sampling_sg_v32_e32.cpp | 8 +- .../sampling/neighbor_sampling_sg_v32_e64.cpp | 8 +- .../sampling/neighbor_sampling_sg_v64_e64.cpp | 8 +- .../dask/sampling/uniform_neighbor_sample.py | 15 +- .../sampling/uniform_neighbor_sample.py | 17 +- .../_cugraph_c/sampling_algorithms.pxd | 18 ++ .../pylibcugraph/uniform_neighbor_sample.pyx | 75 ++++-- 17 files changed, 634 insertions(+), 50 deletions(-) create mode 100644 cpp/src/c_api/neighbor_sampling.hpp diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index fec1a07604e..5b884fa611e 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -129,7 +129,8 @@ uniform_neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - raft::host_span fan_out, + std::optional> fan_out, + std::optional, raft::host_span>> heterogeneous_fan_out, raft::random::RngState& rng_state, bool return_hops, bool with_replacement = true, @@ -137,6 +138,114 @@ uniform_neighbor_sample( bool dedupe_sources = false, bool do_expensive_check = false); +#if 0 +/* FIXME: + There are two options to support heterogeneous fanout + 1) Create a new function 'heterogeneous_uniform_neighbor_sample' which 
will take as input + only heterogeneous fanout type. Drawback: code redundancy + 2) Update 'uniform_neighbor_sample' to support both fanout types +*/ +/** + * @brief Heterogeneous uniform Neighborhood Sampling. + * + * This function traverses from a set of starting vertices, traversing outgoing edges and + * randomly selects from these outgoing neighbors to extract a subgraph. + * + * Output from this function is a tuple of vectors (src, dst, weight, edge_id, edge_type, hop, + * label, offsets), identifying the randomly selected edges. src is the source vertex, dst is the + * destination vertex, weight (optional) is the edge weight, edge_id (optional) identifies the edge + * id, edge_type (optional) identifies the edge type, hop identifies which hop the edge was + * encountered in. The label output (optional) identifies the vertex label. The offsets array + * (optional) will be described below and is dependent upon the input parameters. + * + * If @p starting_vertex_labels is not specified then no organization is applied to the output, the + * label and offsets values in the return set will be std::nullopt. + * + * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is not specified then + * the label output has values. This will also result in the output being sorted by vertex label. + * The offsets array in the return will be a CSR-style offsets array to identify the beginning of + * each label range in the data. `labels.size() == (offsets.size() - 1)`. + * + * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is specified then the + * label output has values. This will also result in the output being sorted by vertex label. The + * offsets array in the return will be a CSR-style offsets array to identify the beginning of each + * label range in the data. `labels.size() == (offsets.size() - 1)`. Additionally, the data will + * be shuffled so that all data with a particular label will be on the specified rank.
+ * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam edge_type_t Type of edge type. Needs to be an integral type. + * @tparam label_t Type of label. Needs to be an integral type. + * @tparam store_transposed Flag indicating whether sources (if false) or destinations (if + * true) are major indices + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph View object to generate NBR Sampling on. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. + * @param edge_id_view Optional view object holding edge ids for @p graph_view. + * @param edge_type_view Optional view object holding edge types for @p graph_view. + * @param starting_vertices Device span of starting vertex IDs for the sampling. + * In a multi-gpu context the starting vertices should be local to this GPU. + * @param starting_vertex_labels Optional device span of labels associated with each starting vertex + * for the sampling. + * @param label_to_output_comm_rank Optional tuple of device spans mapping label to a particular + * output rank. Element 0 of the tuple identifies the label, Element 1 of the tuple identifies the + * output rank. The label span must be sorted in ascending order. + * @param fan_out Tuple of host spans mapping each edge type to fanout values. Element 0 + * of the tuple defines the size of the fanout per edge type while element 1 defines the branching out + * (fan-out) degree per edge type for each level.
+ * @param rng_state A pre-initialized raft::RngState object for generating random numbers + * @param return_hops boolean flag specifying if the hop information should be returned + * @param prior_sources_behavior Enum type defining how to handle prior sources, (defaults to + * DEFAULT) + * @param dedupe_sources boolean flag, if true then if a vertex v appears as a destination in hop X + * multiple times with the same label, it will only be passed once (for each label) as a source + * for the next hop. Default is false. + * @param with_replacement boolean flag specifying if random sampling is done with replacement + * (true); or, without replacement (false); default = true; + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple device vectors (vertex_t source_vertex, vertex_t destination_vertex, + * optional weight_t weight, optional edge_t edge id, optional edge_type_t edge type, + * optional int32_t hop, optional label_t label, optional size_t offsets) + */ + +// tuple with 3 elements. 1 - edge_type, 2- host span of size_t, 3 - fanout vector +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +heterogeneous_uniform_neighbor_sample( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + std::tuple, raft::host_span> fan_out, + raft::random::RngState& rng_state, + bool return_hops, + bool with_replacement = true, + prior_sources_behavior_t prior_sources_behavior = prior_sources_behavior_t::DEFAULT, + bool dedupe_sources = false, + bool do_expensive_check = false); +#endif + /** * @brief Biased Neighborhood Sampling. 
* @@ -240,6 +349,118 @@ biased_neighbor_sample( bool dedupe_sources = false, bool do_expensive_check = false); +#if 0 +/* FIXME: + There are two options to support heterogeneous fanout + 1) Create a new function 'heterogeneous_biased_neighbor_sample' which will take as input + only heterogeneous fanout type. Drawback: code redundancy + 2) Update 'biased_neighbor_sample' to support both fanout types +*/ +/** + * @brief Heterogeneous biased Neighborhood Sampling. + * + * This function traverses from a set of starting vertices, traversing outgoing edges and + * randomly selects (with edge biases) from these outgoing neighbors to extract a subgraph. + * + * Output from this function is a tuple of vectors (src, dst, weight, edge_id, edge_type, hop, + * label, offsets), identifying the randomly selected edges. src is the source vertex, dst is the + * destination vertex, weight (optional) is the edge weight, edge_id (optional) identifies the edge + * id, edge_type (optional) identifies the edge type, hop identifies which hop the edge was + * encountered in. The label output (optional) identifies the vertex label. The offsets array + * (optional) will be described below and is dependent upon the input parameters. + * + * If @p starting_vertex_labels is not specified then no organization is applied to the output, the + * label and offsets values in the return set will be std::nullopt. + * + * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is not specified then + * the label output has values. This will also result in the output being sorted by vertex label. + * The offsets array in the return will be a CSR-style offsets array to identify the beginning of + * each label range in the data. `labels.size() == (offsets.size() - 1)`. + * + * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is specified then the + * label output has values. This will also result in the output being sorted by vertex label.
The + * offsets array in the return will be a CSR-style offsets array to identify the beginning of each + * label range in the data. `labels.size() == (offsets.size() - 1)`. Additionally, the data will + * be shuffled so that all data with a particular label will be on the specified rank. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam edge_type_t Type of edge type. Needs to be an integral type. + * @tparam label_t Type of label. Needs to be an integral type. + * @tparam store_transposed Flag indicating whether sources (if false) or destinations (if + * true) are major indices + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph View object to generate NBR Sampling on. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. + * @param edge_id_view Optional view object holding edge ids for @p graph_view. + * @param edge_type_view Optional view object holding edge types for @p graph_view. + * @param edge_bias_view View object holding edge biases (to be used in biased sampling) for @p + * graph_view. Bias values should be non-negative and the sum of edge bias values from any vertex + * should not exceed std::numeric_limits::max(). 0 bias value indicates that the + * corresponding edge can never be selected. + * @param starting_vertices Device span of starting vertex IDs for the sampling. + * In a multi-gpu context the starting vertices should be local to this GPU. + * @param starting_vertex_labels Optional device span of labels associated with each starting vertex + * for the sampling.
+ * @param label_to_output_comm_rank Optional tuple of device spans mapping label to a particular + * output rank. Element 0 of the tuple identifies the label, Element 1 of the tuple identifies the + * output rank. The label span must be sorted in ascending order. + * @param fan_out Tuple of host spans mapping each edge type to fanout values. Element 0 + * of the tuple defines the size of the fanout per edge type while element 1 defines the branching out + * (fan-out) degree per edge type for each level. + * @param rng_state A pre-initialized raft::RngState object for generating random numbers + * @param return_hops boolean flag specifying if the hop information should be returned + * @param prior_sources_behavior Enum type defining how to handle prior sources, (defaults to + * DEFAULT) + * @param dedupe_sources boolean flag, if true then if a vertex v appears as a destination in hop X + * multiple times with the same label, it will only be passed once (for each label) as a source + * for the next hop. Default is false. + * @param with_replacement boolean flag specifying if random sampling is done with replacement + * (true); or, without replacement (false); default = true; + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple device vectors (vertex_t source_vertex, vertex_t destination_vertex, + * optional weight_t weight, optional edge_t edge id, optional edge_type_t edge type, + * optional int32_t hop, optional label_t label, optional size_t offsets) + */ +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +heterogeneous_biased_neighbor_sample( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + edge_property_view_t edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + std::tuple, raft::host_span> fan_out, + raft::random::RngState& rng_state, + bool return_hops, + bool with_replacement = true, + prior_sources_behavior_t prior_sources_behavior = prior_sources_behavior_t::DEFAULT, + bool dedupe_sources = false, + bool do_expensive_check = false); +#endif + /* * @brief renumber sampled edge list and compress to the (D)CSR|(D)CSC format. * diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 1a3d20b9339..e440512c482 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -319,6 +319,49 @@ void cugraph_sampling_set_dedupe_sources(cugraph_sampling_options_t* options, bo */ void cugraph_sampling_options_free(cugraph_sampling_options_t* options); +/** + * @brief Opaque neighborhood sampling heterogeneous fanout type + */ +// FIXME: internal representation should be tuple instead of pairs - Make it more generic (tuple) +// cugraph_device_tuple_t, host_device_tuple_t, +//dictionary, key and array +// translate dictionary to a tuple. Add to the draft PR the PLC layer. 
+// Concatenate to build the 3 arrays from the PLC layer +/// mimic +typedef struct { + int32_t align_; +} cugraph_sample_heterogeneous_fanout_t; + +/** + * @brief Create heterogeneous fanout + * + * Input data will be stored in the heterogenous_fanout. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] edge_type_size Type erased array of edge type size + * @param [in] fanout Type erased array of fanout values + * @param [out] heterogeneous_fanout Opaque pointer to fanout_t + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_create_heterogeneous_fanout( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_host_array_view_t* edge_type_size, + const cugraph_type_erased_host_array_view_t* fanout, + cugraph_sample_heterogeneous_fanout_t** heterogeneous_fanout, + cugraph_error_t** error); + +/** + * @brief Free edge type and fanout pairs + * + * @param [in] heterogeneous_fanout The edge type size and fanout values + */ +void cugraph_heterogeneous_fanout_free( + cugraph_sample_heterogeneous_fanout_t* heterogeneous_fanout); + /** * @brief Uniform Neighborhood Sampling * @@ -368,6 +411,7 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( const cugraph_type_erased_device_array_view_t* label_to_comm_rank, const cugraph_type_erased_device_array_view_t* label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, + const cugraph_sample_heterogeneous_fanout_t* heterogeneous_fanout, cugraph_rng_state_t* rng_state, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -667,6 +711,8 @@ cugraph_error_code_t cugraph_test_uniform_neighborhood_sample_result_create( * not CUGRAPH_SUCCESS * @return error code */ + + cugraph_error_code_t cugraph_select_random_vertices(const cugraph_resource_handle_t* handle, 
const cugraph_graph_t* graph, cugraph_rng_state_t* rng_state, @@ -674,6 +720,7 @@ cugraph_error_code_t cugraph_select_random_vertices(const cugraph_resource_handl cugraph_type_erased_device_array_t** vertices, cugraph_error_t** error); + #ifdef __cplusplus } #endif diff --git a/cpp/src/c_api/array.hpp b/cpp/src/c_api/array.hpp index 048d2ee1cea..2dce70fa924 100644 --- a/cpp/src/c_api/array.hpp +++ b/cpp/src/c_api/array.hpp @@ -125,6 +125,18 @@ struct cugraph_type_erased_host_array_t { std::copy(vec.begin(), vec.end(), reinterpret_cast(data_.get())); } + template + T* as_type() + { + return reinterpret_cast(data_.get()); + } + + template + T const* as_type() const + { + return reinterpret_cast(data_.get()); + } + auto view() { return new cugraph_type_erased_host_array_view_t{data_.get(), size_, num_bytes_, type_}; diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp index df741a349d2..349598b7ef6 100644 --- a/cpp/src/c_api/graph_functions.cpp +++ b/cpp/src/c_api/graph_functions.cpp @@ -84,7 +84,7 @@ struct create_vertex_pairs_functor : public cugraph::c_api::abstract_functor { std::nullopt, std::nullopt); } - + // std::tuple (template) result_ = new cugraph::c_api::cugraph_vertex_pairs_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(first_copy, graph_->vertex_type_), new cugraph::c_api::cugraph_type_erased_device_array_t(second_copy, graph_->vertex_type_)}; diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 69306806030..18e158cbd6d 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -20,8 +20,10 @@ #include "c_api/random.hpp" #include "c_api/resource_handle.hpp" #include "c_api/utils.hpp" +#include "c_api/neighbor_sampling.hpp" // FIXME: Remove this and instead use std::tuple #include +#include #include #include @@ -72,6 +74,7 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct 
cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_to_comm_rank_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_offsets_{nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; + cugraph::c_api::cugraph_sample_heterogeneous_fanout_t const* heterogeneous_fan_out_{nullptr}; cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr}; cugraph::c_api::cugraph_sampling_options_t options_{}; bool do_expensive_check_{false}; @@ -86,6 +89,7 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct cugraph_type_erased_device_array_view_t const* label_to_comm_rank, cugraph_type_erased_device_array_view_t const* label_offsets, cugraph_type_erased_host_array_view_t const* fan_out, + cugraph_sample_heterogeneous_fanout_t const* heterogeneous_fan_out, cugraph_rng_state_t* rng_state, cugraph::c_api::cugraph_sampling_options_t options, bool do_expensive_check) @@ -108,6 +112,8 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct label_offsets)), fan_out_( reinterpret_cast(fan_out)), + heterogeneous_fan_out_( + reinterpret_cast(heterogeneous_fan_out)), rng_state_(reinterpret_cast(rng_state)), options_(options), do_expensive_check_(do_expensive_check) @@ -215,7 +221,17 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct raft::device_span{label_to_comm_rank_->as_type(), label_to_comm_rank_->size_})) : std::nullopt, - raft::host_span(fan_out_->as_type(), fan_out_->size_), + (fan_out_ != nullptr) + ? std::make_optional>(fan_out_->as_type(), fan_out_->size_) + : std::nullopt, + + (heterogeneous_fan_out_ != nullptr) + ? 
std::make_optional(std::make_tuple( + raft::host_span{heterogeneous_fan_out_->edge_type_id->as_type(), + heterogeneous_fan_out_->edge_type_id->size_}, + raft::host_span{heterogeneous_fan_out_->fanout->as_type(), + heterogeneous_fan_out_->fanout->size_})) + : std::nullopt, rng_state_->rng_state_, options_.return_hops_, options_.with_replacement_, @@ -753,6 +769,121 @@ struct biased_neighbor_sampling_functor : public cugraph::c_api::abstract_functo } }; + struct create_heterogeneous_fanout_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_graph_t* graph_; + cugraph::c_api::cugraph_type_erased_host_array_view_t const* edge_type_size_; + cugraph::c_api::cugraph_type_erased_host_array_view_t const* fanout_; + // FIXME: This type doesn't exist: instead create an 'std::tuple' + cugraph::c_api::cugraph_sample_heterogeneous_fanout_t* result_{}; + + create_heterogeneous_fanout_functor(::cugraph_resource_handle_t const* handle, + ::cugraph_graph_t* graph, + ::cugraph_type_erased_host_array_view_t const* edge_type_size, + ::cugraph_type_erased_host_array_view_t const* fanout) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + graph_(reinterpret_cast(graph)), + edge_type_size_( + reinterpret_cast(edge_type_size)), + fanout_( + reinterpret_cast(fanout)) + { + } + + template + void operator()() + { + // FIXME: Remove this check as it is not necessary + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + std::vector edge_type_size_copy{(int32_t) edge_type_size_->size_}; + std::vector fanout_copy{(int32_t) fanout_->size_}; + + raft::copy( + edge_type_size_copy.data(), edge_type_size_->as_type(), edge_type_size_->size_, handle_.get_stream()); + + raft::copy( + fanout_copy.data(), fanout_->as_type(), fanout_->size_, handle_.get_stream()); + + // std::tuple (template) + // result_ = new std::tuple