API improvements for end-to-end MG sampling performance #3269
Merged: rapids-bot merged 24 commits into rapidsai:branch-23.04 from ChuckHastings:uniform_neighbor_sampling_tuning on Mar 22, 2023.
Commits (24):
- fe5c3fd (ChuckHastings): Propose new API to improve efficient of MG sampling in end-to-end wor…
- e9fcd31 (ChuckHastings): respond to PR review feedback
- ebb51a7 (ChuckHastings): Update API based on PR comments
- fd81f64 (ChuckHastings): implement new uniform neighborhood sampling API
- 12de841 (ChuckHastings): Merge branch 'branch-23.04' into uniform_neighbor_sampling_tuning
- a451397 (ChuckHastings): missed a few formatting issues
- 75e6acb (ChuckHastings): update pylibcugraph files to add new parameters to uniform neighborho…
- 38ff456 (ChuckHastings): some cleanup to make names consistent
- c4d5f0b (ChuckHastings): debug failing C API test
- 47ed81d (ChuckHastings): improve tests, some debugging
- e1c8298 (ChuckHastings): Merge branch 'branch-23.04' into uniform_neighbor_sampling_tuning
- a09713c (ChuckHastings): address some PR comments
- d04ac14 (ChuckHastings): address PR comments with new structure
- 994c52c (ChuckHastings): update names of parameters in PLC
- ff49397 (ChuckHastings): Merge branch 'branch-23.04' into uniform_neighbor_sampling_tuning
- 9461846 (ChuckHastings): fix formatting errors
- 5fd2d17 (ChuckHastings): address PR comments
- cca68de (ChuckHastings): rename is_span_sorted to is_sorted
- ccbb3d2 (ChuckHastings): add unit test to check shuffling, need to sort before shuffling
- 27a5647 (alexbarghi-nv): update and verify python tests
- 7f39f16 (ChuckHastings): refactor code to group test seeds into batches
- 99a7a98 (ChuckHastings): Merge branch 'branch-23.04' into uniform_neighbor_sampling_tuning
- b63f601 (alexbarghi-nv): Merge branch 'branch-23.04' into uniform_neighbor_sampling_tuning
- b575a5d (ChuckHastings): Merge branch 'branch-23.04' into uniform_neighbor_sampling_tuning
So, these are the requirements this API should satisfy: starting_vertices includes seeds from multiple batches; the caller should be able to separate sampling outputs from different batches, and the caller should be able to place sampling outputs on different GPUs based on a mapping between batch IDs and GPUs. If the caller is invoking this function for a single batch, the caller may want the sampling outputs for the local sampling_vertices to be stored locally as well.
I am thinking about the following. This function optionally takes rmm::device_uvector<label_t>&& labels and rmm::device_uvector<int>&& dst_comm_ranks (with labels.size() == dst_comm_ranks.size() == start_vertices.size()).

Here labels don't necessarily coincide with batch IDs; users may set labels to batch_id * batch_size + seed index within a batch (if the result needs to be sorted within a batch using seed IDs as a primary key and hops as a secondary key), or just batch_id (if there is no need to create a tree per seed). The output results are shuffled based on dst_comm_ranks for each starting vertex (if dst_comm_ranks is valid); otherwise, all the results for local seeds will be stored locally.

We return optional labels and hops for each edge datum (src, dst, optional (edge weight, ID, type)). Within each GPU, the output results will be sorted using labels as a primary key (if labels are provided in the input) and hops as a secondary key.

One concern is that computing a global "seed index within a batch" might be challenging for the caller if they want to distribute seeds for a single batch across multiple GPUs, but I guess this is not a common use case. If there are 100 batches and 10 GPUs, users may assign batches [0, 10) to GPU 0 (so start_vertices for GPU 0 holds all the seeds for batches [0, 10)), batches [10, 20) to GPU 1, and so on.

We may be able to reduce the output size if we don't return a label and hop per edge and instead create offset arrays per label & hop, but I am not sure the benefit outweighs the increase in complexity. If the memory footprint is a major concern, we may just reduce the number of batches we process per call (the main reason for processing multiple batches in a single call is to saturate the GPUs, but if we're hitting the memory limit, we may be going too far in that direction).

Is there anything I'm missing, or any other thoughts?
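The proposed semantics can be sketched on the host side in plain Python (this is a conceptual illustration of the contract being discussed, not the cuGraph implementation; the field names and `shuffle_and_sort` helper are made up for this sketch): each sampled edge inherits the label and destination rank of its starting vertex, results are shuffled to the rank given by dst_comm_ranks, and each GPU's results are sorted with label as the primary key and hop as the secondary key.

```python
def shuffle_and_sort(sampled_edges, n_ranks):
    """sampled_edges: list of dicts with keys src, dst, label, hop, dst_rank."""
    per_rank = {r: [] for r in range(n_ranks)}
    for e in sampled_edges:
        # "Shuffle": send each result to the rank requested for its seed.
        per_rank[e["dst_rank"]].append(e)
    for r in per_rank:
        # Within each GPU: label is the primary sort key, hop the secondary.
        per_rank[r].sort(key=lambda e: (e["label"], e["hop"]))
    return per_rank

# Labels encode batch_id * batch_size + seed_index, as suggested above.
batch_size = 2
edges = [
    {"src": 0, "dst": 5, "label": 1 * batch_size + 0, "hop": 1, "dst_rank": 1},
    {"src": 0, "dst": 3, "label": 0 * batch_size + 1, "hop": 0, "dst_rank": 0},
    {"src": 2, "dst": 7, "label": 0 * batch_size + 0, "hop": 0, "dst_rank": 0},
]
out = shuffle_and_sort(edges, n_ranks=2)
# Rank 0 now holds its two results ordered by label; rank 1 holds one result.
```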
This is a good point; there's a lot to unpack here. Let me create a few separate comments.
The original objective of processing multiple batches in a single call is to efficiently utilize the parallelism on the GPU. If we do individual calls with a small batch size then we don't get enough parallelism and the overhead of calling the function is large relative to the work accomplished. By processing multiple batches of seeds in the same call we increase the amount of parallel work to be done on the GPU and we reduce the number of times we face the overhead of calling the function, allocating memory, launching kernels, etc. This improves our overall efficiency.
I don't know where the threshold is, but at some point there are enough vertices in a call to get the efficiencies we are looking for. Doing larger numbers of batches in a single call should still drive down overhead, but the improvement should decrease dramatically once we saturate the GPU's parallelism. It seems likely (with the large graphs we are targeting) that this threshold is much smaller than "all seeds to be sampled in a training epoch".
In the current pipeline, the output from this processing is going to be moved out of GPU memory to be retrieved by the trainers as they work (currently written at the Python layer to Parquet files). As long as the first call processes enough batches to keep the trainers busy while we do a second call, there would be no drop in pipeline throughput from making multiple calls to the sampling code with smaller numbers of batches. That would reduce the memory requirement, and we could overlap the sampling with the training a bit more. It might actually improve the overall latency for training an epoch (although perhaps only marginally; I don't really know the performance numbers you're dealing with).
This does suggest that the extra memory required to drive this function doesn't need to be a driving factor in the design of the API.
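The overhead argument above can be captured in a toy cost model (the numbers are illustrative assumptions, not measured cuGraph figures): total epoch time is the number of calls times a fixed per-call overhead, plus the per-batch sampling work, so grouping batches into fewer calls amortizes the fixed overhead.

```python
def epoch_time(n_batches, batches_per_call, per_call_overhead, work_per_batch):
    """Toy model: fixed overhead per call plus linear work per batch."""
    n_calls = -(-n_batches // batches_per_call)  # ceiling division
    return n_calls * per_call_overhead + n_batches * work_per_batch

# 100 batches, hypothetical overhead of 5 time units per call, 1 unit of work per batch.
one_at_a_time = epoch_time(100, 1, per_call_overhead=5.0, work_per_batch=1.0)
ten_at_a_time = epoch_time(100, 10, per_call_overhead=5.0, work_per_batch=1.0)
# Grouping 10 batches per call cuts the overhead term by 10x; past GPU
# saturation, the work term dominates and further grouping stops helping.
```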
Regarding the labels discussion: part of the reason I went with the name "label" rather than "batch" was the notion that batches might not be the only reason for labeling the data.

There's absolutely no reason that the sampling code needs to understand anything about how the caller assigns labels and uses them. If we keep the label as a vector (labels.size() == start_vertices.size()) rather than switching to a CSR-style representation, that label can literally be any arbitrary 32-bit integer value (it doesn't need to be contiguous values starting from 0). Either route for organizing the result (mapping label to GPU ID, or providing a destination vector with dst_comm_ranks.size() == start_vertices.size()) allows us to shuffle the data to the correct GPU. We could talk about sorting options (beyond sorting by label) if that would be helpful, although that seems like an easy enough feature to add later when we need it.

It seems like many of the use cases you describe can be addressed by the caller creating the labels in a more sophisticated way and then grouping the results once they are returned. There's no reason the caller couldn't create labels such that the training batch is decomposed into k labels that all get mapped to the same GPU, with the results combined by the caller. That allows for construction of the trees, and it allows the same seed to be repeated within a batch while still differentiating which tree came from which seed.

The beauty there is that the responsibility for managing the sophistication lies with the caller rather than with the library function.
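The caller-side scheme described above can be sketched in plain Python (a hypothetical illustration; `make_label` and `group_by_label` are made-up helper names, and the real data would live in device memory): the caller encodes (batch_id, seed_index) into an opaque integer label before sampling, then groups the returned edges by label to rebuild one tree per seed.

```python
from collections import defaultdict

def make_label(batch_id, seed_index, batch_size):
    # Any encoding works; the label is opaque to the sampling library.
    return batch_id * batch_size + seed_index

def group_by_label(sampled_edges):
    """sampled_edges: iterable of (src, dst, label) tuples."""
    trees = defaultdict(list)
    for src, dst, label in sampled_edges:
        trees[label].append((src, dst))
    return dict(trees)

# Two seeds in batch 0 (batch_size 4): seed 0 produced a 2-hop chain,
# seed 1 produced a single edge. Grouping recovers one tree per seed.
edges = [(0, 3, make_label(0, 0, 4)),
         (3, 7, make_label(0, 0, 4)),
         (1, 2, make_label(0, 1, 4))]
trees = group_by_label(edges)
```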
I guess if the memory footprint for the sampling is a significant percentage (say, more than 10%) of the GPU's total memory, we may not be too far from saturating the GPU. And I agree that "the extra memory required to drive this function doesn't need to be a driving factor in the design of the API." We still need to be frugal with memory, but it should not be the #1 priority when we need to make trade-offs.
Yes, now I agree with this. And I just want to emphasize that this API should allow creating one tree per seed within a batch, so we need to provide a mechanism to distinguish sampled edges from different seeds, and labels can serve this purpose. (32 bits might be sufficient for the foreseeable future, but a few GPU generations from now, especially with Grace Hopper-like systems, we may exceed the 32-bit boundary, so we should keep it as label_t rather than int32_t.)
I will add a label_t and only instantiate int32_t for now.