Skip to content

Commit 563c97c

Browse files
authored
K-hop neighbors (#2782)
K-hop neighbors implementation (mainly to support 2-hop neighbors). Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) URL: #2782
1 parent 64ddc95 commit 563c97c

13 files changed

+1074
-64
lines changed

cpp/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ set(CUGRAPH_SOURCES
254254
src/structure/symmetrize_edgelist_mg.cu
255255
src/community/triangle_count_sg.cu
256256
src/community/triangle_count_mg.cu
257+
src/traversal/k_hop_nbrs_sg.cu
258+
src/traversal/k_hop_nbrs_mg.cu
257259
)
258260

259261
if(USE_CUGRAPH_OPS)

cpp/include/cugraph/algorithms.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1798,6 +1798,36 @@ rmm::device_uvector<weight_t> overlap_coefficients(
17981798
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs,
17991799
bool use_weights);
18001800

1801+
/*
1802+
* @brief Enumerate K-hop neighbors
1803+
*
1804+
* Note that the number of K-hop neighbors (and memory footprint) can grow very fast if there are
1805+
* high-degree vertices. Limit the number of start vertices and @p k to avoid rapid increase in
1806+
* memory footprint.
1807+
*
1808+
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
1809+
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
1810+
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
1811+
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
1812+
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
1813+
* handles to various CUDA libraries) to run graph algorithms.
1814+
* @param graph_view Graph view object.
1815+
* @param start_vertices Find K-hop neighbors from each vertex in @p start_vertices.
1816+
* @param k Number of hops to make to enumerate neighbors.
1817+
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
1818+
* @return Tuple of two arrays: offsets and K-hop neighbors. The size of the offset array is @p
1819+
* start_vertices.size() + 1. The i'th and (i+1)'th elements of the offset array demarcates the
1820+
* beginning (inclusive) and end (exclusive) of the K-hop neighbors of the i'th element of @p
1821+
* start_vertices, respectively.
1822+
*/
1823+
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
1824+
std::tuple<rmm::device_uvector<size_t>, rmm::device_uvector<vertex_t>> k_hop_nbrs(
1825+
raft::handle_t const& handle,
1826+
graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const& graph_view,
1827+
raft::device_span<vertex_t const> start_vertices,
1828+
size_t k,
1829+
bool do_expensive_check = false);
1830+
18011831
} // namespace cugraph
18021832

18031833
/**

cpp/include/cugraph/edge_partition_device_view.cuh

Lines changed: 63 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ __device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck_i
5555
: thrust::nullopt;
5656
}
5757

58-
template <typename vertex_t, typename edge_t, bool multi_gpu, bool use_dcs>
58+
template <typename vertex_t, typename edge_t, typename return_type_t, bool multi_gpu, bool use_dcs>
5959
struct local_degree_op_t {
6060
raft::device_span<edge_t const> offsets{};
6161
std::conditional_t<multi_gpu, vertex_t, std::byte /* dummy */> major_range_first{};
@@ -64,30 +64,30 @@ struct local_degree_op_t {
6464
dcs_nzd_vertices{};
6565
std::conditional_t<use_dcs, vertex_t, std::byte /* dummy */> major_hypersparse_first{};
6666

67-
__device__ edge_t operator()(vertex_t major) const
67+
__device__ return_type_t operator()(vertex_t major) const
6868
{
6969
if constexpr (multi_gpu) {
7070
vertex_t idx{};
7171
if constexpr (use_dcs) {
7272
if (major < major_hypersparse_first) {
7373
idx = major - major_range_first;
74-
return offsets[idx + 1] - offsets[idx];
74+
return static_cast<return_type_t>(offsets[idx + 1] - offsets[idx]);
7575
} else {
7676
auto major_hypersparse_idx =
7777
major_hypersparse_idx_from_major_nocheck_impl(dcs_nzd_vertices, major);
7878
if (major_hypersparse_idx) {
7979
idx = (major_hypersparse_first - major_range_first) + *major_hypersparse_idx;
80-
return offsets[idx + 1] - offsets[idx];
80+
return static_cast<return_type_t>(offsets[idx + 1] - offsets[idx]);
8181
} else {
82-
return edge_t{0};
82+
return return_type_t{0};
8383
}
8484
}
8585
} else {
8686
idx = major - major_range_first;
87-
return offsets[idx + 1] - offsets[idx];
87+
return static_cast<return_type_t>(offsets[idx + 1] - offsets[idx]);
8888
}
8989
} else {
90-
return offsets[major + 1] - offsets[major];
90+
return static_cast<return_type_t>(offsets[major + 1] - offsets[major]);
9191
}
9292
}
9393
};
@@ -176,31 +176,39 @@ class edge_partition_device_view_t<vertex_t,
176176
{
177177
}
178178

179-
edge_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
179+
size_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
180180
rmm::cuda_stream_view stream) const
181181
{
182182
return dcs_nzd_vertices_ ? thrust::transform_reduce(
183183
rmm::exec_policy(stream),
184184
majors.begin(),
185185
majors.end(),
186-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, true>{
187-
this->offsets_,
188-
major_range_first_,
189-
*dcs_nzd_vertices_,
190-
*major_hypersparse_first_},
191-
edge_t{0},
192-
thrust::plus<edge_t>())
186+
detail::local_degree_op_t<
187+
vertex_t,
188+
edge_t,
189+
size_t /* no limit on majors.size(), so edge_t can overflow */,
190+
multi_gpu,
191+
true>{this->offsets_,
192+
major_range_first_,
193+
*dcs_nzd_vertices_,
194+
*major_hypersparse_first_},
195+
size_t{0},
196+
thrust::plus<size_t>())
193197
: thrust::transform_reduce(
194198
rmm::exec_policy(stream),
195199
majors.begin(),
196200
majors.end(),
197-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{
198-
this->offsets_,
199-
major_range_first_,
200-
std::byte{0} /* dummy */,
201-
std::byte{0} /* dummy */},
202-
edge_t{0},
203-
thrust::plus<edge_t>());
201+
detail::local_degree_op_t<
202+
vertex_t,
203+
edge_t,
204+
size_t /* no limit on majors.size(), so edge_t can overflow */,
205+
multi_gpu,
206+
false>{this->offsets_,
207+
major_range_first_,
208+
std::byte{0} /* dummy */,
209+
std::byte{0} /* dummy */},
210+
size_t{0},
211+
thrust::plus<size_t>());
204212
}
205213

206214
rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
@@ -212,7 +220,7 @@ class edge_partition_device_view_t<vertex_t,
212220
thrust::make_counting_iterator(this->major_range_first()),
213221
thrust::make_counting_iterator(this->major_range_last()),
214222
local_degrees.begin(),
215-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, true>{
223+
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, true>{
216224
this->offsets_,
217225
major_range_first_,
218226
*dcs_nzd_vertices_,
@@ -223,7 +231,7 @@ class edge_partition_device_view_t<vertex_t,
223231
thrust::make_counting_iterator(this->major_range_first()),
224232
thrust::make_counting_iterator(this->major_range_last()),
225233
local_degrees.begin(),
226-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{
234+
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
227235
this->offsets_, major_range_first_, std::byte{0} /* dummy */, std::byte{0} /* dummy */});
228236
}
229237
return local_degrees;
@@ -239,7 +247,7 @@ class edge_partition_device_view_t<vertex_t,
239247
majors.begin(),
240248
majors.end(),
241249
local_degrees.begin(),
242-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, true>{
250+
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, true>{
243251
this->offsets_,
244252
major_range_first_,
245253
dcs_nzd_vertices_.value(),
@@ -250,7 +258,7 @@ class edge_partition_device_view_t<vertex_t,
250258
majors.begin(),
251259
majors.end(),
252260
local_degrees.begin(),
253-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{
261+
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
254262
this->offsets_, major_range_first_, std::byte{0} /* dummy */, std::byte{0} /* dummy */});
255263
}
256264
return local_degrees;
@@ -366,49 +374,53 @@ class edge_partition_device_view_t<vertex_t,
366374
{
367375
}
368376

369-
edge_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
377+
size_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
370378
rmm::cuda_stream_view stream) const
371379
{
372380
return thrust::transform_reduce(
373381
rmm::exec_policy(stream),
374382
majors.begin(),
375383
majors.end(),
376-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{this->offsets_,
377-
std::byte{0} /* dummy */,
378-
std::byte{0} /* dummy */,
379-
std::byte{0} /* dummy */},
380-
edge_t{0},
381-
thrust::plus<edge_t>());
384+
detail::local_degree_op_t<vertex_t,
385+
edge_t,
386+
size_t /* no limit on majors.size(), so edge_t can overflow */,
387+
multi_gpu,
388+
false>{this->offsets_,
389+
std::byte{0} /* dummy */,
390+
std::byte{0} /* dummy */,
391+
std::byte{0} /* dummy */},
392+
size_t{0},
393+
thrust::plus<size_t>());
382394
}
383395

384396
rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
385397
{
386398
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
387-
thrust::transform(
388-
rmm::exec_policy(stream),
389-
thrust::make_counting_iterator(this->major_range_first()),
390-
thrust::make_counting_iterator(this->major_range_last()),
391-
local_degrees.begin(),
392-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{this->offsets_,
393-
std::byte{0} /* dummy */,
394-
std::byte{0} /* dummy */,
395-
std::byte{0} /* dummy */});
399+
thrust::transform(rmm::exec_policy(stream),
400+
thrust::make_counting_iterator(this->major_range_first()),
401+
thrust::make_counting_iterator(this->major_range_last()),
402+
local_degrees.begin(),
403+
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
404+
this->offsets_,
405+
std::byte{0} /* dummy */,
406+
std::byte{0} /* dummy */,
407+
std::byte{0} /* dummy */});
396408
return local_degrees;
397409
}
398410

399411
rmm::device_uvector<edge_t> compute_local_degrees(raft::device_span<vertex_t const> majors,
400412
rmm::cuda_stream_view stream) const
401413
{
402414
rmm::device_uvector<edge_t> local_degrees(majors.size(), stream);
403-
thrust::transform(
404-
rmm::exec_policy(stream),
405-
majors.begin(),
406-
majors.end(),
407-
local_degrees.begin(),
408-
detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{this->offsets_,
409-
std::byte{0} /* dummy */,
410-
std::byte{0} /* dummy */,
411-
std::byte{0} /* dummy */});
415+
thrust::transform(rmm::exec_policy(stream),
416+
majors.begin(),
417+
majors.end(),
418+
local_degrees.begin(),
419+
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
420+
this->offsets_,
421+
std::byte{0} /* dummy */,
422+
std::byte{0} /* dummy */,
423+
std::byte{0} /* dummy */});
412424
return local_degrees;
413425
}
414426

cpp/src/components/weakly_connected_components_impl.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,8 +563,8 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
563563
}
564564

565565
auto max_pushes = GraphViewType::is_multi_gpu
566-
? compute_num_out_nbrs_from_frontier(
567-
handle, level_graph_view, vertex_frontier.bucket(bucket_idx_cur))
566+
? static_cast<edge_t>(compute_num_out_nbrs_from_frontier(
567+
handle, level_graph_view, vertex_frontier.bucket(bucket_idx_cur)))
568568
: edge_count;
569569

570570
// FIXME: if we use cuco::static_map (no duplicates, ideally we need static_set), edge_buffer

cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,9 @@ auto sort_and_reduce_buffer_elements(
210210
} // namespace detail
211211

212212
template <typename GraphViewType, typename VertexFrontierBucketType>
213-
typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier(
214-
raft::handle_t const& handle,
215-
GraphViewType const& graph_view,
216-
VertexFrontierBucketType const& frontier)
213+
size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle,
214+
GraphViewType const& graph_view,
215+
VertexFrontierBucketType const& frontier)
217216
{
218217
static_assert(!GraphViewType::is_storage_transposed,
219218
"GraphViewType should support the push model.");
@@ -223,7 +222,7 @@ typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier(
223222
using weight_t = typename GraphViewType::weight_type;
224223
using key_t = typename VertexFrontierBucketType::key_type;
225224

226-
edge_t ret{0};
225+
size_t ret{0};
227226

228227
vertex_t const* local_frontier_vertex_first{nullptr};
229228
if constexpr (std::is_same_v<key_t, vertex_t>) {
@@ -244,7 +243,6 @@ typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier(
244243
edge_partition_device_view_t<vertex_t, edge_t, weight_t, GraphViewType::is_multi_gpu>(
245244
graph_view.local_edge_partition_view(i));
246245

247-
// FIXME: edge_partition.compute_number_of_edges()???
248246
if constexpr (GraphViewType::is_multi_gpu) {
249247
auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name());
250248
auto const col_comm_rank = col_comm.get_rank();

cpp/src/prims/vertex_frontier.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,9 @@ class key_bucket_t {
210210
tags_ = std::move(merged_tags);
211211
} else {
212212
auto cur_size = vertices_.size();
213-
vertices_.resize(cur_size + thrust::distance(key_first, key_last));
214-
tags_.resize(vertices_.size());
213+
vertices_.resize(cur_size + thrust::distance(key_first, key_last),
214+
handle_ptr_->get_stream());
215+
tags_.resize(vertices_.size(), handle_ptr_->get_stream());
215216
thrust::copy(
216217
handle_ptr_->get_thrust_policy(),
217218
key_first,

0 commit comments

Comments
 (0)