Skip to content

Commit 2d730b2

Browse files
authored
Merge 21fe2aa into aba3445
2 parents aba3445 + 21fe2aa commit 2d730b2

File tree

9 files changed

+607
-192
lines changed

9 files changed

+607
-192
lines changed

cpp/include/cugraph/experimental/detail/graph_utils.cuh

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,10 @@ template <typename vertex_t, typename edge_t>
4343
rmm::device_uvector<edge_t> compute_major_degrees(
4444
raft::handle_t const& handle,
4545
std::vector<edge_t const*> const& adj_matrix_partition_offsets,
46-
partition_t<vertex_t> const& partition)
46+
std::optional<std::vector<vertex_t const*>> const& adj_matrix_partition_dcs_nzd_vertices,
47+
std::optional<std::vector<vertex_t>> const& adj_matrix_partition_dcs_nzd_vertex_counts,
48+
partition_t<vertex_t> const& partition,
49+
std::optional<std::vector<vertex_t>> const& adj_matrix_partition_segment_offsets)
4750
{
4851
auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name());
4952
auto const row_comm_rank = row_comm.get_rank();
@@ -52,6 +55,8 @@ rmm::device_uvector<edge_t> compute_major_degrees(
5255
auto const col_comm_rank = col_comm.get_rank();
5356
auto const col_comm_size = col_comm.get_size();
5457

58+
auto use_dcs = adj_matrix_partition_dcs_nzd_vertices.has_value();
59+
5560
rmm::device_uvector<edge_t> local_degrees(0, handle.get_stream());
5661
rmm::device_uvector<edge_t> degrees(0, handle.get_stream());
5762

@@ -69,11 +74,37 @@ rmm::device_uvector<edge_t> compute_major_degrees(
6974
vertex_t major_last{};
7075
std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx);
7176
auto p_offsets = adj_matrix_partition_offsets[i];
77+
auto major_hypersparse_first =
78+
use_dcs ? major_first + (*adj_matrix_partition_segment_offsets)
79+
[(detail::num_sparse_segments_per_vertex_partition + 2) * i +
80+
detail::num_sparse_segments_per_vertex_partition]
81+
: major_last;
7282
thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
7383
thrust::make_counting_iterator(vertex_t{0}),
74-
thrust::make_counting_iterator(major_last - major_first),
75-
local_degrees.data(),
84+
thrust::make_counting_iterator(major_hypersparse_first - major_first),
85+
local_degrees.begin(),
7686
[p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; });
87+
if (use_dcs) {
88+
auto p_dcs_nzd_vertices = (*adj_matrix_partition_dcs_nzd_vertices)[i];
89+
auto dcs_nzd_vertex_count = (*adj_matrix_partition_dcs_nzd_vertex_counts)[i];
90+
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
91+
local_degrees.begin() + (major_hypersparse_first - major_first),
92+
local_degrees.begin() + (major_last - major_first),
93+
edge_t{0});
94+
thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
95+
thrust::make_counting_iterator(vertex_t{0}),
96+
thrust::make_counting_iterator(dcs_nzd_vertex_count),
97+
[p_offsets,
98+
p_dcs_nzd_vertices,
99+
major_first,
100+
major_hypersparse_first,
101+
local_degrees = local_degrees.data()] __device__(auto i) {
102+
auto d = p_offsets[(major_hypersparse_first - major_first) + i + 1] -
103+
p_offsets[(major_hypersparse_first - major_first) + i];
104+
auto v = p_dcs_nzd_vertices[i];
105+
local_degrees[v - major_first] = d;
106+
});
107+
}
77108
col_comm.reduce(local_degrees.data(),
78109
i == col_comm_rank ? degrees.data() : static_cast<edge_t*>(nullptr),
79110
static_cast<size_t>(major_last - major_first),
@@ -85,23 +116,6 @@ rmm::device_uvector<edge_t> compute_major_degrees(
85116
return degrees;
86117
}
87118

88-
// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
89-
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
90-
template <typename vertex_t, typename edge_t>
91-
rmm::device_uvector<edge_t> compute_major_degrees(
92-
raft::handle_t const& handle,
93-
std::vector<rmm::device_uvector<edge_t>> const& adj_matrix_partition_offsets,
94-
partition_t<vertex_t> const& partition)
95-
{
96-
// we can avoid creating this temporary with "if constexpr" supported from C++17
97-
std::vector<edge_t const*> tmp_offsets(adj_matrix_partition_offsets.size(), nullptr);
98-
std::transform(adj_matrix_partition_offsets.begin(),
99-
adj_matrix_partition_offsets.end(),
100-
tmp_offsets.begin(),
101-
[](auto const& offsets) { return offsets.data(); });
102-
return compute_major_degrees(handle, tmp_offsets, partition);
103-
}
104-
105119
// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
106120
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
107121
template <typename vertex_t, typename edge_t>
@@ -117,13 +131,6 @@ rmm::device_uvector<edge_t> compute_major_degrees(raft::handle_t const& handle,
117131
return degrees;
118132
}
119133

120-
template <typename vertex_t, typename edge_t>
121-
struct degree_from_offsets_t {
122-
edge_t const* offsets{nullptr};
123-
124-
__device__ edge_t operator()(vertex_t v) { return offsets[v + 1] - offsets[v]; }
125-
};
126-
127134
template <typename vertex_t>
128135
struct compute_gpu_id_from_vertex_t {
129136
int comm_size{0};

cpp/include/cugraph/matrix_partition_device_view.cuh

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@
1515
*/
1616
#pragma once
1717

18+
#include <cugraph/experimental/graph.hpp>
1819
#include <cugraph/experimental/graph_view.hpp>
1920
#include <cugraph/utilities/error.hpp>
2021

22+
#include <thrust/binary_search.h>
23+
#include <thrust/distance.h>
2124
#include <thrust/optional.h>
2225
#include <thrust/tuple.h>
2326

27+
#include <cassert>
2428
#include <optional>
2529
#include <type_traits>
2630

@@ -49,25 +53,28 @@ class matrix_partition_device_view_base_t {
4953
__host__ __device__ vertex_t const* get_indices() const { return indices_; }
5054
__host__ __device__ thrust::optional<weight_t const*> get_weights() const { return weights_; }
5155

56+
// major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
5257
__device__ thrust::tuple<vertex_t const*, thrust::optional<weight_t const*>, edge_t>
53-
get_local_edges(vertex_t major_offset) const noexcept
58+
get_local_edges(vertex_t major_idx) const noexcept
5459
{
55-
auto edge_offset = *(offsets_ + major_offset);
56-
auto local_degree = *(offsets_ + (major_offset + 1)) - edge_offset;
60+
auto edge_offset = *(offsets_ + major_idx);
61+
auto local_degree = *(offsets_ + (major_idx + 1)) - edge_offset;
5762
auto indices = indices_ + edge_offset;
5863
auto weights =
5964
weights_ ? thrust::optional<weight_t const*>{*weights_ + edge_offset} : thrust::nullopt;
6065
return thrust::make_tuple(indices, weights, local_degree);
6166
}
6267

63-
__device__ edge_t get_local_degree(vertex_t major_offset) const noexcept
68+
// major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
69+
__device__ edge_t get_local_degree(vertex_t major_idx) const noexcept
6470
{
65-
return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset);
71+
return *(offsets_ + (major_idx + 1)) - *(offsets_ + major_idx);
6672
}
6773

68-
__device__ edge_t get_local_offset(vertex_t major_offset) const noexcept
74+
// major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
75+
__device__ edge_t get_local_offset(vertex_t major_idx) const noexcept
6976
{
70-
return *(offsets_ + major_offset);
77+
return *(offsets_ + major_idx);
7178
}
7279

7380
private:
@@ -148,6 +155,34 @@ class matrix_partition_device_view_t<vertex_t,
148155
return major_first_ + major_offset;
149156
}
150157

158+
// major_hypersparse_idx: index within the hypersparse segment
159+
__host__ __device__ thrust::optional<vertex_t> get_major_hypersparse_idx_from_major_nocheck(
160+
vertex_t major) const noexcept
161+
{
162+
if (dcs_nzd_vertices_) {
163+
// we can avoid binary search (and potentially improve performance) if we add an auxiliary
164+
// array or cuco::static_map (at the expense of additional memory)
165+
auto it = thrust::lower_bound(
166+
thrust::seq, *dcs_nzd_vertices_, *dcs_nzd_vertices_ + *dcs_nzd_vertex_count_, major);
167+
return it != *dcs_nzd_vertices_ + *dcs_nzd_vertex_count_
168+
? (*it == major ? thrust::optional<vertex_t>{static_cast<vertex_t>(
169+
thrust::distance(*dcs_nzd_vertices_, it))}
170+
: thrust::nullopt)
171+
: thrust::nullopt;
172+
} else {
173+
return thrust::nullopt;
174+
}
175+
}
176+
177+
// major_hypersparse_idx: index within the hypersparse segment
178+
__host__ __device__ thrust::optional<vertex_t> get_major_from_major_hypersparse_idx_nocheck(
179+
vertex_t major_hypersparse_idx) const noexcept
180+
{
181+
return dcs_nzd_vertices_
182+
? thrust::optional<vertex_t>{(*dcs_nzd_vertices_)[major_hypersparse_idx]}
183+
: thrust::nullopt;
184+
}
185+
151186
__host__ __device__ vertex_t
152187
get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
153188
{
@@ -159,6 +194,15 @@ class matrix_partition_device_view_t<vertex_t,
159194
return major_value_start_offset_;
160195
}
161196

197+
__host__ __device__ thrust::optional<vertex_t const*> get_dcs_nzd_vertices() const
198+
{
199+
return dcs_nzd_vertices_;
200+
}
201+
__host__ __device__ thrust::optional<vertex_t> get_dcs_nzd_vertex_count() const
202+
{
203+
return dcs_nzd_vertex_count_;
204+
}
205+
162206
private:
163207
// should be trivially copyable to device
164208

@@ -220,12 +264,39 @@ class matrix_partition_device_view_t<vertex_t,
220264
return major_offset;
221265
}
222266

267+
// major_hypersparse_idx: index within the hypersparse segment
268+
__host__ __device__ thrust::optional<vertex_t> get_major_hypersparse_idx_from_major_nocheck(
269+
vertex_t major) const noexcept
270+
{
271+
assert(false);
272+
return thrust::nullopt;
273+
}
274+
275+
// major_hypersparse_idx: index within the hypersparse segment
276+
__host__ __device__ thrust::optional<vertex_t> get_major_from_major_hypersparse_idx_nocheck(
277+
vertex_t major_hypersparse_idx) const noexcept
278+
{
279+
assert(false);
280+
return thrust::nullopt;
281+
}
282+
223283
__host__ __device__ vertex_t
224284
get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
225285
{
226286
return minor_offset;
227287
}
228288

289+
__host__ __device__ thrust::optional<vertex_t const*> get_dcs_nzd_vertices() const
290+
{
291+
assert(false);
292+
return thrust::nullopt;
293+
}
294+
__host__ __device__ thrust::optional<vertex_t> get_dcs_nzd_vertex_count() const
295+
{
296+
assert(false);
297+
return thrust::nullopt;
298+
}
299+
229300
private:
230301
vertex_t number_of_vertices_;
231302
};

cpp/include/cugraph/matrix_partition_view.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ class matrix_partition_view_t<vertex_t, edge_t, weight_t, multi_gpu, std::enable
123123
{
124124
}
125125

126+
std::optional<vertex_t const*> get_dcs_nzd_vertices() const { return std::nullopt; }
127+
std::optional<vertex_t> get_dcs_nzd_vertex_count() const { return std::nullopt; }
128+
126129
vertex_t get_major_first() const { return vertex_t{0}; }
127130
vertex_t get_major_last() const { return number_of_vertices_; }
128131
vertex_t get_minor_first() const { return vertex_t{0}; }

0 commit comments

Comments
 (0)