Skip to content

Commit 7256f32

Browse files
authored
Fix for bug in SCC on self-loops (#1475)
This provides fixes for strongly connected components on graphs with self-loops: #1471. closes #1471 Authors: - Andrei Schaffer (@aschaffer) Approvers: - Brad Rees (@BradReesWork) - Rick Ratzel (@rlratzel) URL: #1475
1 parent 254a999 commit 7256f32

File tree

3 files changed

+310
-86
lines changed

3 files changed

+310
-86
lines changed

cpp/src/components/connectivity.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ std::enable_if_t<std::is_signed<VT>::value> connected_components_impl(
7878
stream);
7979
} else {
8080
SCC_Data<ByteT, VT> sccd(nrows, graph.offsets, graph.indices);
81-
sccd.run_scc(labels);
81+
auto num_iters = sccd.run_scc(labels);
8282
}
8383
}
8484
} // namespace detail

cpp/src/components/scc_matrix.cuh

Lines changed: 63 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -71,12 +71,13 @@ struct SCC_Data {
7171
p_d_r_o_(p_d_r_o),
7272
p_d_c_i_(p_d_c_i),
7373
d_C(nrows * nrows, 0),
74-
d_Cprev(nrows * nrows, 0)
74+
d_Cprev(nrows * nrows, 0),
75+
p_d_C_(d_C.data().get())
7576
{
7677
init();
7778
}
7879

79-
const thrust::device_vector<ByteT>& get_C(void) const { return d_C; }
80+
ByteT const* get_Cptr(void) const { return p_d_C_; }
8081

8182
size_t nrows(void) const { return nrows_; }
8283

@@ -100,13 +101,12 @@ struct SCC_Data {
100101

101102
void get_labels(IndexT* d_labels) const
102103
{
103-
auto* p_d_C = d_C.data().get();
104-
size_t n = nrows_; // for lambda capture, since I cannot capture `this` (host), or `nrows_`
104+
size_t n = nrows_; // for lambda capture, since I cannot capture `this` (host), or `nrows_`
105105
thrust::transform(thrust::device,
106106
thrust::make_counting_iterator<IndexT>(0),
107107
thrust::make_counting_iterator<IndexT>(nrows_),
108108
d_labels,
109-
[n, p_d_C] __device__(IndexT k) {
109+
[n, p_d_C = p_d_C_] __device__(IndexT k) {
110110
auto begin = p_d_C + k * n;
111111
auto end = begin + n;
112112
ByteT one{1};
@@ -124,7 +124,6 @@ struct SCC_Data {
124124
size_t nrows = nrows_;
125125
size_t count = 0;
126126

127-
ByteT* p_d_C = d_C.data().get();
128127
ByteT* p_d_Cprev = get_Cprev().data().get();
129128

130129
size_t n2 = nrows * nrows;
@@ -136,57 +135,60 @@ struct SCC_Data {
136135
do {
137136
flag.set(0);
138137

139-
thrust::for_each(thrust::device,
140-
thrust::make_counting_iterator<size_t>(0),
141-
thrust::make_counting_iterator<size_t>(n2),
142-
[nrows, p_d_C, p_d_Cprev, p_d_flag, p_d_ro, p_d_ci] __device__(size_t indx) {
143-
ByteT one{1};
144-
145-
auto i = indx / nrows;
146-
auto j = indx % nrows;
147-
148-
if ((i == j) || (p_d_Cprev[indx] == one))
149-
p_d_C[indx] = one;
150-
else {
151-
// this is where a hash-map could help:
152-
// only need hashmap[(i,j)]={0,1} (`1` for "hit");
153-
// and only for new entries!
154-
// already existent entries are covered by
155-
// the `if`-branch above!
156-
// Hence, hashmap[] can use limited space:
157-
// M = max_l{number(new `1` entries)}, where
158-
// l = #iterations in the do-loop!
159-
// M ~ new `1` entries between A^k and A^{k+1},
160-
// k=1,2,...
161-
// Might M actually be M ~ nnz(A) = |E| ?!
162-
// Probably, because the primitive hash
163-
//(via find_if) uses a search space of nnz(A)
164-
//
165-
// But, what if more than 1 entry pops-up in a row?
166-
// Not an issue! Because the hash key is (i,j), and no
167-
// more than one entry can exist in position (i,j)!
168-
//
169-
// And remember, we only need to store the new (i,j) keys
170-
// that an iteration produces wrt to the previous iteration!
171-
//
172-
auto begin = p_d_ci + p_d_ro[i];
173-
auto end = p_d_ci + p_d_ro[i + 1];
174-
auto pos = thrust::find_if(
175-
thrust::seq, begin, end, [one, j, nrows, p_d_Cprev, p_d_ci](IndexT k) {
176-
return (p_d_Cprev[k * nrows + j] == one);
177-
});
178-
179-
if (pos != end) p_d_C[indx] = one;
180-
}
181-
182-
if (p_d_C[indx] != p_d_Cprev[indx])
183-
*p_d_flag = 1; // race-condition: harmless, worst case many threads
184-
// write the same value
185-
});
138+
thrust::for_each(
139+
thrust::device,
140+
thrust::make_counting_iterator<size_t>(0),
141+
thrust::make_counting_iterator<size_t>(n2),
142+
[nrows, p_d_C = p_d_C_, p_d_Cprev, p_d_flag, p_d_ro, p_d_ci] __device__(size_t indx) {
143+
ByteT one{1};
144+
145+
auto i = indx / nrows;
146+
auto j = indx % nrows;
147+
148+
if ((i == j) || (p_d_Cprev[indx] == one)) {
149+
p_d_C[indx] = one;
150+
} else {
151+
// this ammounts to A (^,v) B
152+
// (where A = adjacency matrix defined by (p_ro, p_ci),
153+
// B := p_d_Cprev; (^,v) := (*,+) semiring);
154+
// Here's why:
155+
// (A (^,v) B)[i][j] := A[i][.] (^,v) B[j][.]
156+
// (where X[i][.] := i-th row of X;
157+
// X[.][j] := j-th column of X);
158+
// which is:
159+
// 1, iff A[i][.] and B[j][.] have a 1 in the same location,
160+
// 0, otherwise;
161+
//
162+
// i.e., corresponfing entry in p_d_C is 1
163+
// if B[k][j] == 1 for any column k in A's i-th row;
164+
// hence, for each column k of row A[i][.],
165+
// which is the set:
166+
// k \in {p_ci + p_ro[i], ..., p_ci + p_ro[i+1] - 1},
167+
// check if (B[k][j] == 1),
168+
// i.e., p_d_Cprev[k*nrows + j]) == 1:
169+
//
170+
auto begin = p_d_ci + p_d_ro[i];
171+
auto end = p_d_ci + p_d_ro[i + 1];
172+
auto pos = thrust::find_if(
173+
thrust::seq, begin, end, [one, j, nrows, p_d_Cprev, p_d_ci](IndexT k) {
174+
return (p_d_Cprev[k * nrows + j] == one);
175+
});
176+
177+
if (pos != end) p_d_C[indx] = one;
178+
}
179+
180+
if (p_d_C[indx] != p_d_Cprev[indx])
181+
*p_d_flag = 1; // race-condition: harmless,
182+
// worst case many threads
183+
// write the _same_ value
184+
});
186185
++count;
187186
cudaDeviceSynchronize();
188187

189-
std::swap(p_d_C, p_d_Cprev);
188+
std::swap(p_d_C_, p_d_Cprev); // Note 1: this swap makes `p_d_Cprev` the
189+
// most recently updated matrix pointer
190+
// at the end of this loop
191+
// (see `Note 2` why this matters);
190192
} while (flag.is_set());
191193

192194
// C & Ct:
@@ -196,11 +198,13 @@ struct SCC_Data {
196198
thrust::for_each(thrust::device,
197199
thrust::make_counting_iterator<size_t>(0),
198200
thrust::make_counting_iterator<size_t>(n2),
199-
[nrows, p_d_C, p_d_Cprev] __device__(size_t indx) {
201+
[nrows, p_d_C = p_d_C_, p_d_Cprev] __device__(size_t indx) {
200202
auto i = indx / nrows;
201203
auto j = indx % nrows;
202204
auto tindx = j * nrows + i;
203205

206+
// Note 2: per Note 1, p_d_Cprev is latest:
207+
//
204208
p_d_C[indx] = (p_d_Cprev[indx]) & (p_d_Cprev[tindx]);
205209
});
206210

@@ -215,6 +219,9 @@ struct SCC_Data {
215219
const IndexT* p_d_c_i_; // column indices
216220
thrust::device_vector<ByteT> d_C;
217221
thrust::device_vector<ByteT> d_Cprev;
222+
ByteT* p_d_C_{nullptr}; // holds the most recent update,
223+
// which can have storage in any of d_C or d_Cprev,
224+
// because the pointers get swapped!
218225

219226
thrust::device_vector<ByteT>& get_Cprev(void) { return d_Cprev; }
220227
};

0 commit comments

Comments
 (0)