Skip to content

Commit 6d0239d

Browse files
authored
Add Eigenvector Centrality to pylibcugraph, cugraph APIs (#2255)
This PR: 1. Adds Eigenvector Centrality to the pylibcugraph and cugraph software stacks; this work started in #2180 and will be followed up by future PRs in order to close #2146. 2. Makes minor improvements to pylibcugraph Katz Centrality. 3. Adds functionality to `test_doctests.py` so that certain docstrings can be skipped on different architecture configs (such as ktruss in CUDA 11.4). 4. Adds undirected/directed versions of the graph example used in the C tests to `datasets`. 5. Removes cugraph's copy of the pylibcugraph warning wrapper and has cugraph call the pylibcugraph version instead. 6. Adds testing for both Python eigenvector centrality wrappers. This PR is identical to #2243, except that the branch name is different. Authors: - https://github.com/betochimas - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: #2255
1 parent b0c6a9e commit 6d0239d

File tree

21 files changed

+878
-107
lines changed

21 files changed

+878
-107
lines changed

datasets/toy_graph.csv

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,12 @@
55
2 1 3.1
66
2 3 4.1
77
3 5 7.2
8-
4 5 3.2
8+
4 5 3.2
9+
1 0 0.1
10+
3 1 2.1
11+
4 1 1.1
12+
0 2 5.1
13+
1 2 3.1
14+
3 2 4.1
15+
5 3 7.2
16+
5 4 3.2

datasets/toy_graph_undirected.csv

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
0 1 0.1
2+
1 3 2.1
3+
1 4 1.1
4+
2 0 5.1
5+
2 1 3.1
6+
2 3 4.1
7+
3 5 7.2
8+
4 5 3.2

docs/cugraph/source/api_docs/centrality.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,17 @@ Degree Centrality
3333
:toctree: api/
3434

3535
cugraph.degree_centrality
36+
37+
Eigenvector Centrality
38+
----------------------
39+
.. autosummary::
40+
:toctree: api/
41+
42+
cugraph.centrality.eigenvector_centrality
43+
44+
Eigenvector Centrality (MG)
45+
---------------------------
46+
.. autosummary::
47+
:toctree: api/
48+
49+
cugraph.dask.centrality.eigenvector_centrality.eigenvector_centrality

python/cugraph/cugraph/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
edge_betweenness_centrality,
6262
katz_centrality,
6363
degree_centrality,
64+
eigenvector_centrality,
6465
)
6566

6667
from cugraph.cores import core_number, k_core

python/cugraph/cugraph/centrality/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@
1717
edge_betweenness_centrality,
1818
)
1919
from cugraph.centrality.degree_centrality import degree_centrality
20+
from cugraph.centrality.eigenvector_centrality import eigenvector_centrality
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Copyright (c) 2022, NVIDIA CORPORATION.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
from pylibcugraph import (ResourceHandle,
15+
GraphProperties,
16+
SGGraph,
17+
eigenvector_centrality as pylib_eigen
18+
)
19+
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
20+
df_score_to_dictionary,
21+
)
22+
import cudf
23+
import cupy
24+
25+
26+
def eigenvector_centrality(
    G, max_iter=100, tol=1.0e-6, normalized=True
):
    """
    Compute the eigenvector centrality for a graph G.

    Eigenvector centrality computes the centrality for a node based on the
    centrality of its neighbors. The eigenvector centrality for node i is the
    i-th element of the vector x defined by the eigenvector equation.

    Parameters
    ----------
    G : cuGraph.Graph or networkx.Graph
        cuGraph graph descriptor with connectivity information. The graph can
        contain either directed or undirected edges.

    max_iter : int, optional (default=100)
        The maximum number of iterations before an answer is returned. This can
        be used to limit the execution time and do an early exit before the
        solver reaches the convergence tolerance. Must be a positive integer.

    tol : float, optional (default=1e-6)
        Set the tolerance of the approximation; this parameter should be a
        small magnitude value and must be a positive float.
        The lower the tolerance the better the approximation.
        Setting too small a tolerance can lead to non-convergence due to
        numerical roundoff. Usually values between 1e-2 and 1e-6 are
        acceptable.

    normalized : bool, optional, default=True
        Accepted as part of the signature. NOTE: this wrapper does not
        currently read this argument, so changing it has no effect on the
        values computed here.

    Returns
    -------
    df : cudf.DataFrame or Dictionary if using NetworkX
        GPU data frame containing two cudf.Series of size V: the vertex
        identifiers and the corresponding eigenvector centrality values.
        df['vertex'] : cudf.Series
            Contains the vertex identifiers
        df['eigenvector_centrality'] : cudf.Series
            Contains the eigenvector centrality of vertices

    Raises
    ------
    ValueError
        If 'max_iter' is not a positive integer, or if 'tol' is not a
        positive float.

    Examples
    --------
    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> ec = cugraph.eigenvector_centrality(G)

    """
    # Validate the solver parameters before touching the graph so that bad
    # arguments fail fast with a clear message.
    if (not isinstance(max_iter, int)) or max_iter <= 0:
        raise ValueError("'max_iter' must be a positive integer"
                         f", got: {max_iter}")
    if (not isinstance(tol, float)) or (tol <= 0.0):
        raise ValueError(f"'tol' must be a positive float, got: {tol}")

    G, isNx = ensure_cugraph_obj_for_nx(G)

    edgelist_df = G.edgelist.edgelist_df
    srcs = edgelist_df['src']
    dsts = edgelist_df['dst']
    if 'weights' in edgelist_df.columns:
        weights = edgelist_df['weights']
    else:
        # FIXME: If weights column is not imported, a weights column of 1s
        # with type hardcoded to float32 is passed into wrapper
        weights = cudf.Series(cupy.ones(srcs.size, dtype="float32"))

    resource_handle = ResourceHandle()
    graph_props = GraphProperties(is_multigraph=G.is_multigraph())
    store_transposed = False
    renumber = False
    do_expensive_check = False

    sg = SGGraph(resource_handle, graph_props, srcs, dsts, weights,
                 store_transposed, renumber, do_expensive_check)

    vertices, values = pylib_eigen(resource_handle, sg,
                                   tol, max_iter,
                                   do_expensive_check)

    df = cudf.DataFrame()
    df["vertex"] = cudf.Series(vertices)
    df["eigenvector_centrality"] = cudf.Series(values)

    # Map internal vertex ids back to the caller's ids when the graph was
    # renumbered.
    if G.renumbered:
        df = G.unrenumber(df, "vertex")

    # NetworkX callers get a dictionary of scores; cuGraph callers get the
    # DataFrame.
    if isNx is True:
        return df_score_to_dictionary(df, "eigenvector_centrality")
    return df

python/cugraph/cugraph/centrality/katz_centrality.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111
# See the License for the specific language governing permissions and
1212
# limitations under the License.
1313

14-
from pylibcugraph.experimental import (ResourceHandle,
15-
GraphProperties,
16-
SGGraph,
17-
katz_centrality as pylibcugraph_katz
18-
)
14+
from pylibcugraph import (ResourceHandle,
15+
GraphProperties,
16+
SGGraph,
17+
katz_centrality as pylibcugraph_katz
18+
)
1919
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
2020
df_score_to_dictionary,
2121
)
@@ -74,8 +74,6 @@ def katz_centrality(
7474
The maximum number of iterations before an answer is returned. This can
7575
be used to limit the execution time and do an early exit before the
7676
solver reaches the convergence tolerance.
77-
If this value is lower or equal to 0 cuGraph will use the default
78-
value, which is 100.
7977
8078
tol : float, optional (default=1.0e-6)
8179
Set the tolerance the approximation, this parameter should be a small
@@ -124,10 +122,9 @@ def katz_centrality(
124122
elif (not isinstance(beta, float)) or (beta <= 0.0):
125123
raise ValueError(f"'beta' must be a positive float or None, "
126124
f"got: {beta}")
127-
if (not isinstance(max_iter, int)):
128-
raise ValueError(f"'max_iter' must be an integer, got: {max_iter}")
129-
elif max_iter <= 0:
130-
max_iter = 100
125+
if (not isinstance(max_iter, int)) or (max_iter <= 0):
126+
raise ValueError(f"'max_iter' must be a positive integer"
127+
f", got: {max_iter}")
131128
if (not isinstance(tol, float)) or (tol <= 0.0):
132129
raise ValueError(f"'tol' must be a positive float, got: {tol}")
133130

python/cugraph/cugraph/dask/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@
1919
from .community.louvain import louvain
2020
from .centrality.katz_centrality import katz_centrality
2121
from .components.connectivity import weakly_connected_components
22+
from .centrality.eigenvector_centrality import eigenvector_centrality

0 commit comments

Comments
 (0)