Skip to content

Commit b2e58f5

Browse files
authored
Add python end to end benchmark and create new directories (#1750)
Update README Create dir python_e2e and add python MG benchmark Create dir python_pytest_based Authors: - Joseph Nke (https://github.com/jnke2016) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) URL: #1750
1 parent 82eb7c8 commit b2e58f5

File tree

12 files changed

+886
-12
lines changed

12 files changed

+886
-12
lines changed

benchmarks/python_e2e/README.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# cuGraph benchmarks
2+
3+
## Overview
4+
5+
The sources are currently intended to benchmark `cuGraph` via the python API,
6+
but future updates may include benchmarks written in C++ or other languages.
7+
8+
The benchmarks here use datasets generated by the RMAT graph generator but also
9+
support csv files as input.
10+
11+
## Prerequisites
12+
### Python
13+
* cugraph built and installed (or `cugraph` sources and built C++ extensions
14+
available on `PYTHONPATH`)
15+
16+
## Usage
17+
* Run `python main.py --help` for a list of all available benchmark options
18+
19+
* Expose the desired GPUs to the benchmark run via CUDA_VISIBLE_DEVICES
20+
21+
* Run `bash ./run_all_nightly_benches.sh` for automated benchmarks with multiple
22+
scales, GPU configurations, and edge factors
23+
24+
* Run `python main.py` to run individual algo benchmarks with specific
25+
options. For example, to benchmark a 2-GPU run of BFS and SSSP with a
26+
generated graph of size scale 23:
27+
```
28+
(rapids) user@machine:/cugraph/benchmarks/python_e2e> export CUDA_VISIBLE_DEVICES=0,1
29+
30+
(rapids) user@machine:/cugraph/benchmarks/python_e2e> python main.py --scale=23 --algo=bfs --algo=sssp
31+
calling setup...distributed.preloading - INFO - Import preload module: dask_cuda.initialize
32+
distributed.preloading - INFO - Import preload module: dask_cuda.initialize
33+
done.
34+
running generate_edgelist (RMAT)...done.
35+
running from_dask_cudf_edgelist...done.
36+
running compute_renumber_edge_list...done.
37+
running compute_renumber_edge_list...done.
38+
running bfs (warmup)...done.
39+
running bfs...done.
40+
running sssp (warmup)...done.
41+
running sssp...done.
42+
from_dask_cudf_edgelist() 0.0133009
43+
------------------------------------------------------------
44+
bfs(start:73496080) 0.569328
45+
------------------------------------------------------------
46+
sssp(start:73496080) 1.48114
47+
48+
calling teardown...done.
49+
```
50+
51+
## Other Examples:
52+
_**NOTE: Some algos require the graph to be symmetrized (Louvain, WCC) or unweighted.**_
53+
* Run all the benchmarks with a generated datasets of scale=23
54+
```
55+
(rapids) user@machine:/cugraph/benchmarks/python_e2e> python main.py --scale=23
56+
```
57+
58+
* Run all the benchmarks with a generated unweighted dataset of scale=23
59+
```
60+
(rapids) user@machine:/cugraph/benchmarks/python_e2e> python main.py --scale=23 --unweighted
61+
```
62+
63+
* Symmetrize the generated dataset of scale=23 and run all the benchmarks
64+
```
65+
(rapids) user@machine:/cugraph/benchmarks/python_e2e> python main.py --scale=23 --symmetric-graph
66+
```
67+
68+
* Create a graph from a csv file and run all the benchmarks
69+
```
70+
(rapids) user@machine:/cugraph/benchmarks/python_e2e> python main.py --csv='karate.csv'
71+
```

benchmarks/python_e2e/benchmark.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# Copyright (c) 2021, NVIDIA CORPORATION.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import sys
15+
import time
16+
from functools import wraps
17+
18+
19+
class BenchmarkedResult:
    """
    Container for the outcome of one benchmarked call: the wrapped
    callable's return value plus timing and meta-data about the run.
    """
    def __init__(self, name, retval, runtime, params=None):
        # Name of the benchmarked callable.
        self.name = name
        # Value returned by the benchmarked callable.
        self.retval = retval
        # Wall-clock duration of the call, in seconds.
        self.runtime = runtime
        # kwargs the callable was invoked with (positional args are not
        # captured by the benchmark wrapper).
        self.params = params if params else {}
        # Set by an optional validator after the run; assumed valid until
        # a validator says otherwise.
        self.validator_result = True
30+
31+
32+
def benchmark(func):
    """
    Return a callable that wraps ``func`` with timing code and, when called,
    returns a BenchmarkedResult.  The returned callable accepts the same
    args/kwargs as ``func``.

    Only kwargs are captured as the result's params; positional args are not.
    Callers that want an argument reported with the benchmark result must
    pass it by keyword — positional args remain available for values that
    are not meaningful as benchmark params.

    Usable both as a function decorator and as a plain wrapping function.
    """
    name = getattr(func, "benchmark_name", func.__name__)

    @wraps(func)
    def timed_call(*args, **kwargs):
        start = time.perf_counter()
        retval = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        return BenchmarkedResult(name=name,
                                 retval=retval,
                                 runtime=elapsed,
                                 params=kwargs,
                                 )

    # Expose the benchmark name on the wrapper itself for debug prints and
    # for callers (e.g. BenchmarkRun) that identify algos by name.
    timed_call.name = name
    return timed_call
63+
64+
65+
class BenchmarkRun:
    """
    Represents a benchmark "run", which can be executed by calling the run()
    method; results are saved as BenchmarkedResult instances in the results
    list member.

    Parameters
    ----------
    input_dataframe : DataFrame
        Edgelist dataframe. Must contain a 'src' column when BFS or SSSP is
        benchmarked, since a start vertex is taken from its first value.
    construct_graph_func : callable or (callable, args_tuple)
        Function that builds a graph from input_dataframe. If a tuple, the
        second element contains extra positional args passed after the
        dataframe.
    algo_func_param_list : sequence
        Sequence of algo callables, or (callable, params_dict) tuples, to
        benchmark against the constructed graph.
    algo_validator_list : sequence or None
        Optional per-algo validators; each is called with
        (algo_result, graph) and its result is saved on the corresponding
        BenchmarkedResult.
    """
    def __init__(self,
                 input_dataframe,
                 construct_graph_func,
                 algo_func_param_list,
                 algo_validator_list=None
                ):
        self.input_dataframe = input_dataframe

        if type(construct_graph_func) is tuple:
            (construct_graph_func,
             self.construct_graph_func_args) = construct_graph_func
        else:
            # FIX: was None, which made run() raise TypeError when unpacking
            # with *self.construct_graph_func_args; use an empty tuple.
            self.construct_graph_func_args = ()

        # Create benchmark instances for each algo/func to be timed.
        # FIXME: need to accept and save individual algo args
        self.construct_graph = benchmark(construct_graph_func)

        # Add a starting vertex to algos that need one (BFS and SSSP), taken
        # from the first 'src' value of the input dataframe.
        for i, item in enumerate(algo_func_param_list):
            # FIX: previously this wrapped the raw item with benchmark() just
            # to read its name, which raised AttributeError when the item was
            # an (algo, params) tuple; read the name directly instead, and
            # don't clobber a caller-supplied "start" param.
            if type(item) is tuple:
                (algo, params) = item
            else:
                (algo, params) = (item, {})
            algo_name = getattr(algo, "benchmark_name", algo.__name__)
            if algo_name in ["bfs", "sssp"] and "start" not in params:
                params["start"] = self.input_dataframe["src"].head()[0]
                algo_func_param_list[i] = (algo, params)

        self.algos = []
        for item in algo_func_param_list:
            if type(item) is tuple:
                (algo, params) = item
            else:
                (algo, params) = (item, {})
            self.algos.append((benchmark(algo), params))

        self.validators = algo_validator_list or [None] * len(self.algos)
        self.results = []


    @staticmethod
    def __log(s, end="\n"):
        # Print and flush immediately so progress is visible even when
        # stdout is buffered (e.g. under dask workers or redirected logs).
        print(s, end=end)
        sys.stdout.flush()


    def run(self):
        """
        Run and time the graph construction step, then run and time each algo.
        Returns True only if every validator (where provided) passed.
        """
        self.results = []

        self.__log(f"running {self.construct_graph.name}...", end="")
        result = self.construct_graph(self.input_dataframe,
                                      *self.construct_graph_func_args)
        self.__log("done.")
        G = result.retval
        self.results.append(result)

        #algos with transposed=True : PageRank, Katz
        #algos with transposed=False: BFS, SSSP, Louvain
        for i in range(len(self.algos)):
            if self.algos[i][0].name in ["pagerank", "katz"]: #set transpose=True when renumbering
                if self.algos[i][0].name == "katz" and self.construct_graph.name == "from_dask_cudf_edgelist":
                    # Compute out_degree before renumbering because
                    # out_degree uses transpose=False.
                    largest_out_degree = G.out_degree().compute().\
                        nlargest(n=1, columns="degree")
                    largest_out_degree = largest_out_degree["degree"].iloc[0]
                    katz_alpha = 1 / (largest_out_degree + 1)
                    self.algos[i][1]["alpha"] = katz_alpha
                elif self.algos[i][0].name == "katz" and self.construct_graph.name == "from_cudf_edgelist":
                    largest_out_degree = G.out_degree().nlargest(n=1, columns="degree")
                    largest_out_degree = largest_out_degree["degree"].iloc[0]
                    katz_alpha = 1 / (largest_out_degree + 1)
                    self.algos[i][1]["alpha"] = katz_alpha
                if hasattr(G, "compute_renumber_edge_list"):
                    G.compute_renumber_edge_list(transposed=True)
            else: #set transpose=False when renumbering
                self.__log("running compute_renumber_edge_list...", end="")
                if hasattr(G, "compute_renumber_edge_list"):
                    G.compute_renumber_edge_list(transposed=False)
                self.__log("done.")
        # FIXME: need to handle individual algo args
        for ((algo, params), validator) in zip(self.algos, self.validators):
            # Warmup call is timed but discarded; only the second call is
            # recorded as the benchmark result.
            self.__log(f"running {algo.name} (warmup)...", end="")
            algo(G, **params)
            self.__log("done.")
            self.__log(f"running {algo.name}...", end="")
            result = algo(G, **params)
            self.__log("done.")

            if validator:
                result.validator_result = validator(result.retval, G)

            self.results.append(result)
            # Reclaim memory since computed algo result is no longer needed
            result.retval = None

        return False not in [r.validator_result for r in self.results]

0 commit comments

Comments
 (0)