Skip to content

Commit 272e316

Browse files
authored
Updates pytest benchmarks to use synthetic data and multi-GPUs (#3540)
closes #2810 closes #3282 * Adds the ability to use datasets read from files on disk and/or RMAT-generated synthetic datasets. * Adds markers for "file_data" and "rmat_data" for use by benchmark scripts, based on cluster size. * Adds CLI options for specifying the RMAT scale and edgefactor in order to generate datasets large enough for MNMG runs. * Adds fixtures for use by `bench_algos.py` benchmarks which will instantiate graph objs based on dataset type and SG or MG markers. * Updated `Dataset` class to allow instances to be used as test params and properly provide human-readable/deterministic test IDs. * Added ability for `Dataset` ctor to take a .csv file as input, useful when a metadata.yaml file for a dataset has not been created yet. * Added options to `get_test_data.sh` in the CI scripts to download a subset of datasets for C++ (to save time/space since most datasets aren't needed), and to only download the benchmark data for python (for use when running benchmarks as tests). Authors: - Rick Ratzel (https://github.com/rlratzel) - Alex Barghi (https://github.com/alexbarghi-nv) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) - Vibhu Jawa (https://github.com/VibhuJawa) - Ray Douglass (https://github.com/raydouglass) URL: #3540
1 parent 6eb4f0a commit 272e316

File tree

20 files changed

+763
-410
lines changed

20 files changed

+763
-410
lines changed

benchmarks/cugraph-dgl/pytest-based/bench_cugraph_dgl_uniform_neighbor_sample.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
def create_graph(graph_data):
4040
"""
4141
Create a graph instance based on the data to be loaded/generated.
42-
"""
42+
"""
4343
print("Initalize Pool on client")
4444
rmm.reinitialize(pool_allocator=True)
4545
# Assume strings are names of datasets in the datasets package
@@ -77,7 +77,7 @@ def create_graph(graph_data):
7777
num_nodes_dict = {'_N':num_nodes}
7878

7979
gs = CuGraphStorage(num_nodes_dict=num_nodes_dict, single_gpu=True)
80-
gs.add_edge_data(edgelist_df,
80+
gs.add_edge_data(edgelist_df,
8181
# reverse to make same graph as cugraph
8282
node_col_names=['dst', 'src'],
8383
canonical_etype=['_N', 'connects', '_N'])
@@ -90,11 +90,9 @@ def create_mg_graph(graph_data):
9090
"""
9191
Create a graph instance based on the data to be loaded/generated.
9292
"""
93-
## Reserving GPU 0 for client(trainer/service project)
94-
n_devices = os.getenv('DASK_NUM_WORKERS', 4)
95-
n_devices = int(n_devices)
93+
# range starts at 1 to let 0 be used by benchmark/client process
94+
visible_devices = os.getenv("DASK_WORKER_DEVICES", "1,2,3,4")
9695

97-
visible_devices = ','.join([str(i) for i in range(1, n_devices+1)])
9896
cluster = LocalCUDACluster(protocol='ucx', rmm_pool_size='25GB', CUDA_VISIBLE_DEVICES=visible_devices)
9997
client = Client(cluster)
10098
Comms.initialize(p2p=True)
@@ -137,7 +135,7 @@ def create_mg_graph(graph_data):
137135
num_nodes_dict = {'_N':num_nodes}
138136

139137
gs = CuGraphStorage(num_nodes_dict=num_nodes_dict, single_gpu=False)
140-
gs.add_edge_data(edgelist_df,
138+
gs.add_edge_data(edgelist_df,
141139
node_col_names=['dst', 'src'],
142140
canonical_etype=['_N', 'C', '_N'])
143141
return (gs, client, cluster)
@@ -166,7 +164,7 @@ def get_uniform_neighbor_sample_args(
166164
num_start_verts = int(num_verts * 0.25)
167165
else:
168166
num_start_verts = batch_size
169-
167+
170168
srcs = G.graphstore.gdata.get_edge_data()['_SRC_']
171169
start_list = srcs.head(num_start_verts)
172170
assert len(start_list) == num_start_verts
@@ -229,7 +227,7 @@ def bench_cugraph_dgl_uniform_neighbor_sample(
229227
fanout_val.reverse()
230228
sampler = dgl.dataloading.NeighborSampler(uns_args["fanout"])
231229
sampler_f = sampler.sample_blocks
232-
230+
233231
# Warmup
234232
_ = sampler_f(g=G, seed_nodes=uns_args["seed_nodes"])
235233
# print(f"\n{uns_args}")

0 commit comments

Comments
 (0)