Skip to content

Commit 9edc8f7

Browse files
authored
Created notebook for running louvain algorithm on a Multi-GPU Property Graph (#3130)
Resolves #2609. This notebook specifically creates a multi-GPU Property Graph. A single-GPU version might be needed later, but this one shows use of the Property Graph interface. Also updated the mg_louvain notebook to work with the change to Comms. Authors: - Don Acosta (https://github.com/acostadon) - Brad Rees (https://github.com/BradReesWork) Approvers: - Jim Scott (https://github.com/kingmesal) - Brad Rees (https://github.com/BradReesWork) URL: #3130
1 parent a51bfa7 commit 9edc8f7

File tree

2 files changed

+257
-162
lines changed

2 files changed

+257
-162
lines changed

notebooks/demo/mg_louvain.ipynb

Lines changed: 28 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"cells": [
33
{
4+
"attachments": {},
45
"cell_type": "markdown",
56
"metadata": {},
67
"source": [
@@ -9,7 +10,7 @@
910
"\n",
1011
"In this notebook, we will show how to use multiple GPUs in cuGraph to compute the Louvain partitions and global modularity score for a dataset.\n",
1112
"\n",
12-
"This notebook was tested using RAPIDS 21.12 and CUDA 11.4. Please be aware that your system may be different, and you may need to modify the code or install packages to run the below examples. If you think you have found a bug or an error, please file an issue in [cuGraph](https://github.com/rapidsai/cugraph/issues)\n",
13+
"This notebook was tested using RAPIDS 23.02 and CUDA 11.5. Please be aware that your system may be different, and you may need to modify the code or install packages to run the below examples. If you think you have found a bug or an error, please file an issue in [cuGraph](https://github.com/rapidsai/cugraph/issues).\n",
1314
"\n",
1415
"\n",
1516
"CuGraph's multi-GPU features leverage Dask. RAPIDS has other projects based on Dask such as dask-cudf and dask-cuda. These products will also be used in this example. Check out [RAPIDS.ai](https://rapids.ai/) to learn more about these technologies."
@@ -25,14 +26,14 @@
2526
},
2627
{
2728
"cell_type": "code",
28-
"execution_count": 1,
29+
"execution_count": null,
2930
"metadata": {},
3031
"outputs": [],
3132
"source": [
3233
"# Import needed libraries. We recommend using the [cugraph_dev](https://github.com/rapidsai/cugraph/tree/branch-21.12/conda/environments) env through conda\n",
3334
"from dask.distributed import Client, wait\n",
3435
"from dask_cuda import LocalCUDACluster\n",
35-
"import cugraph.comms as Comms\n",
36+
"from cugraph.dask.comms import comms as Comms\n",
3637
"import cugraph.dask as dask_cugraph\n",
3738
"import cugraph\n",
3839
"import dask_cudf\n",
@@ -58,19 +59,11 @@
5859
},
5960
{
6061
"cell_type": "code",
61-
"execution_count": 2,
62+
"execution_count": null,
6263
"metadata": {
6364
"tags": []
6465
},
65-
"outputs": [
66-
{
67-
"name": "stdout",
68-
"output_type": "stream",
69-
"text": [
70-
"Your data file, ../data/hollywood.csv, already exists\n"
71-
]
72-
}
73-
],
66+
"outputs": [],
7467
"source": [
7568
"import urllib.request\n",
7669
"import os\n",
@@ -109,18 +102,9 @@
109102
},
110103
{
111104
"cell_type": "code",
112-
"execution_count": 3,
105+
"execution_count": null,
113106
"metadata": {},
114-
"outputs": [
115-
{
116-
"name": "stderr",
117-
"output_type": "stream",
118-
"text": [
119-
"distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n",
120-
"distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n"
121-
]
122-
}
123-
],
107+
"outputs": [],
124108
"source": [
125109
"cluster = LocalCUDACluster()\n",
126110
"client = Client(cluster)\n",
@@ -137,7 +121,7 @@
137121
},
138122
{
139123
"cell_type": "code",
140-
"execution_count": 4,
124+
"execution_count": null,
141125
"metadata": {
142126
"tags": []
143127
},
@@ -150,7 +134,7 @@
150134
"chunksize = dask_cugraph.get_chunksize(input_data_path)\n",
151135
"\n",
152136
"# Multi-GPU CSV reader\n",
153-
"e_list = dask_cudf.read_csv(input_data_path, chunksize = chunksize, delimiter=' ', names=['src', 'dst'], dtype=['int32', 'int32'])\n"
137+
"e_list = dask_cudf.read_csv(input_data_path, chunksize = chunksize, delimiter=' ', names=['src', 'dst'], dtype=['int32', 'int32'])"
154138
]
155139
},
156140
{
@@ -162,22 +146,14 @@
162146
},
163147
{
164148
"cell_type": "code",
165-
"execution_count": 5,
149+
"execution_count": null,
166150
"metadata": {
167151
"tags": []
168152
},
169-
"outputs": [
170-
{
171-
"name": "stdout",
172-
"output_type": "stream",
173-
"text": [
174-
"Read, load and renumber: 121.63822555541992 s\n"
175-
]
176-
}
177-
],
153+
"outputs": [],
178154
"source": [
179155
"# Create an undirected graph (the cugraph.Graph() default) using the source (src) and destination (dst) vertex pairs from the DataFrame\n",
180-
"G = cugraph.DiGraph()\n",
156+
"G = cugraph.Graph()\n",
181157
"G.from_dask_cudf_edgelist(e_list, source='src', destination='dst')\n",
182158
"\n",
183159
"# Print time\n",
@@ -193,20 +169,12 @@
193169
},
194170
{
195171
"cell_type": "code",
196-
"execution_count": 6,
172+
"execution_count": null,
197173
"metadata": {
198174
"scrolled": true,
199175
"tags": []
200176
},
201-
"outputs": [
202-
{
203-
"name": "stdout",
204-
"output_type": "stream",
205-
"text": [
206-
"Louvain: 8.262660264968872 s\n"
207-
]
208-
}
209-
],
177+
"outputs": [],
210178
"source": [
211179
"# Start Louvain timer\n",
212180
"t_start = time.time()\n",
@@ -236,127 +204,19 @@
236204
},
237205
{
238206
"cell_type": "code",
239-
"execution_count": 7,
207+
"execution_count": null,
240208
"metadata": {
241209
"scrolled": true,
242210
"tags": []
243211
},
244-
"outputs": [
245-
{
246-
"data": {
247-
"text/html": [
248-
"<div>\n",
249-
"<style scoped>\n",
250-
" .dataframe tbody tr th:only-of-type {\n",
251-
" vertical-align: middle;\n",
252-
" }\n",
253-
"\n",
254-
" .dataframe tbody tr th {\n",
255-
" vertical-align: top;\n",
256-
" }\n",
257-
"\n",
258-
" .dataframe thead th {\n",
259-
" text-align: right;\n",
260-
" }\n",
261-
"</style>\n",
262-
"<table border=\"1\" class=\"dataframe\">\n",
263-
" <thead>\n",
264-
" <tr style=\"text-align: right;\">\n",
265-
" <th></th>\n",
266-
" <th>partition</th>\n",
267-
" <th>vertex</th>\n",
268-
" </tr>\n",
269-
" </thead>\n",
270-
" <tbody>\n",
271-
" <tr>\n",
272-
" <th>0</th>\n",
273-
" <td>1</td>\n",
274-
" <td>484873</td>\n",
275-
" </tr>\n",
276-
" <tr>\n",
277-
" <th>1</th>\n",
278-
" <td>22951</td>\n",
279-
" <td>410343</td>\n",
280-
" </tr>\n",
281-
" <tr>\n",
282-
" <th>2</th>\n",
283-
" <td>1</td>\n",
284-
" <td>486216</td>\n",
285-
" </tr>\n",
286-
" <tr>\n",
287-
" <th>3</th>\n",
288-
" <td>22970</td>\n",
289-
" <td>98213</td>\n",
290-
" </tr>\n",
291-
" <tr>\n",
292-
" <th>4</th>\n",
293-
" <td>22970</td>\n",
294-
" <td>100890</td>\n",
295-
" </tr>\n",
296-
" <tr>\n",
297-
" <th>...</th>\n",
298-
" <td>...</td>\n",
299-
" <td>...</td>\n",
300-
" </tr>\n",
301-
" <tr>\n",
302-
" <th>569610</th>\n",
303-
" <td>1571</td>\n",
304-
" <td>1077778</td>\n",
305-
" </tr>\n",
306-
" <tr>\n",
307-
" <th>569611</th>\n",
308-
" <td>0</td>\n",
309-
" <td>629654</td>\n",
310-
" </tr>\n",
311-
" <tr>\n",
312-
" <th>569612</th>\n",
313-
" <td>33</td>\n",
314-
" <td>963203</td>\n",
315-
" </tr>\n",
316-
" <tr>\n",
317-
" <th>569613</th>\n",
318-
" <td>27308</td>\n",
319-
" <td>1077821</td>\n",
320-
" </tr>\n",
321-
" <tr>\n",
322-
" <th>569614</th>\n",
323-
" <td>0</td>\n",
324-
" <td>239788</td>\n",
325-
" </tr>\n",
326-
" </tbody>\n",
327-
"</table>\n",
328-
"<p>1139905 rows × 2 columns</p>\n",
329-
"</div>"
330-
],
331-
"text/plain": [
332-
" partition vertex\n",
333-
"0 1 484873\n",
334-
"1 22951 410343\n",
335-
"2 1 486216\n",
336-
"3 22970 98213\n",
337-
"4 22970 100890\n",
338-
"... ... ...\n",
339-
"569610 1571 1077778\n",
340-
"569611 0 629654\n",
341-
"569612 33 963203\n",
342-
"569613 27308 1077821\n",
343-
"569614 0 239788\n",
344-
"\n",
345-
"[1139905 rows x 2 columns]"
346-
]
347-
},
348-
"execution_count": 7,
349-
"metadata": {},
350-
"output_type": "execute_result"
351-
}
352-
],
212+
"outputs": [],
353213
"source": [
354-
"louvain_df.compute()\n"
214+
"louvain_df.compute()"
355215
]
356216
},
357217
{
358218
"cell_type": "code",
359-
"execution_count": 8,
219+
"execution_count": null,
360220
"metadata": {},
361221
"outputs": [],
362222
"source": [
@@ -366,11 +226,12 @@
366226
]
367227
},
368228
{
229+
"attachments": {},
369230
"cell_type": "markdown",
370231
"metadata": {},
371232
"source": [
372233
"___\n",
373-
"Copyright (c) 2021, NVIDIA CORPORATION.\n",
234+
"Copyright (c) 2021-2023, NVIDIA CORPORATION.\n",
374235
"\n",
375236
"Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n",
376237
"\n",
@@ -381,7 +242,7 @@
381242
],
382243
"metadata": {
383244
"kernelspec": {
384-
"display_name": "Python 3 (ipykernel)",
245+
"display_name": "cudfdev",
385246
"language": "python",
386247
"name": "python3"
387248
},
@@ -395,7 +256,12 @@
395256
"name": "python",
396257
"nbconvert_exporter": "python",
397258
"pygments_lexer": "ipython3",
398-
"version": "3.8.10"
259+
"version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 15:55:03) \n[GCC 10.4.0]"
260+
},
261+
"vscode": {
262+
"interpreter": {
263+
"hash": "587ff963ecd34554a9da41c94362e2baa062d9a57502e220f049e10816826984"
264+
}
399265
}
400266
},
401267
"nbformat": 4,

0 commit comments

Comments
 (0)