Skip to content

Commit cd2a91b

Browse files
authored
Create a notebook comparing nx and cuGraph using synthetic data (#3135)
The RAPIDS visualization group asked for data comparing cuGraph algorithms to NetworkX (nx). This notebook uses the Release notebook as a base and replaces its datasets with the RMAT data generator. Closes #3134 Authors: - Don Acosta (https://github.com/acostadon) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: #3135
1 parent 6e9b518 commit cd2a91b

File tree

2 files changed

+1006
-0
lines changed

2 files changed

+1006
-0
lines changed
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import gc\n",
10+
"import os\n",
11+
"from time import perf_counter\n",
12+
"import numpy as np\n",
13+
"import random\n",
14+
"import math\n",
15+
"\n",
16+
"# rapids\n",
17+
"import cugraph\n",
18+
"import cudf\n",
19+
"\n",
20+
"# NetworkX libraries\n",
21+
"import networkx as nx\n",
22+
"\n",
23+
"# RMAT data generator\n",
24+
"from cugraph.generators import rmat\n",
25+
"from datetime import datetime"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": null,
31+
"metadata": {},
32+
"outputs": [],
33+
"source": [
def generate_data(scale, edgefactor=16):
    """Generate a synthetic edge list with cuGraph's RMAT generator.

    Parameters
    ----------
    scale : int
        Passed to ``rmat`` as its first argument; the requested number of
        edges is ``(2 ** scale) * edgefactor``.
    edgefactor : int, optional
        Multiplier applied to ``2 ** scale`` to get the requested edge count.

    Returns
    -------
    The object returned by ``rmat``; with ``create_using=None`` this is an
    edge list rather than a Graph instance.
    """
    requested_edges = (2 ** scale) * edgefactor
    # NOTE(review): the four positional literals are presumably the R-MAT
    # a/b/c probabilities followed by the random seed -- confirm against the
    # installed cugraph.generators.rmat signature.
    edgelist = rmat(
        scale,
        requested_edges,
        0.57,
        0.19,
        0.19,
        42,
        clip_and_flip=False,
        scramble_vertex_ids=True,
        create_using=None,  # return edgelist instead of Graph instance
        mg=False,
    )
    # Reported after generation, so the count is the actual number of rows.
    print('Generating a dataframe of ', len(edgelist), '...', sep='')
    return edgelist
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": null,
54+
"metadata": {},
55+
"outputs": [],
56+
"source": [
def gen_times(count, start_date, end_date):
    """Draw ``count`` random POSIX timestamps between two datetimes.

    Parameters
    ----------
    count : int
        Number of timestamps to generate.
    start_date, end_date : datetime.datetime
        Inclusive bounds of the sampling window.

    Returns
    -------
    cudf.Series
        Integer series named ``'Date'`` holding whole-second timestamps.

    Raises
    ------
    ValueError
        Raised by ``random.randint`` when the window is empty (start after end).
    """
    # datetime.timestamp() returns a float, but random.randint requires
    # integers: a float argument is deprecated since Python 3.10 and a
    # TypeError from 3.12 on. Round the lower bound up and the upper bound
    # down so every sample stays inside [start_date, end_date].
    range_start = math.ceil(start_date.timestamp())
    range_end = int(end_date.timestamp())
    random_list = [random.randint(range_start, range_end) for _ in range(count)]
    return cudf.Series(random_list, name='Date', dtype=int)
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"metadata": {},
71+
"outputs": [],
72+
"source": [
def gen_amounts(count, value_range):
    """Produce ``count`` random amounts between 0 and ``value_range``.

    Each amount is drawn as an integer in ``[0, value_range * 100]`` and then
    divided by 100, so values have at most two decimal places.

    Returns
    -------
    cudf.Series
        Float series created with the name ``'amount'``.
    """
    scaled_draws = [random.randint(0, value_range * 100) for _ in range(count)]
    scaled_series = cudf.Series(scaled_draws, name='amount', dtype=float)
    return scaled_series.divide(100)
78+
]
79+
},
80+
{
81+
"cell_type": "code",
82+
"execution_count": null,
83+
"metadata": {},
84+
"outputs": [],
85+
"source": []
86+
},
87+
{
88+
"cell_type": "code",
89+
"execution_count": null,
90+
"metadata": {},
91+
"outputs": [],
92+
"source": [
# Build the synthetic transaction table: an RMAT edge list plus a random
# amount and a random timestamp per edge, then write it out as CSV.

# Sampling window for the timestamps and upper bound for the amounts.
start_time = '1/1/2022 01:00:00 AM'
end_time = '7/1/2022 01:00:00 AM'
amount_range = 25000
d1 = datetime.strptime(start_time, '%m/%d/%Y %I:%M:%S %p')
d2 = datetime.strptime(end_time, '%m/%d/%Y %I:%M:%S %p')

# The edge generator is called with a fixed seed, but the date/amount helpers
# draw from the stdlib `random` module; seed it too so a re-run yields the
# same columns.
random.seed(42)

df = generate_data(15)

# Order matters for reproducibility: dates are drawn before amounts.
dates = gen_times(len(df), d1, d2)
amounts = gen_amounts(len(df), amount_range)
df['amounts'] = amounts
df['date'] = dates

# No mode argument is passed here, so this is not an explicit append.
df.to_csv('../data/data_500m.csv')

# Last expression of the cell so the preview is actually rendered.
df.head(4)
108+
]
109+
}
110+
],
111+
"metadata": {
112+
"kernelspec": {
113+
"display_name": "cudfdev",
114+
"language": "python",
115+
"name": "python3"
116+
},
117+
"language_info": {
118+
"codemirror_mode": {
119+
"name": "ipython",
120+
"version": 3
121+
},
122+
"file_extension": ".py",
123+
"mimetype": "text/x-python",
124+
"name": "python",
125+
"nbconvert_exporter": "python",
126+
"pygments_lexer": "ipython3",
127+
"version": "3.9.15"
128+
},
129+
"orig_nbformat": 4,
130+
"vscode": {
131+
"interpreter": {
132+
"hash": "587ff963ecd34554a9da41c94362e2baa062d9a57502e220f049e10816826984"
133+
}
134+
}
135+
},
136+
"nbformat": 4,
137+
"nbformat_minor": 2
138+
}

0 commit comments

Comments
 (0)