Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions notebooks/applications/gen_550M.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import gc\n",
"import os\n",
"from time import perf_counter\n",
"import numpy as np\n",
"import random\n",
"import math\n",
"\n",
"# rapids\n",
"import cugraph\n",
"import cudf\n",
"\n",
"# NetworkX libraries\n",
"import networkx as nx\n",
"\n",
"# RMAT data generator\n",
"from cugraph.generators import rmat\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def generate_data(scale, edgefactor=16):\n",
" _gdf = rmat(\n",
" scale,\n",
" (2 ** scale) * edgefactor,\n",
" 0.57,\n",
" 0.19,\n",
" 0.19,\n",
" 42,\n",
" clip_and_flip=False,\n",
" scramble_vertex_ids=True,\n",
" create_using=None, # return edgelist instead of Graph instance\n",
" mg=False\n",
" )\n",
" print('Generating a dataframe of ' + str(len(_gdf)) + '...')\n",
" return _gdf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def gen_times(count, start_date, end_date):\n",
" range_start = start_date.timestamp()\n",
" range_end = int(end_date.timestamp())\n",
" random_list = []\n",
" for i in range(count):\n",
" random_list.append(random.randint(range_start,range_end))\n",
" return cudf.Series(random_list,name='Date', dtype=int)\n",
"# return [datetime.fromtimestamp(i) for i in random_list]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def gen_amounts(count,value_range):\n",
" random_list = []\n",
" for i in range(count):\n",
" random_list.append(random.randint(0,value_range*100))\n",
" return cudf.Series(random_list,name='amount', dtype=float).divide(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"start_time = '1/1/2022 01:00:00 AM'\n",
"end_time = '7/1/2022 01:00:00 AM'\n",
"amount_range = 25000\n",
"d1 = datetime.strptime(start_time, '%m/%d/%Y %I:%M:%S %p')\n",
"d2 = datetime.strptime(end_time, '%m/%d/%Y %I:%M:%S %p')\n",
"\n",
"df = generate_data(15)\n",
"\n",
"dates = gen_times(len(df),d1, d2)\n",
"amounts = gen_amounts(len(df),amount_range)\n",
"df['amounts'] = amounts\n",
"df['date'] = dates\n",
"len(df)\n",
"df.head(4)\n",
"df.to_csv('../data/data_500m.csv') #append mode"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "cudfdev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "587ff963ecd34554a9da41c94362e2baa062d9a57502e220f049e10816826984"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading