Skip to content

Commit 50141c6

Browse files
committed
add two ver for blog writing
1 parent ca8fb22 commit 50141c6

File tree

2 files changed

+339
-0
lines changed

2 files changed

+339
-0
lines changed

cpu_based_version.ipynb

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Download Example Data"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"# ! FILEID=\"1OO0tUguZMyQ1d37K7F9jiwV7mm_z2yuD\" && FILENAME=\"example_data.npy\" && wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=$FILEID\" -O $FILENAME && rm -rf /tmp/cookies.txt"
17+
]
18+
},
19+
{
20+
"cell_type": "markdown",
21+
"metadata": {},
22+
"source": [
23+
"## Import Numba CUDA"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": 2,
29+
"metadata": {},
30+
"outputs": [],
31+
"source": [
32+
"from numba import cuda\n",
33+
"import numpy as np\n",
34+
"import math"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": 3,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"data = np.load('example_data.npy')"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"## Original (CPU-based)"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 4,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
def ridge_detection(f, thres, max_steps=1000):
    """Count how often each cell is stepped onto by uphill walks over ``f``.

    A walk is started from every interior cell whose value is greater than
    ``thres``. At each step the walk moves to the largest value in the 3x3
    neighbourhood of its current position (NaNs are ignored) and the counter
    of the cell it lands on is incremented. The starting cell itself is not
    counted.

    A walk ends when it
      * reaches the outer border of the array,
      * sits on the maximum of its neighbourhood (``index == 4``),
      * finds an earlier neighbour (row-major order) that merely ties with
        the current cell, or
      * has taken ``max_steps`` steps.

    Parameters
    ----------
    f : 2-D array indexed as ``f[i, j]``
        Input field. NaN cells never start a walk and are never stepped onto.
    thres : scalar
        Only cells with ``f[i, j] > thres`` start a walk.
    max_steps : int, optional
        Upper bound on the number of steps per walk (default 1000, the value
        that was previously hard-coded).

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``f`` holding the visit counts.
    """
    n_rows, n_cols = f.shape
    last_i = n_rows - 1
    last_j = n_cols - 1
    count = np.zeros(f.shape)

    # Only interior cells may start a walk, so iterate over the interior directly.
    for i in range(1, last_i):
        for j in range(1, last_j):
            # A NaN compares False against any threshold, so this single test
            # also rejects NaN start cells.
            if not f[i, j] > thres:
                continue

            step_i, step_j = i, j
            for _ in range(max_steps):
                if step_i == 0 or step_j == 0 or step_i == last_i or step_j == last_j:
                    break

                # `.data` is kept from the original implementation.
                # NOTE(review): presumably there so masked arrays expose their
                # raw values -- confirm whether it is still needed.
                window = np.asarray(
                    f[step_i - 1 : step_i + 2, step_j - 1 : step_j + 2].data
                )
                index = int(np.nanargmax(window))
                # The walk only ever stands on a non-NaN cell, so the window
                # always holds a non-NaN value and this maximum is never NaN.
                vmax = window.flat[index]

                if index == 4 or vmax == f[step_i, step_j]:
                    break

                row, col = divmod(index, 3)
                step_i += row - 1
                step_j += col - 1
                count[step_i, step_j] += 1
    return count
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"execution_count": 5,
100+
"metadata": {},
101+
"outputs": [
102+
{
103+
"name": "stdout",
104+
"output_type": "stream",
105+
"text": [
106+
"CPU times: user 6min 14s, sys: 15.9 s, total: 6min 30s\n",
107+
"Wall time: 6min 6s\n"
108+
]
109+
}
110+
],
111+
"source": [
112+
"%%time\n",
113+
"results = ridge_detection(data, 0)"
114+
]
115+
},
116+
{
117+
"cell_type": "code",
118+
"execution_count": 6,
119+
"metadata": {},
120+
"outputs": [],
121+
"source": [
122+
"# %timeit -r 7 -n 1 ridge_detection(data, 0)"
123+
]
124+
}
125+
],
126+
"metadata": {
127+
"kernelspec": {
128+
"display_name": "Python 3",
129+
"language": "python",
130+
"name": "python3"
131+
},
132+
"language_info": {
133+
"codemirror_mode": {
134+
"name": "ipython",
135+
"version": 3
136+
},
137+
"file_extension": ".py",
138+
"mimetype": "text/x-python",
139+
"name": "python",
140+
"nbconvert_exporter": "python",
141+
"pygments_lexer": "ipython3",
142+
"version": "3.6.10"
143+
}
144+
},
145+
"nbformat": 4,
146+
"nbformat_minor": 4
147+
}

cuda_python_version.ipynb

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Download Example Data"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"# ! FILEID=\"1OO0tUguZMyQ1d37K7F9jiwV7mm_z2yuD\" && FILENAME=\"example_data.npy\" && wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=$FILEID\" -O $FILENAME && rm -rf /tmp/cookies.txt"
17+
]
18+
},
19+
{
20+
"cell_type": "markdown",
21+
"metadata": {},
22+
"source": [
23+
"## Import Numba CUDA"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": 2,
29+
"metadata": {},
30+
"outputs": [],
31+
"source": [
32+
"from numba import cuda\n",
33+
"import numpy as np\n",
34+
"import math"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": 3,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"data = np.load('example_data.npy')"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"## Numba (CUDA Python)"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 7,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
@cuda.jit
def cuda_ridge_detection(f, count, thres):
    """CUDA kernel: accumulate uphill-walk visit counts for ``f`` into ``count``.

    Each thread covers the cells ``(i, j)`` reached from its own
    ``cuda.grid(2)`` position in steps of ``cuda.gridsize(2)``, so any launch
    configuration covers the whole array. For every interior, non-NaN cell
    with ``f[i, j] > thres`` the thread walks uphill: it repeatedly moves to
    the largest value of the 3x3 neighbourhood (NaNs never win the ``>``
    comparison) and atomically adds 1 to ``count`` at the cell it lands on.
    A walk ends at the array border, at a local maximum, when an earlier
    neighbour merely ties with the current cell, or after 1000 steps.

    Parameters
    ----------
    f : 2-D device array
        Input field.
    count : 2-D device array
        Output counters, indexed like ``f``. The kernel only ever adds to it,
        so the caller must pass it in zero-initialised.
    thres : scalar
        Threshold a cell must exceed to start a walk.
    """
    last_i = f.shape[0] - 1
    last_j = f.shape[1] - 1
    start_i, start_j = cuda.grid(2)
    stride_i, stride_j = cuda.gridsize(2)
    for i in range(start_i, f.shape[0], stride_i):
        for j in range(start_j, f.shape[1], stride_j):
            if (
                i > 0
                and j > 0
                and i < last_i
                and j < last_j
                and f[i, j] > thres
                # Logical `not`, not bitwise `~`: `~True` is -2 (truthy) in
                # plain Python, e.g. when run under the CUDA simulator.
                and not math.isnan(f[i, j])
            ):
                step_i = i
                step_j = j
                for _ in range(1000):
                    if (
                        step_i == 0
                        or step_j == 0
                        or step_i == last_i
                        or step_j == last_j
                    ):
                        break
                    # Row-major scan of the 3x3 window; strict `>` keeps the
                    # first maximum and skips NaNs.
                    index = 4
                    vmax = -np.inf
                    for ii in range(3):
                        for jj in range(3):
                            value = f[step_i + ii - 1, step_j + jj - 1]
                            if value > vmax:
                                vmax = value
                                index = jj + 3 * ii
                    # vmax is only ever assigned from a value that won a `>`
                    # comparison, so it cannot be NaN; no NaN test is needed.
                    if index == 4 or vmax == f[step_i, step_j]:
                        break
                    # Integer division: index is an integer in 0..8.
                    step_i = step_i - 1 + index // 3
                    step_j = step_j - 1 + index % 3
                    cuda.atomic.add(count, (step_i, step_j), 1)
97+
]
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": 8,
102+
"metadata": {},
103+
"outputs": [],
104+
"source": [
def test_func(data, thres=0, blocks=(8, 8), threads=(8, 32)):
    """Run ``cuda_ridge_detection`` on ``data`` and return the counts on the host.

    Parameters
    ----------
    data : array-like
        Input field; copied to the device.
    thres : scalar, optional
        Threshold forwarded to the kernel (default 0, as before).
    blocks, threads : tuple of int, optional
        Grid and block dimensions of the launch (defaults are the previously
        hard-coded ``(8, 8)`` and ``(8, 32)``).

    Returns
    -------
    numpy.ndarray
        Visit counts with the shape and dtype of ``data``.
    """
    device_data = cuda.to_device(data)
    # The kernel only does atomic adds into the output, so the buffer must
    # start at zero. cuda.device_array_like() leaves memory uninitialised
    # (like np.empty), which made the counts depend on stale device memory.
    device_results = cuda.to_device(np.zeros_like(data))
    cuda_ridge_detection[blocks, threads](device_data, device_results, thres)
    return device_results.copy_to_host()
111+
]
112+
},
113+
{
114+
"cell_type": "code",
115+
"execution_count": 9,
116+
"metadata": {},
117+
"outputs": [],
118+
"source": [
cuda_results = test_func(data)

# `results` is the CPU reference produced by `ridge_detection(data, 0)` in
# cpu_based_version.ipynb; nothing in this notebook defines it. Fail with an
# actionable message instead of a bare NameError on a fresh kernel.
if "results" not in globals():
    raise NameError(
        "name 'results' is not defined: compute the CPU reference with "
        "`results = ridge_detection(data, 0)` (see cpu_based_version.ipynb) "
        "in this kernel before validating the CUDA output"
    )
np.testing.assert_almost_equal(results, cuda_results)
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": 10,
126+
"metadata": {},
127+
"outputs": [
128+
{
129+
"name": "stdout",
130+
"output_type": "stream",
131+
"text": [
132+
"1.67 ms ± 8.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
133+
]
134+
}
135+
],
136+
"source": [
137+
"%timeit -r 7 -n 1000 test_func(data)"
138+
]
139+
},
140+
{
141+
"cell_type": "markdown",
142+
"metadata": {},
143+
"source": [
144+
"## Speedup of 200,000+ times\n",
145+
"\n",
146+
"- The CPU-based solution took 366 seconds (366,000 ms)\n",
147+
"- The CUDA Python solution took 0.00167 seconds (1.67 ms)"
148+
]
149+
},
150+
{
151+
"cell_type": "code",
152+
"execution_count": 11,
153+
"metadata": {},
154+
"outputs": [
155+
{
156+
"data": {
157+
"text/plain": [
158+
"219161.6766467066"
159+
]
160+
},
161+
"execution_count": 11,
162+
"metadata": {},
163+
"output_type": "execute_result"
164+
}
165+
],
166+
"source": [
167+
"366000 / 1.67"
168+
]
169+
}
170+
],
171+
"metadata": {
172+
"kernelspec": {
173+
"display_name": "Python 3",
174+
"language": "python",
175+
"name": "python3"
176+
},
177+
"language_info": {
178+
"codemirror_mode": {
179+
"name": "ipython",
180+
"version": 3
181+
},
182+
"file_extension": ".py",
183+
"mimetype": "text/x-python",
184+
"name": "python",
185+
"nbconvert_exporter": "python",
186+
"pygments_lexer": "ipython3",
187+
"version": "3.6.10"
188+
}
189+
},
190+
"nbformat": 4,
191+
"nbformat_minor": 4
192+
}

0 commit comments

Comments
 (0)