Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
95e29c7
Updating the convolution2D method to accept 3D arrays
antmsbrito Mar 12, 2025
3dc97c2
Fixing wrong index to clamp to edge
antmsbrito Mar 12, 2025
9bb9738
Updating to match new Convolution2D output
antmsbrito Mar 12, 2025
eb2701d
Updating to match previous new Convolution2D output
antmsbrito Mar 12, 2025
e748b00
Update benchmark signature
antmsbrito Mar 13, 2025
00501c5
Updating input checks for 3D arrays
antmsbrito Mar 13, 2025
c50ee1b
Updating dask and transonic implementations to accept 3D arrays
antmsbrito Mar 13, 2025
bdc3070
Add another test to check if 2D convolution still works fine
antmsbrito Mar 13, 2025
1ee10b6
Adding tentative new parameters to RGC (opencl only so far)
antmsbrito Mar 14, 2025
37251af
fixing angle rotation for %2 kernels
brunomsaraiva Mar 14, 2025
aa0cd97
adding missing fixes from branch
brunomsaraiva Mar 20, 2025
0a3cb93
removed unrelated changes
brunomsaraiva Mar 20, 2025
cd2988f
Merge branch 'esrrf_update' of https://github.com/HenriquesLab/NanoPy…
brunomsaraiva Mar 20, 2025
051129c
adding more updates
brunomsaraiva Mar 24, 2025
8672d34
Fixing if clauses
antmsbrito Mar 24, 2025
e747f00
another iteration
brunomsaraiva Mar 24, 2025
c01387a
Merge branch 'esrrf_update' of https://github.com/HenriquesLab/NanoPy…
brunomsaraiva Mar 24, 2025
e73df52
fixed radius calculation, and edges calculation
brunomsaraiva Mar 24, 2025
b1ba1b8
updates to esrrf calculation
brunomsaraiva Apr 1, 2025
f73bf8d
fixed channel registration according to new convolution changes
brunomsaraiva Apr 1, 2025
472fb89
fixed robert's cross in eSRRF cpu runtypes
brunomsaraiva Apr 1, 2025
f0f8445
added conv as part of eSRRF (BUGGED)
brunomsaraiva Apr 1, 2025
f44f7c9
further changes and improvements
brunomsaraiva Apr 1, 2025
ab37f58
furhter changes and improvements
brunomsaraiva Apr 1, 2025
32e8e02
finished fixing 2D gpu implementation
brunomsaraiva Apr 3, 2025
dc7447e
added verbose option to drift and channel reg
brunomsaraiva Apr 3, 2025
afc07c5
iterative changes to eSRRF 3D
brunomsaraiva Apr 4, 2025
2b2af85
update
brunomsaraiva Apr 7, 2025
2c5b4f1
udpated run function
brunomsaraiva Apr 7, 2025
df8f944
updated var name
brunomsaraiva Apr 7, 2025
0dc76a0
fixed typo
brunomsaraiva Apr 7, 2025
d32feab
wrote python side of opencl runtype
brunomsaraiva Apr 7, 2025
8a410c6
bugfixing
brunomsaraiva Apr 8, 2025
33d1cf2
reverting 3d changes
brunomsaraiva Apr 9, 2025
fe7c80f
reverting 3d changes
brunomsaraiva Apr 9, 2025
d5560f4
reverting 3d changes
brunomsaraiva Apr 9, 2025
5e75f97
removing prints
brunomsaraiva Apr 9, 2025
5f62672
reverting 3d changes
brunomsaraiva Apr 9, 2025
f2cb770
removing unwanted files
brunomsaraiva Apr 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 24 additions & 13 deletions src/include/_c_sr_radial_gradient_convergence.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,47 @@ double _c_calculate_dk(float Gx, float Gy, float dx, float dy, float distance) {
return Dk;
}

float _c_calculate_rgc(int xM, int yM, float* imIntGx, float* imIntGy, int colsM, int rowsM, int magnification, float Gx_Gy_MAGNIFICATION, float fwhm, float tSO, float tSS, float sensitivity) {
void _rotate_vector(float* Gx, float* Gy, float angle) {
float cos_angle = cos(angle);
float sin_angle = sin(angle);

float original_Gx = *Gx;
float original_Gy = *Gy;

*Gx = original_Gx * cos_angle - original_Gy * sin_angle;
*Gy = original_Gx * sin_angle + original_Gy * cos_angle;
}

float _c_calculate_rgc(int xM, int yM, float* imIntGx, float* imIntGy, int colsM, int rowsM, int magnification, float Gx_Gy_MAGNIFICATION, float fwhm, float tSO, float tSS, float sensitivity, float offset, float xyoffset, float angle) {

float vx, vy, Gx, Gy, dx, dy, distance, distanceWeight, GdotR, Dk;

float xc = (xM + 0.5) / magnification;
float yc = (yM + 0.5) / magnification;
float xc = (float)xM / magnification + offset; // offset in non-magnified space
float yc = (float)yM / magnification + offset;

float RGC = 0;
float distanceWeightSum = 0;

int _start = -(int)(Gx_Gy_MAGNIFICATION * fwhm);
int _end = (int)(Gx_Gy_MAGNIFICATION * fwhm + 1);
int _start = -(int)(2 * fwhm);
int _end = (int)(2 * fwhm + 1);

for (int j = _start; j < _end; j++) {
vy = (int)(Gx_Gy_MAGNIFICATION * yc) + j;
vy /= Gx_Gy_MAGNIFICATION;
vy = yc + j;

if (0 < vy && vy <= rowsM - 1) {
if (0 < vy && vy <= rowsM/magnification - 1) {
for (int i = _start; i < _end; i++) {
vx = (int)(Gx_Gy_MAGNIFICATION * xc) + i;
vx /= Gx_Gy_MAGNIFICATION;
vx = xc + i;

if (0 < vx && vx <= colsM - 1) {
if (0 < vx && vx <= colsM/magnification - 1) {
dx = vx - xc;
dy = vy - yc;
distance = sqrt(dx * dx + dy * dy);

if (distance != 0 && distance <= tSO) {
Gx = imIntGx[(int)(vy * magnification * Gx_Gy_MAGNIFICATION * colsM * Gx_Gy_MAGNIFICATION) + (int)(vx * magnification * Gx_Gy_MAGNIFICATION)];
Gy = imIntGy[(int)(vy * magnification * Gx_Gy_MAGNIFICATION * colsM * Gx_Gy_MAGNIFICATION) + (int)(vx * magnification * Gx_Gy_MAGNIFICATION)];
Gx = imIntGx[(int)((vy+xyoffset) * magnification * Gx_Gy_MAGNIFICATION * colsM * Gx_Gy_MAGNIFICATION) + (int)((vx+xyoffset) * magnification * Gx_Gy_MAGNIFICATION)];
Gy = imIntGy[(int)((vy+xyoffset) * magnification * Gx_Gy_MAGNIFICATION * colsM * Gx_Gy_MAGNIFICATION) + (int)((vx+xyoffset) * magnification * Gx_Gy_MAGNIFICATION)];

_rotate_vector(&Gx, &Gy, angle);

distanceWeight = _c_calculate_dw(distance, tSS);
distanceWeightSum += distanceWeight;
Expand Down
5 changes: 4 additions & 1 deletion src/include/_c_sr_radial_gradient_convergence.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ double _c_calculate_dw(double distance, double tSS);

double _c_calculate_dk(float Gx, float Gy, float dx, float dy, float distance);

float _c_calculate_rgc(int xM, int yM, float* imIntGx, float* imIntGy, int colsM, int rowsM, int magnification, float Gx_Gy_MAGNIFICATION, float fwhm, float tSO, float tSS, float sensitivity);
void _rotate_vector(float* Gx, float* Gy, float angle);

float _c_calculate_rgc(int xM, int yM, float* imIntGx, float* imIntGy, int colsM, int rowsM, int magnification, float Gx_Gy_MAGNIFICATION, float fwhm, float tSO, float tSS, float sensitivity, float offset, float xyoffset, float angle);

double _c_calculate_dw3D_isotropic(double distance, double tSS);

Expand All @@ -21,6 +23,7 @@ double _c_calculate_dk3D(float Gx, float Gy, float Gz, float dx, float dy, float

float _c_calculate_rgc3D(int xM, int yM, int sliceM, float* imIntGx, float* imIntGy, float* imIntGz, int colsM, int rowsM, int slicesM, int magnification_xy, int magnification_z, float ratio_px, float Gx_Gy_MAGNIFICATION, float Gz_MAGNIFICATION, float fwhm, float fwhm_z, float tSO, float tSO_z, float tSS, float tSS_z, float sensitivity);


float _c_calculate_rgc_3d(int cM, int rM, int sM, float* imIntGx, float* imIntGy, float* imIntGz, int colsM, int rowsM, int slicesM, int magnification_xy, int magnification_z, float Gx_Gy_MAGNIFICATION, float fwhm, float fwhm_z, float tSO, float tSS, float sensitivity);

#endif
22 changes: 11 additions & 11 deletions src/liquid_benchmarks/_le_convolution/Convolution.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Cuda:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.0004861999999974387
- 0.0004765000000048758
Expand All @@ -22,7 +22,7 @@ Cuda:
- 0.00047459999998977764
- 0.0004846000000213735
Dask:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.00937319999999886
- 0.00935059999999055
Expand All @@ -45,7 +45,7 @@ Dask:
- 0.009444899999976997
- 0.008983199999988756
Numba:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.005503999999987741
- 0.005404399999989096
Expand All @@ -68,7 +68,7 @@ Numba:
- 0.006126800000004096
- 0.00559069999999906
OpenCL:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.09695740000000796
- 0.0791657999999984
Expand All @@ -91,7 +91,7 @@ OpenCL:
- 0.07781260000001566
- 0.07502910000002316
Python:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 2.4462038000000064
- 2.4630003999999985
Expand All @@ -114,7 +114,7 @@ Python:
- 2.3908779999999865
- 2.423667199999983
Threaded:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.000824199999996722
- 0.0008181999999976597
Expand All @@ -137,7 +137,7 @@ Threaded:
- 0.0007410999999990509
- 0.00074050000000625
Threaded_dynamic:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.0004450999999932037
- 0.00044110000000330274
Expand All @@ -160,7 +160,7 @@ Threaded_dynamic:
- 0.00042849999999816646
- 0.00042849999999816646
Threaded_guided:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.00045450000000357704
- 0.0004828000000003385
Expand All @@ -183,7 +183,7 @@ Threaded_guided:
- 0.0004787000000021635
- 0.000453700000008439
Threaded_static:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.0004264999999890051
- 0.0006580000000013797
Expand All @@ -206,7 +206,7 @@ Threaded_static:
- 0.00047909999997841624
- 0.0004892999999981384
Transonic:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.005411799999990308
- 0.00537510000000907
Expand All @@ -229,7 +229,7 @@ Transonic:
- 0.005397600000009106
- 0.005529200000012224
Unthreaded:
(['shape(100, 100)', 'shape(23, 23)'], {}):
(['shape(1, 100, 100)', 'shape(23, 23)'], {}):
- 5290000
- 0.006530499999996664
- 0.0064456000000063796
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,9 @@ def _gaussian_filter(inpimg, _runtype, sigma):
knl1 = np.exp(-0.5 / (sigma*sigma) * x1 ** 2)
knl1 = knl1 / knl1.sum()
knl1 = knl1.astype(np.float32)

img1 = conv.run(inpimg,knl1.reshape((len(x1), 1)),run_type=_runtype)
img1 = np.expand_dims(img1, axis=0)
img2 = conv.run(img1,knl1.reshape((1, len(x1))),run_type=_runtype)

return img2

class ChannelRegistrationEstimator(LiquidEngine):
Expand Down
92 changes: 51 additions & 41 deletions src/mako_templates/nanopyx.core.transform._le_convolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ from cython.parallel import parallel, prange
from libc.math cimport cos, sin

from .__interpolation_tools__ import check_image, value2array
from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic
from .convolution import convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic
from ...__liquid_engine__ import LiquidEngine
from ...__opencl__ import cl, cl_array, _fastest_device

Expand All @@ -29,23 +29,25 @@ class Convolution(LiquidEngine):
clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)

def run(self, image, kernel, run_type=None):
image = check_array(image)
image = check_image(image)
return self._run(image, kernel, run_type=run_type)

def benchmark(self, image, kernel):
image = check_image(image)
return super().benchmark(image, kernel)

% for sch in schedulers:
def _run_${sch}(self, float[:,:] image, float[:,:] kernel):
def _run_${sch}(self, float[:,:,:] image, float[:,:] kernel):
"""
@cpu
% if sch!='unthreaded':
@threaded
% endif
@cython
"""
cdef int nRows = image.shape[0]
cdef int nCols = image.shape[1]
cdef int nFrames = image.shape[0]
cdef int nRows = image.shape[1]
cdef int nCols = image.shape[2]

cdef int nRows_kernel = kernel.shape[0]
cdef int nCols_kernel = kernel.shape[1]
Expand All @@ -61,29 +63,30 @@ class Convolution(LiquidEngine):
cdef float acc = 0


conv_out = np.zeros((nRows, nCols), dtype=np.float32)
cdef float[:,:] _conv_out = conv_out
conv_out = np.zeros((nFrames, nRows, nCols), dtype=np.float32)
cdef float[:,:,:] _conv_out = conv_out

with nogil:
% if sch=='unthreaded':
for r in range(nRows):
for c in range(nCols):
% elif sch=='threaded':
for r in prange(nRows):
for c in prange(nCols):
% else:
for r in prange(nRows,schedule="${sch.split('_')[1]}"):
for c in prange(nCols,schedule="${sch.split('_')[1]}"):
% endif
acc = 0
for kr in range(nRows_kernel):
for kc in range(nCols_kernel):
local_row = min(max(r+(kr-center_r),0),nRows-1)
local_col = min(max(c+(kc-center_c),0),nCols-1)
acc = acc + kernel[kr,kc] * image[local_row, local_col]
_conv_out[r,c] = acc

return conv_out
for f in range(nFrames):
% if sch=='unthreaded':
for r in range(nRows):
for c in range(nCols):
% elif sch=='threaded':
for r in prange(nRows):
for c in prange(nCols):
% else:
for r in prange(nRows,schedule="${sch.split('_')[1]}"):
for c in prange(nCols,schedule="${sch.split('_')[1]}"):
% endif
acc = 0
for kr in range(nRows_kernel):
for kc in range(nCols_kernel):
local_row = min(max(r+(kr-center_r),0),nRows-1)
local_col = min(max(c+(kc-center_c),0),nCols-1)
acc = acc + kernel[kr,kc] * image[f,local_row, local_col]
_conv_out[f,r,c] = acc

return np.squeeze(conv_out)

% endfor

Expand All @@ -99,60 +102,67 @@ class Convolution(LiquidEngine):
dc = device['device']
cl_queue = cl.CommandQueue(cl_ctx)

image_out = np.zeros((image.shape[0], image.shape[1]), dtype=np.float32)
image_out = np.zeros((image.shape[0], image.shape[1], image.shape[2]), dtype=np.float32)
mf = cl.mem_flags

input_image = cl.image_from_array(cl_ctx, image, mode='r')
input_kernel = cl.image_from_array(cl_ctx, kernel, mode='r')
output_opencl = cl.image_from_array(cl_ctx, image_out, mode='w')
input_image = cl.Buffer(cl_ctx, mf.READ_ONLY, image.nbytes)
cl.enqueue_copy(cl_queue, input_image, image).wait()

input_kernel = cl.Buffer(cl_ctx, mf.READ_ONLY, kernel.nbytes)
cl.enqueue_copy(cl_queue, input_kernel, kernel).wait()

output_opencl = cl.Buffer(cl_ctx, mf.WRITE_ONLY, image_out.nbytes)

kernelsize = kernel.shape[0]
cl_queue.finish()

code = self._get_cl_code("_le_convolution.cl", device['DP'])
prg = cl.Program(cl_ctx, code).build()
knl = prg.conv2d
knl = prg.conv2d_2

knl(cl_queue,
(1,image.shape[0], image.shape[1]),
self.get_work_group(device['device'],(1,image.shape[0], image.shape[1])),
(image.shape[0],image.shape[1],image.shape[2]),
None,#self.get_work_group(device['device'],(image.shape[0], image.shape[1], image.shape[2])),
input_image,
output_opencl,
input_kernel).wait()
input_kernel,
np.int32(kernelsize)).wait()

cl.enqueue_copy(cl_queue, image_out, output_opencl,origin=(0,0), region=(image.shape[0], image.shape[1])).wait()
cl.enqueue_copy(cl_queue, image_out, output_opencl).wait()

cl_queue.finish()
return image_out
return np.squeeze(image_out)

def _run_python(self, image, kernel):
"""
@cpu
"""
return convolution2D_python(image, kernel).astype(np.float32)
return np.squeeze(convolution2D_python(image, kernel))

def _run_transonic(self, image, kernel):
"""
@cpu
@threaded
"""
return convolution2D_transonic(image, kernel).astype(np.float32)
return np.squeeze(convolution2D_transonic(image, kernel))

def _run_dask(self, image, kernel):
"""
@cpu
@threaded
"""
return convolution2D_dask(image, kernel).astype(np.float32)
return np.squeeze(convolution2D_dask(image, kernel))

def _run_cuda(self, image, kernel):
"""
@gpu
"""
return convolution2D_cuda(image, kernel).astype(np.float32)
return np.squeeze(convolution2D_cuda(image, kernel))

def _run_numba(self, image, kernel):
"""
@cpu
@threaded
@numba
"""
return convolution2D_numba(image, kernel).astype(np.float32)
return np.squeeze(convolution2D_numba(image, kernel))
Loading
Loading