diff --git a/setup.py b/setup.py index 6c87a225..63ee768a 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ ] EXTRA_LING_ARGS = [] -VERSION = "1.3.0" # sets version number for whole package +VERSION = "1.3.1" # sets version number for whole package def run_command(command: str) -> str: diff --git a/src/include/_c_gradients.c b/src/include/_c_gradients.c index da58a31a..58f33c89 100644 --- a/src/include/_c_gradients.c +++ b/src/include/_c_gradients.c @@ -24,20 +24,26 @@ void _c_gradient_radiality(float* image, float* imGc, float* imGr, int rows, } } +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + // as in REF: // https://github.com/HenriquesLab/NanoJ-eSRRF/blob/785c71b3bd508c938f63bb780cba47b0f1a5b2a7/resources/liveSRRF.cl // under calculateGradient_2point -void _c_gradient_2point(float* image, float* imGc, float* imGr, int rows, +void _c_gradient_two_point(float* image, float* imGc, float* imGr, int rows, int cols) { - int c0, r0, c1, r1; - for (int j = 1; j < rows; j++) { - r1 = j * cols; - r0 = (j - 1) * cols; - for (int i = 1; i < cols; i++) { - c1 = i; - c0 = i - 1; - imGc[r1 + i] = image[r1 + c1] - image[r1 + c0]; - imGr[r1 + i] = image[r1 + c1] - image[r0 + c1]; + int c1, r1; + int c0, r0; + int offset; + + for (r1 = 0; r1 < rows; r1++) { + for (c1 = 0; c1 < cols; c1++) { + offset = r1 * cols + c1; + r0 = MAX(r1 - 1, 0); + c0 = MAX(c1 - 1, 0); + + imGc[offset] = image[offset] - image[r1 * cols + c0]; + imGr[offset] = image[offset] - image[r0 * cols + c1]; + } } } diff --git a/src/include/_c_gradients.h b/src/include/_c_gradients.h index fb946bf4..18c514f9 100644 --- a/src/include/_c_gradients.h +++ b/src/include/_c_gradients.h @@ -4,7 +4,7 @@ void _c_gradient_radiality(float* image, float* imGc, float* imGr, int rows, int cols); -void _c_gradient_2point(float* image, float* imGc, float* imGr, int rows, +void _c_gradient_two_point(float* image, float* imGc, float* imGr, int rows, int cols); void _c_gradient_roberts_cross(float* image, float* imGc, float* imGr, int rows, int cols); diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx index 66b9f0c1..b0b0c06e 100644 --- a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx @@ -1,6 +1,6 @@ <%! schedulers = ['threaded','threaded_guided','threaded_dynamic','threaded_static'] -%># cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=False +%># cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=True import numpy as np @@ -17,6 +17,7 @@ from ...__opencl__ import cl, cl_array, _fastest_device from ._le_interpolation_catmull_rom import ShiftAndMagnify from ._le_roberts_cross_gradients import GradientRobertsCross from ._le_radial_gradient_convergence import RadialGradientConvergence +from ._interpolation import fht_space_interpolation as dht_interpolation class eSRRF(LiquidEngine): @@ -40,123 +41,18 @@ class eSRRF(LiquidEngine): """ @gpu """ - if device is None: - device = _fastest_device - - # TODO doIntensityWeighting is irrelevant on gpu2 - cl_ctx = cl.Context([device['device']]) - dc = device['device'] - cl_queue = cl.CommandQueue(cl_ctx) - - output_shape = (image.shape[0], int(image.shape[1]*magnification), int(image.shape[2]*magnification)) - - # needs input image, first interpolation output, roberts cross output, magnified gradients and output image - - total_memory = (3*image[0, :, :].nbytes) + (2*np.zeros((1, output_shape[1], output_shape[2]), dtype=np.float32).nbytes) + (2*np.zeros((1, output_shape[1]*2, output_shape[2]*2), dtype=np.float32).nbytes) - output_image = np.zeros(output_shape, dtype=np.float32) - - max_slices = int((dc.global_mem_size // total_memory)/mem_div) - max_slices = self._check_max_slices(image, max_slices) - - mf = cl.mem_flags - input_cl = cl.Buffer(cl_ctx, mf.READ_ONLY, self._check_max_buffer_size(image[0:max_slices, :, :].nbytes, dc, max_slices)) - output_cl = cl.Buffer(cl_ctx, mf.WRITE_ONLY, self._check_max_buffer_size(np.empty((max_slices, output_shape[1], output_shape[2]), dtype=np.float32).nbytes, dc, max_slices)) - magnified_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, output_shape[1], output_shape[2]), dtype=np.float32).nbytes, dc, max_slices)) - col_gradients_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, image.shape[1], image.shape[2]), dtype=np.float32).nbytes, dc, max_slices)) - row_gradients_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, image.shape[1], image.shape[2]), dtype=np.float32).nbytes, dc, max_slices)) - col_magnified_gradients_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, image.shape[1]*magnification*2, image.shape[2]*magnification*2), dtype=np.float32).nbytes, dc, max_slices)) - row_magnified_gradients_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, image.shape[1]*magnification*2, image.shape[2]*magnification*2), dtype=np.float32).nbytes, dc, max_slices)) - cl.enqueue_copy(cl_queue, input_cl, image[0:max_slices,:,:]).wait() - - cr_code = self._get_cl_code("_le_interpolation_catmull_rom_.cl", device['DP']) - cr_prg = cl.Program(cl_ctx, cr_code).build(options=["-cl-mad-enable -cl-fast-relaxed-math"]) - cr_knl = cr_prg.shiftAndMagnify - - rc_code = self._get_cl_code("_le_roberts_cross_gradients.cl", device['DP']) - rc_prg = cl.Program(cl_ctx, rc_code).build(options=["-cl-mad-enable -cl-fast-relaxed-math"]) - rc_knl = rc_prg.gradient_roberts_cross - - rgc_code = self._get_cl_code("_le_radial_gradient_convergence.cl", device['DP']) - rgc_prg = cl.Program(cl_ctx, rgc_code).build(options=["-cl-mad-enable -cl-fast-relaxed-math"]) - rgc_knl = rgc_prg.calculate_rgc - - margin = int(radius * magnification) - cdef int lowest_row = margin - cdef int highest_row = output_shape[1] - margin - cdef int lowest_col = margin - cdef int highest_col = output_shape[2] - margin - - for i in range(0, image.shape[0], max_slices): - if image.shape[0] - i >= max_slices: - n_slices = max_slices - else: - n_slices = image.shape[0] - i - - cr_knl(cl_queue, - (n_slices, int(image.shape[1]*magnification), int(image.shape[2]*magnification)), - None, - input_cl, - magnified_cl, - np.float32(0), - np.float32(0), - np.float32(magnification), - np.float32(magnification)).wait() - - rc_knl(cl_queue, - (n_slices,), - None, - input_cl, - col_gradients_cl, - row_gradients_cl, - np.int32(image.shape[1]), - np.int32(image.shape[2])).wait() - - cr_knl(cl_queue, - (n_slices, int(image.shape[1]*magnification*2), int(image.shape[2]*magnification*2)), - None, - col_gradients_cl, - col_magnified_gradients_cl, - np.float32(0), - np.float32(0), - np.float32(magnification*2), - np.float32(magnification*2)).wait() - - cr_knl(cl_queue, - (n_slices, int(image.shape[1]*magnification*2), int(image.shape[2]*magnification*2)), - None, - row_gradients_cl, - row_magnified_gradients_cl, - np.float32(0), - np.float32(0), - np.float32(magnification*2), - np.float32(magnification*2)).wait() - - rgc_knl(cl_queue, - (n_slices, highest_row - lowest_row, highest_col - lowest_col), - None, - col_magnified_gradients_cl, - row_magnified_gradients_cl, - magnified_cl, - output_cl, - np.int32(image.shape[2]*magnification), - np.int32(image.shape[1]*magnification), - np.int32(magnification), - np.float32(2), - np.float32(radius), - np.float32(2 * (radius / 2.355) + 1), - np.float32(2 * (radius / 2.355) * (radius / 2.355)), - np.float32(sensitivity), - np.int32(doIntensityWeighting)).wait() - - cl.enqueue_copy(cl_queue, output_image[i:i+n_slices,:,:], output_cl).wait() - - if i+n_slices(gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int colsM = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rowsM = image_interp.shape[1] + cdef int colsM = image_interp.shape[2] cdef float [:,:,:] rgc_map = np.zeros((nFrames, rowsM, colsM), dtype=np.float32) @@ -91,8 +91,8 @@ class RadialGradientConvergence(LiquidEngine): cdef int margin = int(fwhm * magnification) cdef int nFrames = gradient_col_interp.shape[0] - cdef int rowsM = (gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int colsM = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rowsM = image_interp.shape[1] + cdef int colsM = image_interp.shape[2] cdef float [:,:,:] rgc_map = np.zeros((nFrames, rowsM, colsM), dtype=np.float32) @@ -141,8 +141,8 @@ class RadialGradientConvergence(LiquidEngine): # Sizes cdef int nFrames = gradient_col_interp.shape[0] - cdef int rows_interpolated = (gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int cols_interpolated = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rows_interpolated = image_interp.shape[1] + cdef int cols_interpolated = image_interp.shape[2] # Grid size of the global work space lowest_row = margin diff --git a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx index d3e8ae1c..922dfbbf 100644 --- a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx @@ -10,6 +10,8 @@ from ...__liquid_engine__ import LiquidEngine from cython.parallel import prange from .__interpolation_tools__ import check_image +from scipy.stats import pearsonr as pearson_correlation + cdef extern from "_c_gradients.h": void _c_gradient_roberts_cross(float* pixels, float* GxArray, float* GyArray, int w, int h) nogil @@ -28,6 +30,25 @@ class GradientRobertsCross(LiquidEngine): def benchmark(self, image): image = check_image(image) return super().benchmark(image) + + def _compare_runs(self, output_1, output_2): + """@public""" + if isinstance(output_1, tuple): + output_1 = np.concatenate((output_1[0], output_1[1]), axis=0) + if isinstance(output_2, tuple): + output_2 = np.concatenate((output_2[0], output_2[1]), axis=0) + if output_1.ndim > 2: + pcc = 0 + for i in range(output_1.shape[0]): + pcc += pearson_correlation(output_1[i, :, :].flatten(), output_2[i, :, :].flatten()).statistic + pcc /= output_1.shape[0] + else: + pcc = pearson_correlation(output_1.flatten(), output_2.flatten()).statistic + + if pcc > 0.8: + return True + else: + return False def _run_unthreaded(self, float[:,:,:] image): """ diff --git a/src/nanopyx/core/transform/_interpolation.pyx b/src/nanopyx/core/transform/_interpolation.pyx index e3c6cd7c..2b979b20 100644 --- a/src/nanopyx/core/transform/_interpolation.pyx +++ b/src/nanopyx/core/transform/_interpolation.pyx @@ -1,10 +1,12 @@ -# cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=True, autogen_pxd=True +# cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=True, autogen_pxd=False import numpy as np cimport numpy as np import cython from math import floor +import numpy.fft as fft +from libc.math cimport ceil, log2, pow from ._le_interpolation_catmull_rom import ShiftAndMagnify as ShiftMagnify_CR @@ -99,3 +101,119 @@ def interpolate_3d_zlinear(image, magnification_xy: int = 5, magnification_z: in z_interpolated = np.asarray(xy_interpolated) return z_interpolated + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef float[:, :] _mirror_padding_even_square_c(float[:, :] img): + cdef int h = img.shape[0] + cdef int w = img.shape[1] + + # Get power-of-2 dimension >= each side, then pick square + cdef int bitW = pow(2, ceil(log2(w))) + cdef int bitH = pow(2, ceil(log2(h))) + cdef int outputDim = max(bitH, bitW) + + cdef int scaleW = ceil(outputDim / float(w)) + if scaleW % 2 == 0: + scaleW += 1 + cdef int scaleH = ceil(outputDim / float(h)) + if scaleH % 2 == 0: + scaleH += 1 + + cdef int i, j, sH, sW, p, q + cdef float[:, :] padded = np.zeros((scaleH * h, scaleW * w), dtype=np.float32) + + # use integer arithmetic for the tile-flip decision + cdef int midW = (scaleW - 1) // 2 + cdef int midH = (scaleH - 1) // 2 + + for j in range(h): + for i in range(w): + for sH in range(scaleH): + for sW in range(scaleW): + if ((sW - midW) % 2) == 0: + p = i + else: + p = w - 1 - i + if ((sH - midH) % 2) == 0: + q = j + else: + q = h - 1 - j + padded[j + sH * h, i + sW * w] = img[q, p] + + # Crop to square outputDim + cdef int xROI = (scaleW * w - outputDim) // 2 + cdef int yROI = (scaleH * h - outputDim) // 2 + return padded[yROI:yROI + outputDim, xROI:xROI + outputDim] + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef float[:, :] _fht_space_interpolation_c(float[:, :] img, int intFactor, bint doMirrorPadding): + cdef float[:, :] working_img + cdef float orig_min = np.min(img) + cdef float orig_max = np.max(img) + cdef float scale_factor + + if doMirrorPadding: + working_img = _mirror_padding_even_square_c(img) + else: + working_img = img + + # Forward FFT (use complex128 for accuracy) + cdef complex[:, :] F = fft.fft2(np.asarray(working_img)) + cdef complex[:, :] Fshift = fft.fftshift(F) + + cdef int h = Fshift.shape[0] + cdef int w = Fshift.shape[1] + cdef int hInt = h * intFactor + cdef int wInt = w * intFactor + + # Zero-padded enlarged spectrum and place centered block + cdef complex[:, :] FInt = np.zeros((hInt, wInt), dtype=np.complex128) + + # center positions (integer) + cdef int xROI = (wInt - w) // 2 + cdef int yROI = (hInt - h) // 2 + + # place the whole centered block at once (faster and correct) + FInt[yROI:yROI + h, xROI:xROI + w] = Fshift + + # Inverse transform + cdef complex[:, :] result = fft.ifft2(fft.ifftshift(FInt)) + cdef float[:, :] output = np.real(result).astype(np.float32) + + # Scale to match input range (robust to constant images) + cdef float out_min = np.min(output) + cdef float out_max = np.max(output) + if out_max != out_min: + scale_factor = (orig_max - orig_min) / (out_max - out_min) + output = (np.asarray(output) - out_min) * scale_factor + orig_min + + if doMirrorPadding: + xROI = (wInt - intFactor * img.shape[1]) // 2 + yROI = (hInt - intFactor * img.shape[0]) // 2 + return output[yROI:yROI + intFactor * img.shape[0], + xROI:xROI + intFactor * img.shape[1]] + return output + +def fht_space_interpolation(image, int magnification=2, bint doMirrorPadding=True): + """ + Fourier interpolation of 2D images or 3D stacks + + Parameters: + image: 2D or 3D numpy array + magnification: Integer interpolation factor + doMirrorPadding: If True, applies mirror padding before FFT + """ + cdef float[:, :] img2d + if image.ndim == 2: + img2d = np.ascontiguousarray(image, dtype=np.float32) + return np.ascontiguousarray(np.asarray(_fht_space_interpolation_c(img2d, magnification, doMirrorPadding))) + elif image.ndim == 3: + return np.ascontiguousarray(np.stack([np.asarray(_fht_space_interpolation_c( + np.ascontiguousarray(frame, dtype=np.float32), + magnification, doMirrorPadding)) for frame in image])) + else: + raise ValueError("Input must be 2D or 3D array") diff --git a/src/nanopyx/core/transform/_le_esrrf.pyx b/src/nanopyx/core/transform/_le_esrrf.pyx index b1e4fbb9..193a001b 100644 --- a/src/nanopyx/core/transform/_le_esrrf.pyx +++ b/src/nanopyx/core/transform/_le_esrrf.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=False +# cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=True import numpy as np @@ -15,6 +15,7 @@ from ...__opencl__ import cl, cl_array, _fastest_device from ._le_interpolation_catmull_rom import ShiftAndMagnify from ._le_roberts_cross_gradients import GradientRobertsCross from ._le_radial_gradient_convergence import RadialGradientConvergence +from ._interpolation import fht_space_interpolation as dht_interpolation class eSRRF(LiquidEngine): @@ -38,123 +39,18 @@ class eSRRF(LiquidEngine): """ @gpu """ - if device is None: - device = _fastest_device - - # TODO doIntensityWeighting is irrelevant on gpu2 - cl_ctx = cl.Context([device['device']]) - dc = device['device'] - cl_queue = cl.CommandQueue(cl_ctx) - - output_shape = (image.shape[0], int(image.shape[1]*magnification), int(image.shape[2]*magnification)) - - # needs input image, first interpolation output, roberts cross output, magnified gradients and output image - - total_memory = (3*image[0, :, :].nbytes) + (2*np.zeros((1, output_shape[1], output_shape[2]), dtype=np.float32).nbytes) + (2*np.zeros((1, output_shape[1]*2, output_shape[2]*2), dtype=np.float32).nbytes) - output_image = np.zeros(output_shape, dtype=np.float32) - - max_slices = int((dc.global_mem_size // total_memory)/mem_div) - max_slices = self._check_max_slices(image, max_slices) - - mf = cl.mem_flags - input_cl = cl.Buffer(cl_ctx, mf.READ_ONLY, self._check_max_buffer_size(image[0:max_slices, :, :].nbytes, dc, max_slices)) - output_cl = cl.Buffer(cl_ctx, mf.WRITE_ONLY, self._check_max_buffer_size(np.empty((max_slices, output_shape[1], output_shape[2]), dtype=np.float32).nbytes, dc, max_slices)) - magnified_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, output_shape[1], output_shape[2]), dtype=np.float32).nbytes, dc, max_slices)) - col_gradients_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, image.shape[1], image.shape[2]), dtype=np.float32).nbytes, dc, max_slices)) - row_gradients_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, image.shape[1], image.shape[2]), dtype=np.float32).nbytes, dc, max_slices)) - col_magnified_gradients_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, image.shape[1]*magnification*2, image.shape[2]*magnification*2), dtype=np.float32).nbytes, dc, max_slices)) - row_magnified_gradients_cl = cl.Buffer(cl_ctx, mf.READ_WRITE, self._check_max_buffer_size(np.empty((max_slices, image.shape[1]*magnification*2, image.shape[2]*magnification*2), dtype=np.float32).nbytes, dc, max_slices)) - cl.enqueue_copy(cl_queue, input_cl, image[0:max_slices,:,:]).wait() - - cr_code = self._get_cl_code("_le_interpolation_catmull_rom_.cl", device['DP']) - cr_prg = cl.Program(cl_ctx, cr_code).build(options=["-cl-mad-enable -cl-fast-relaxed-math"]) - cr_knl = cr_prg.shiftAndMagnify - - rc_code = self._get_cl_code("_le_roberts_cross_gradients.cl", device['DP']) - rc_prg = cl.Program(cl_ctx, rc_code).build(options=["-cl-mad-enable -cl-fast-relaxed-math"]) - rc_knl = rc_prg.gradient_roberts_cross - - rgc_code = self._get_cl_code("_le_radial_gradient_convergence.cl", device['DP']) - rgc_prg = cl.Program(cl_ctx, rgc_code).build(options=["-cl-mad-enable -cl-fast-relaxed-math"]) - rgc_knl = rgc_prg.calculate_rgc - - margin = int(radius * magnification) - cdef int lowest_row = margin - cdef int highest_row = output_shape[1] - margin - cdef int lowest_col = margin - cdef int highest_col = output_shape[2] - margin - - for i in range(0, image.shape[0], max_slices): - if image.shape[0] - i >= max_slices: - n_slices = max_slices - else: - n_slices = image.shape[0] - i - - cr_knl(cl_queue, - (n_slices, int(image.shape[1]*magnification), int(image.shape[2]*magnification)), - None, - input_cl, - magnified_cl, - np.float32(0), - np.float32(0), - np.float32(magnification), - np.float32(magnification)).wait() - - rc_knl(cl_queue, - (n_slices,), - None, - input_cl, - col_gradients_cl, - row_gradients_cl, - np.int32(image.shape[1]), - np.int32(image.shape[2])).wait() - - cr_knl(cl_queue, - (n_slices, int(image.shape[1]*magnification*2), int(image.shape[2]*magnification*2)), - None, - col_gradients_cl, - col_magnified_gradients_cl, - np.float32(0), - np.float32(0), - np.float32(magnification*2), - np.float32(magnification*2)).wait() - - cr_knl(cl_queue, - (n_slices, int(image.shape[1]*magnification*2), int(image.shape[2]*magnification*2)), - None, - row_gradients_cl, - row_magnified_gradients_cl, - np.float32(0), - np.float32(0), - np.float32(magnification*2), - np.float32(magnification*2)).wait() - - rgc_knl(cl_queue, - (n_slices, highest_row - lowest_row, highest_col - lowest_col), - None, - col_magnified_gradients_cl, - row_magnified_gradients_cl, - magnified_cl, - output_cl, - np.int32(image.shape[2]*magnification), - np.int32(image.shape[1]*magnification), - np.int32(magnification), - np.float32(2), - np.float32(radius), - np.float32(2 * (radius / 2.355) + 1), - np.float32(2 * (radius / 2.355) * (radius / 2.355)), - np.float32(sensitivity), - np.int32(doIntensityWeighting)).wait() - - cl.enqueue_copy(cl_queue, output_image[i:i+n_slices,:,:], output_cl).wait() - - if i+n_slices 0 ? c1 - 1 : 0; + r0 = r1 > 0 ? r1 - 1 : 0; + + offset = r1 * cols + c1; + left_offset = r1 * cols + c0; // (x0, y1) + top_offset = r0 * cols + c1; // (x1, y0) + + // 2-point gradient (same as calculateGradient_2point) + imGc[offset] = image[offset] - image[left_offset]; + imGr[offset] = image[offset] - image[top_offset]; + } + } +} + +__kernel void gradient_two_point(__global float* image, + __global float* imGc, + __global float* imGr, + int rows, + int cols) + { + int f = get_global_id(0); + + _c_gradient_two_point(&image[f*rows*cols], &imGc[f*rows*cols], &imGr[f*rows*cols], rows, cols); + } \ No newline at end of file diff --git a/src/nanopyx/core/transform/_le_gradient_two_point.pyx b/src/nanopyx/core/transform/_le_gradient_two_point.pyx new file mode 100644 index 00000000..8b65b835 --- /dev/null +++ b/src/nanopyx/core/transform/_le_gradient_two_point.pyx @@ -0,0 +1,178 @@ +# cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=False + +import numpy as np +cimport numpy as np +from ...__opencl__ import cl, cl_array, _fastest_device +from ...__liquid_engine__ import LiquidEngine + +from cython.parallel import prange +from .__interpolation_tools__ import check_image + +cdef extern from "_c_gradients.h": + void _c_gradient_two_point(float* pixels, float* GxArray, float* GyArray, int w, int h) nogil + +class GradientTwoPoint(LiquidEngine): + + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): + self._designation = "GradientTwoPoint" + super().__init__( + clear_benchmarks=clear_benchmarks, testing=testing, + verbose=verbose) + + def run(self, image, run_type = None): + image = check_image(image) + return self._run(image, run_type=run_type) + + def benchmark(self, image): + image = check_image(image) + return super().benchmark(image) + + def _run_unthreaded(self, float[:,:,:] image): + """ + @cpu + @cython + """ + cdef int nFrames = image.shape[0] + cdef float [:,:,:] gradient_col = np.zeros_like(image) + cdef float [:,:,:] gradient_row = np.zeros_like(image) + + cdef int n + with nogil: + for n in range(nFrames): + _c_gradient_two_point(&image[n,0,0], &gradient_col[n,0,0], &gradient_row[n,0,0], image.shape[1], image.shape[2]) + + return gradient_col, gradient_row + + def _run_threaded(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ + + cdef int nFrames = image.shape[0] + cdef float [:,:,:] gradient_col = np.zeros_like(image) + cdef float [:,:,:] gradient_row = np.zeros_like(image) + + cdef int n + with nogil: + for n in prange(nFrames): + _c_gradient_two_point(&image[n,0,0], &gradient_col[n,0,0], &gradient_row[n,0,0], image.shape[1], image.shape[2]) + + return gradient_col, gradient_row + def _run_threaded_guided(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ + + cdef int nFrames = image.shape[0] + cdef float [:,:,:] gradient_col = np.zeros_like(image) + cdef float [:,:,:] gradient_row = np.zeros_like(image) + + cdef int n + with nogil: + for n in prange(nFrames, schedule="guided"): + _c_gradient_two_point(&image[n,0,0], &gradient_col[n,0,0], &gradient_row[n,0,0], image.shape[1], image.shape[2]) + + return gradient_col, gradient_row + def _run_threaded_dynamic(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ + + cdef int nFrames = image.shape[0] + cdef float [:,:,:] gradient_col = np.zeros_like(image) + cdef float [:,:,:] gradient_row = np.zeros_like(image) + + cdef int n + with nogil: + for n in prange(nFrames, schedule="dynamic"): + _c_gradient_two_point(&image[n,0,0], &gradient_col[n,0,0], &gradient_row[n,0,0], image.shape[1], image.shape[2]) + + return gradient_col, gradient_row + def _run_threaded_static(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ + + cdef int nFrames = image.shape[0] + cdef float [:,:,:] gradient_col = np.zeros_like(image) + cdef float [:,:,:] gradient_row = np.zeros_like(image) + + cdef int n + with nogil: + for n in prange(nFrames, schedule="static"): + _c_gradient_two_point(&image[n,0,0], &gradient_col[n,0,0], &gradient_row[n,0,0], image.shape[1], image.shape[2]) + + return gradient_col, gradient_row + + def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device + + # QUEUE AND CONTEXT + cl_ctx = cl.Context([device['device']]) + dc = device['device'] + cl_queue = cl.CommandQueue(cl_ctx) + + # Swap row and columns because opencl is strange and stores the + # array in a buffer in fortran ordering despite the original + # numpy array being in C order. + #image = np.ascontiguousarray(np.swapaxes(image, 1, 2), dtype=np.float32) + + cdef int nFrames = image.shape[0] + cdef int row = image.shape[1] + cdef int col = image.shape[2] + cdef float [:,:,:] gradient_col = np.zeros_like(image) + cdef float [:,:,:] gradient_row = np.zeros_like(image) + + max_slices = int((dc.global_mem_size // (image[0,:,:].nbytes + gradient_col[0,:,:].nbytes + gradient_row[0,:,:].nbytes))/mem_div) + max_slices = self._check_max_slices(image, max_slices) + + mf = cl.mem_flags + + input_opencl = cl.Buffer(cl_ctx, mf.READ_ONLY, self._check_max_buffer_size(image[0:max_slices,:,:].nbytes, device['device'], max_slices)) + output_opencl_col = cl.Buffer(cl_ctx, mf.WRITE_ONLY, self._check_max_buffer_size(gradient_col[0:max_slices,:,:].nbytes, device['device'] , max_slices)) + output_opencl_row = cl.Buffer(cl_ctx, mf.WRITE_ONLY, self._check_max_buffer_size(gradient_row[0:max_slices, :, :].nbytes, device['device'], max_slices)) + + cl.enqueue_copy(cl_queue, input_opencl, image[0:max_slices,:,:]).wait() + + code = self._get_cl_code("_le_gradient_two_point.cl", device['DP']) + prg = cl.Program(cl_ctx, code).build() + knl = prg.gradient_two_point + + for i in range(0, image.shape[0], max_slices): + if image.shape[0] - i >= max_slices: + n_slices = max_slices + else: + n_slices = image.shape[0] - i + knl(cl_queue, + (n_slices,), + None, + input_opencl, + output_opencl_col, + output_opencl_row, + np.int32(row), + np.int32(col)).wait() + + cl.enqueue_copy(cl_queue, gradient_col[i:i+n_slices,:,:], output_opencl_col).wait() + cl.enqueue_copy(cl_queue, gradient_row[i:i+n_slices,:,:], output_opencl_row).wait() + if i+n_slices(gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int colsM = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rowsM = image_interp.shape[1] + cdef int colsM = image_interp.shape[2] cdef float [:,:,:] rgc_map = np.zeros((nFrames, rowsM, colsM), dtype=np.float32) @@ -88,8 +88,8 @@ class RadialGradientConvergence(LiquidEngine): cdef int margin = int(fwhm * magnification) cdef int nFrames = gradient_col_interp.shape[0] - cdef int rowsM = (gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int colsM = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rowsM = image_interp.shape[1] + cdef int colsM = image_interp.shape[2] cdef float [:,:,:] rgc_map = np.zeros((nFrames, rowsM, colsM), dtype=np.float32) @@ -120,8 +120,8 @@ class RadialGradientConvergence(LiquidEngine): cdef int margin = int(fwhm * magnification) cdef int nFrames = gradient_col_interp.shape[0] - cdef int rowsM = (gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int colsM = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rowsM = image_interp.shape[1] + cdef int colsM = image_interp.shape[2] cdef float [:,:,:] rgc_map = np.zeros((nFrames, rowsM, colsM), dtype=np.float32) @@ -152,8 +152,8 @@ class RadialGradientConvergence(LiquidEngine): cdef int margin = int(fwhm * magnification) cdef int nFrames = gradient_col_interp.shape[0] - cdef int rowsM = (gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int colsM = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rowsM = image_interp.shape[1] + cdef int colsM = image_interp.shape[2] cdef float [:,:,:] rgc_map = np.zeros((nFrames, rowsM, colsM), dtype=np.float32) @@ -184,8 +184,8 @@ class RadialGradientConvergence(LiquidEngine): cdef int margin = int(fwhm * magnification) cdef int nFrames = gradient_col_interp.shape[0] - cdef int rowsM = (gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int colsM = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rowsM = image_interp.shape[1] + cdef int colsM = image_interp.shape[2] cdef float [:,:,:] rgc_map = np.zeros((nFrames, rowsM, colsM), dtype=np.float32) @@ -229,8 +229,8 @@ class RadialGradientConvergence(LiquidEngine): # Sizes cdef int nFrames = gradient_col_interp.shape[0] - cdef int rows_interpolated = (gradient_row_interp.shape[1] / Gx_Gy_MAGNIFICATION) - cdef int cols_interpolated = (gradient_row_interp.shape[2] / Gx_Gy_MAGNIFICATION) + cdef int rows_interpolated = image_interp.shape[1] + cdef int cols_interpolated = image_interp.shape[2] # Grid size of the global work space lowest_row = margin diff --git a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx index 002423ed..0ef5070b 100644 --- a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx +++ b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx @@ -8,6 +8,8 @@ from ...__liquid_engine__ import LiquidEngine from cython.parallel import prange from .__interpolation_tools__ import check_image +from scipy.stats import pearsonr as pearson_correlation + cdef extern from "_c_gradients.h": void _c_gradient_roberts_cross(float* pixels, float* GxArray, float* GyArray, int w, int h) nogil @@ -26,6 +28,25 @@ class GradientRobertsCross(LiquidEngine): def benchmark(self, image): image = check_image(image) return super().benchmark(image) + + def _compare_runs(self, output_1, output_2): + """@public""" + if isinstance(output_1, tuple): + output_1 = np.concatenate((output_1[0], output_1[1]), axis=0) + if isinstance(output_2, tuple): + output_2 = np.concatenate((output_2[0], output_2[1]), axis=0) + if output_1.ndim > 2: + pcc = 0 + for i in range(output_1.shape[0]): + pcc += pearson_correlation(output_1[i, :, :].flatten(), output_2[i, :, :].flatten()).statistic + pcc /= output_1.shape[0] + else: + pcc = pearson_correlation(output_1.flatten(), output_2.flatten()).statistic + + if pcc > 0.8: + return True + else: + return False def _run_unthreaded(self, float[:,:,:] image): """ diff --git a/src/nanopyx/core/transform/error_map.pyx b/src/nanopyx/core/transform/error_map.pyx index 65e67a42..837838a7 100644 --- a/src/nanopyx/core/transform/error_map.pyx +++ b/src/nanopyx/core/transform/error_map.pyx @@ -13,6 +13,7 @@ from scipy.stats import ( # TODO RECHECK VALUES from ._le_interpolation_catmull_rom import ShiftAndMagnify +from ._interpolation import fht_space_interpolation as dht_interpolation from ..analysis.pearson_correlation import pearson_correlation @@ -48,8 +49,9 @@ cdef class ErrorMap: cdef float[:,:,:] result if magnification > 1: - result = interpolator.run(np.asarray(img_ref).astype(np.float32),0,0,magnification,magnification) - img_ref_int[:,:] = result[0,:,:] + result = dht_interpolation(np.asarray([img_ref]), magnification) + + img_ref_int = result[0, :, :] else: img_ref_int = img_ref diff --git a/tests/test_package_level_calls.py b/tests/test_package_level_calls.py index d42e0957..147d9fb5 100644 --- a/tests/test_package_level_calls.py +++ b/tests/test_package_level_calls.py @@ -12,9 +12,9 @@ def test_package_SRRF(): nanopyx.SRRF(img) -# def test_package_eSRRF(): -# img = np.random.random((10, 100, 100)).astype(np.float32) -# nanopyx.eSRRF(img) +def test_package_eSRRF(): + img = np.random.random((10, 100, 100)).astype(np.float32) + nanopyx.eSRRF(img) def test_package_frc():