diff --git a/notebooks/LiquidEngineImplementationExample.ipynb b/notebooks/LiquidEngineImplementationExample.ipynb new file mode 100644 index 00000000..ad8df266 --- /dev/null +++ b/notebooks/LiquidEngineImplementationExample.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create a Liquid Engine Class implementing the two modes of Scikit-image NLM denoising as two different implementations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a random image array to be processed" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "img = np.random.random((1, 100, 100)).astype(np.float32)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmark the two implementations" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cupy implementation is not available. 
Make sure you have the right version of Cupy and CUDA installed.\n", + "Agent: MyLiquidEngineClass using ski_nlm_fast ran in 21.019929375033826 seconds\n", + "Agent: MyLiquidEngineClass using ski_nlm_nonfast ran in 0.3058308749459684 seconds\n", + "Fastest run type: ski_nlm_nonfast\n", + "Slowest run type: ski_nlm_fast\n", + "ski_nlm_nonfast is 68.73x faster than ski_nlm_fast\n" + ] + }, + { + "data": { + "text/plain": [ + "[(0.3058308749459684, 'ski_nlm_nonfast', None),\n", + " (21.019929375033826, 'ski_nlm_fast', None)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from myliquidengineclass import MyLiquidEngineClass\n", + "my_liquid = MyLiquidEngineClass()\n", + "my_liquid.benchmark(img, patch_size=5, patch_distance=11, h=0.1, sigma=0.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmark the two implementations with different image size" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent: MyLiquidEngineClass using ski_nlm_fast ran in 0.013037208002060652 seconds\n", + "Agent: MyLiquidEngineClass using ski_nlm_nonfast ran in 0.06136862491257489 seconds\n", + "Fastest run type: ski_nlm_fast\n", + "Slowest run type: ski_nlm_nonfast\n", + "ski_nlm_fast is 4.71x faster than ski_nlm_nonfast\n" + ] + }, + { + "data": { + "text/plain": [ + "[(0.013037208002060652, 'ski_nlm_fast', None),\n", + " (0.06136862491257489, 'ski_nlm_nonfast', None)]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_liquid = MyLiquidEngineClass()\n", + "my_liquid.benchmark(np.random.random((500, 500)).astype(np.float32), patch_size=5, patch_distance=1, h=0.1, sigma=0.0)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ocb_dev", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/myliquidengineclass.py b/notebooks/myliquidengineclass.py
new file mode 100644
index 00000000..91ab3b1d
--- /dev/null
+++ b/notebooks/myliquidengineclass.py
@@ -0,0 +1,38 @@
+import numpy as np
+from nanopyx.__liquid_engine__ import LiquidEngine
+from skimage.restoration import denoise_nl_means
+
+
+class MyLiquidEngineClass(LiquidEngine):
+    """Example Liquid Engine class benchmarking the two scikit-image NLM modes.
+
+    The fast and non-fast modes of ``denoise_nl_means`` are exposed as two
+    separate ``_run_*`` implementations.
+    """
+
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
+        """Set the designation and forward the options to LiquidEngine."""
+        self._designation = "MyLiquidEngineClass"
+        super().__init__(
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
+
+    def run(self, image: np.ndarray, patch_size: int, patch_distance: int, h: float, sigma: float, run_type: str = None):
+        """Denoise ``image`` through the engine's ``_run`` dispatcher.
+
+        The image is cast to float32 first. ``run_type`` is forwarded so a
+        caller can request a specific implementation (presumably by name,
+        e.g. "ski_nlm_fast"); None leaves the choice to the engine.
+        """
+        # Compare against the dtype object: the string "np.float32" is not a
+        # valid numpy dtype name and makes astype() raise TypeError.
+        if image.dtype != np.float32:
+            image = image.astype(np.float32)
+        return self._run(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, run_type=run_type)
+
+    def _run_ski_nlm_fast(self, image, patch_size, patch_distance, h, sigma):
+        # scikit-image NLM denoising in fast mode.
+        return denoise_nl_means(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
+
+    def _run_ski_nlm_nonfast(self, image, patch_size, patch_distance, h, sigma):
+        # scikit-image NLM denoising in non-fast mode.
+        return denoise_nl_means(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index d9047f8f..4da05d08 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ build-backend = "setuptools.build_meta"
 name = "nanopyx"
 description = "Nanoscopy Python library (NanoPyx, the successor to NanoJ) - focused on light microscopy and super-resolution imaging"
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.9,<3.12"
 license = {
file = "LICENSE.txt" } keywords = [ "NanoJ", @@ -36,6 +36,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ + "liquid_engine", "mako>=1.3.0", "cython>=0.29.32", "numpy>=1.22,<2", @@ -139,7 +140,7 @@ reportUndefinedVariable = false reportMissingImports = false [tool.pytest.ini_options] -addopts = "--cov=nanopyx --plots --doctest-modules --doctest-cython --ignore-glob=run*Tools.py --ignore=setup.py --ignore=notebooks/ --ignore=src/scripts --ignore=src/notebookchef --ignore=tests/notebooks" +addopts = "--cov=nanopyx --plots --doctest-modules --doctest-cython --ignore-glob=run*Tools.py --ignore=setup.py --ignore=notebooks/ --ignore=src/scripts --ignore=src/notebookchef --ignore=tests/notebooks --cov-report term-missing" timeout = 6001 plt_dirname = "tests_plots" doctest_encoding = "latin1" diff --git a/src/mako_templates/_le_interpolation_base.pyx b/src/mako_templates/_le_interpolation_base.pyx index 62bc88bf..1503d573 100644 --- a/src/mako_templates/_le_interpolation_base.pyx +++ b/src/mako_templates/_le_interpolation_base.pyx @@ -9,7 +9,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_${self.attr.inter_name}.h": @@ -23,10 +23,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_${self.attr.inter_name}" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, 
run_type=None) -> np.ndarray: """ @@ -65,8 +62,12 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -115,6 +116,13 @@ class ShiftAndMagnify(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -152,11 +160,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_${self.attr.inter_name}" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -199,7 +205,12 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, 
shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -252,6 +263,13 @@ class ShiftScaleRotate(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -298,11 +316,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_${self.attr.inter_name}" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -339,7 +355,12 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -396,7 +417,13 @@ class PolarTransform(LiquidEngine): % for sch in schedulers: def 
_run_${sch}(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx index 4adae44d..ec8db173 100644 --- a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx +++ b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx @@ -9,7 +9,7 @@ from cython.parallel import parallel, prange from libc.math cimport sqrt,pow from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from .ccm cimport _calculate_slice_ccm from .estimate_shift import GetMaxOptimizer @@ -99,8 +99,7 @@ class ChannelRegistrationEstimator(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ChannelRegistrationEstimator" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=None): return self._run(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=run_type) @@ -110,6 +109,13 @@ class ChannelRegistrationEstimator(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ _runtype = "${sch}".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -251,8 +257,14 @@ class 
ChannelRegistrationEstimator(LiquidEngine): % endfor - def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device): - _runtype = "OpenCL_" + device["device"].name + def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None): + """ + @gpu + """ + if device is None: + device = _fastest_device + + _runtype = "opencl" crsm = ShiftAndMagnify(verbose=False) cdef float[:, :] img_ref = np.asarray(img_stack[ref_index], dtype=np.float32) diff --git a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx index de121ac0..1c928af6 100644 --- a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx +++ b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx @@ -1,5 +1,5 @@ <%! -schedulers = ['unthreaded','threaded','threaded_guided','threaded_dynamic','threaded_static'] +schedulers = ['unthreaded','threaded'] %># cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=False import time import scipy @@ -27,10 +27,7 @@ class DriftEstimator(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "DriftEstimator" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=False, - threaded_dynamic_=False, threaded_guided_=False, - njit_=False, python_=False, transonic_=False, cuda_=False, dask_=False, verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, time_averaging: int = 2, max_drift: int = 5, ref_option: int = 0, run_type=None): return self._run(np.asarray(image).astype(np.float32), time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option, 
run_type=run_type) @@ -40,7 +37,13 @@ class DriftEstimator(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ if not _check_even_square(image): image = _make_even_square(image) @@ -129,19 +132,9 @@ class DriftEstimator(LiquidEngine): % elif sch=='threaded': for s in prange(n_slices): % else: - for s in prange(n_slices,schedule="${sch.split('_')[1]}"): + for s in prange(n_slices): %endif output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) return np.asarray(output).astype(np.float32) %endfor - - -# % if sch=='unthreaded': -# for i in range(n_blocks): -# % elif sch=='threaded': -# for i in prange(n_blocks): -# % else: -# for i in prange(n_blocks,schedule="${sch.split('_')[1]}"): -# %endif -# average[i] = np.mean(image[i*time_averaging:(i+1)*time_averaging, :, :], axis=0) \ No newline at end of file diff --git a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx index 2cbc1ddf..fa72d800 100644 --- a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx @@ -15,7 +15,7 @@ from libc.math cimport cos, sin from .__interpolation_tools__ import check_image, value2array from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device class Convolution(LiquidEngine): @@ -26,11 +26,7 @@ class Convolution(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "Conv2D" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, 
unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - njit_=True, python_=True, transonic_=True, cuda_=True, dask_=True, - verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, kernel, run_type=None): image = check_array(image) @@ -41,7 +37,13 @@ class Convolution(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -85,8 +87,13 @@ class Convolution(LiquidEngine): % endfor - def _run_opencl(self, image, kernel, device): - + def _run_opencl(self, image, kernel, device=None): + """ + @gpu + """ + if device is None: + device = _fastest_device + # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -117,16 +124,35 @@ class Convolution(LiquidEngine): return image_out def _run_python(self, image, kernel): + """ + @cpu + """ return convolution2D_python(image, kernel).astype(np.float32) def _run_transonic(self, image, kernel): + """ + @cpu + @threaded + """ return convolution2D_transonic(image, kernel).astype(np.float32) def _run_dask(self, image, kernel): + """ + @cpu + @threaded + """ return convolution2D_dask(image, kernel).astype(np.float32) def _run_cuda(self, image, kernel): + """ + @gpu + """ return convolution2D_cuda(image, kernel).astype(np.float32) def _run_njit(self, image, kernel): + """ + @cpu + @threaded + @numba + """ return convolution2D_numba(image, kernel).astype(np.float32) diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx index 9e40749e..09710307 100644 --- a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx @@ -12,7 +12,7 @@ from libc.math cimport cos, sin from 
.__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ._le_interpolation_catmull_rom import ShiftAndMagnify from ._le_roberts_cross_gradients import GradientRobertsCross @@ -26,10 +26,7 @@ class eSRRF(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "eSRRF_ST" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, magnification: int = 5, radius: float = 1.5, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type=None): image = check_image(image) @@ -40,6 +37,12 @@ class eSRRF(LiquidEngine): return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device + # TODO doIntensityWeighting is irrelevant on gpu2 cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -151,6 +154,11 @@ class eSRRF(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "${sch}".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -166,6 +174,10 @@ class eSRRF(LiquidEngine): % endfor def _run_unthreaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @cython + """ runtype = "Unthreaded" crsm = 
ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx index 2f244a0b..e10253b3 100644 --- a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx @@ -28,9 +28,7 @@ class eSRRF3D(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "eSRRF_3D" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) self._gradients_s_interpolated = None self._gradients_r_interpolated = None self._gradients_c_interpolated = None @@ -60,8 +58,14 @@ class eSRRF3D(LiquidEngine): return super().benchmark(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) % for sch in schedulers: - def _run_${sch}(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_${sch}(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma diff --git a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx index d49153be..e854107f 
100644 --- a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx @@ -14,7 +14,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device import os os.environ['PYOPENCL_NO_CACHE']='1' @@ -38,9 +38,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -76,6 +73,9 @@ class NLMDenoising(LiquidEngine): def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) @@ -83,6 +83,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ cdef float distance_cutoff = 5.0 cdef float var = sigma * sigma @@ -161,6 +165,11 @@ class NLMDenoising(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -227,7 +236,13 @@ class NLMDenoising(LiquidEngine): %endfor - def _run_opencl(self, 
image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device + cl_ctx = cl.Context([device['device']]) dc = device['device'] cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx index d828e973..40ceeacf 100644 --- a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx @@ -12,7 +12,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_integral_image.h": @@ -31,9 +31,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising_patch" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -68,6 +65,9 @@ class NLMDenoising(LiquidEngine): return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) @@ -75,6 +75,10 @@ 
class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ cdef float distance_cutoff = 5.0 cdef float var = sigma * sigma @@ -153,6 +157,11 @@ class NLMDenoising(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -231,7 +240,12 @@ class NLMDenoising(LiquidEngine): %endfor - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] diff --git a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx index 32ba7c96..0d90e2a4 100644 --- a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx @@ -14,7 +14,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_patch_distance.h": @@ -30,9 +30,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising_pixel" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray 
image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -67,6 +64,9 @@ class NLMDenoising(LiquidEngine): return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False) @@ -75,6 +75,13 @@ class NLMDenoising(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -143,7 +150,13 @@ class NLMDenoising(LiquidEngine): %endfor - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + @cython + """ + if device is None: + device = _fastest_device cl_ctx = cl.Context([device['device']]) dc = device['device'] cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx index 651e6e50..3ad05ca6 100644 --- a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx @@ -8,7 +8,7 @@ cimport numpy as np from cython.parallel import parallel, prange from libc.math cimport sqrt, pow -from ...__opencl__ import cl, cl_array +from 
...__opencl__ import cl, cl_array, _fastest_device from ...__liquid_engine__ import LiquidEngine from .__interpolation_tools__ import check_image @@ -24,8 +24,6 @@ class RadialGradientConvergence(LiquidEngine): self._designation = "RadialGradientConvergence" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) @@ -43,7 +41,10 @@ class RadialGradientConvergence(LiquidEngine): return super().benchmark(gradient_col_interp, gradient_row_interp, image_interp, magnification, radius, sensitivity, doIntensityWeighting) def _run_unthreaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -73,7 +74,11 @@ class RadialGradientConvergence(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -107,6 +112,11 @@ class RadialGradientConvergence(LiquidEngine): def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device # gradient gxgymag*mag*size # image_interp = mag*size diff --git a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx index 14aa2fb5..4b4ac4b5 100644 --- 
a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx @@ -9,7 +9,7 @@ from cython.parallel import parallel, prange from libc.math cimport sqrt, pi, fabs, cos, sin from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from .__interpolation_tools__ import check_image from ._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify @@ -35,8 +35,6 @@ class Radiality(LiquidEngine): self._designation = "Radiality" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=False, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) def run(self, image, image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True, run_type = None): @@ -49,7 +47,7 @@ class Radiality(LiquidEngine): image_interp = check_image(image_interp) return super().benchmark(image, image_interp, magnification, ringRadius, border, radialityPositivityConstraint, doIntensityWeighting) - def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): + """def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): cdef int _magnification = magnification cdef int _border = border @@ -85,11 +83,15 @@ class Radiality(LiquidEngine): else: imRad[f,j,i] = _c_calculate_radiality_per_subpixel(i, j, &imGx[f,0,0], &imGy[f,0,0], xRingCoordinates, yRingCoordinates, _magnification, _ringRadius, nRingCoordinates, _radialityPositivityConstraint, h, w) - 
return np.asarray(imRad) + return np.asarray(imRad)""" % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -132,6 +134,11 @@ class Radiality(LiquidEngine): def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device cl_ctx = cl.Context([device['device']]) cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx index 48967a41..d3e8ae1c 100644 --- a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx @@ -4,7 +4,7 @@ schedulers = ['threaded','threaded_guided','threaded_dynamic','threaded_static'] import numpy as np cimport numpy as np -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ...__liquid_engine__ import LiquidEngine from cython.parallel import prange @@ -19,8 +19,6 @@ class GradientRobertsCross(LiquidEngine): self._designation = "GradientRobertsCross" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) def run(self, image, run_type = None): @@ -32,7 +30,10 @@ class GradientRobertsCross(LiquidEngine): return super().benchmark(image) def _run_unthreaded(self, float[:,:,:] image): - + """ + 
@cpu + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) cdef float [:,:,:] gradient_row = np.zeros_like(image) @@ -46,6 +47,11 @@ class GradientRobertsCross(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -63,7 +69,12 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row % endfor - def _run_opencl(self, float[:,:,:] image, dict device, int mem_div=1): + def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx index 0718f968..a8f4fcf9 100644 --- a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx +++ b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx @@ -9,7 +9,7 @@ cimport numpy as np from cython.parallel import prange from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ._le_mandelbrot_benchmark_ import mandelbrot as _py_mandelbrot from ._le_mandelbrot_benchmark_ import njit_mandelbrot as _njit_mandelbrot @@ -25,9 +25,7 @@ class MandelbrotBenchmark(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "Mandelbrot_Benchmark" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, python_=True, njit_=True, + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, int size=1000, float 
r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1, run_type=None) -> np.ndarray: @@ -45,7 +43,12 @@ class MandelbrotBenchmark(LiquidEngine): def benchmark(self, int size, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1): return super().benchmark(size, r_start, r_end, c_start, c_end) - def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device) -> np.ndarray: + def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -77,6 +80,10 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot.get() def _run_unthreaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -94,6 +101,11 @@ class MandelbrotBenchmark(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -115,11 +127,19 @@ class MandelbrotBenchmark(LiquidEngine): % endfor def _run_python(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) _py_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end) return im_mandelbrot def _run_njit(self, int size=10, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1) -> np.ndarray: + """ + @cpu + @threaded + @numba + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) _njit_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end) return im_mandelbrot diff --git 
a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py index 946e893a..641a80a1 100644 --- a/src/nanopyx/__agent__.py +++ b/src/nanopyx/__agent__.py @@ -1,244 +1 @@ -import platform -import random - -import numpy as np -from sklearn.linear_model import LogisticRegression -from scipy.stats import norm - -from .__njit__ import njit_works -from .__opencl__ import opencl_works, devices - - -class Agent_: - - """ - Base class for the Agent of the Nanopyx Liquid Engine - Pond, James Pond - """ - - def __init__( - self, - ) -> None: - """ - Initialize the Agent - The agent is supposed to work as a singleton object, initialized only once in the __init__.py of nanopyx - PS: (Is this good enough or is it necessary to implement the singleton design pattern?) - - Agent responsabilities: - 1. Store the current state of the machine (e.g. OS, CPU, RAM, GPU, Python version etc.); - 2. Store the current state of ALL initialized LE objects (e.g. anything that is currently running, anything that is scheduled to run, - runs previously executed in the current session etc.); - 3. Whenever a LE object wants to run, it must query the Agent on what is the best implementation for it; - 4. 
Tests whether there was an unexpected delay and adjust following paths based on it; - """ - - ### MACHINE INFO ### - self.os_info = {"OS": platform.platform(), "Architecture": platform.machine()} - self.cpu_info = {"CPU": platform.processor()} - self.ram_info = {"RAM": "TBD"} - self.py_info = { - "Version": platform.python_version(), - "Implementation": platform.python_implementation(), - "Compiler": platform.python_compiler(), - } - - self.numba_info = {"Numba": njit_works()} - self.pyopencl_info = {"PyOpenCL": opencl_works(), "Devices": devices} - self.cuda_info = {"CUDA": "TBD"} - ### MACHINE INFO ### - - self._current_runs = [] - self._scheduled_runs = [] - self._finished_runs = [] - - self.delayed_runtypes = {} # Store runtypes as keys and their values as (delay_factor, delay_prob) - - def _get_ordered_run_types(self, fn, args, kwargs): - """@public - Retrieves an ordered list of run_types for the given args and kwargs - """ - - # str representation of the arguments and their corresponding 'norm' - repr_args, repr_norm = fn._get_args_repr_score(*args, **kwargs) - # dictionary to hold speeds - fast_avg_speed = {} - fast_std_speed = {} - slow_avg_speed = {} - slow_std_speed = {} - # fn._benchmarks is a dictionary of dictionaries. The first key is the run_type, the second key is the repr_args - # Check every run_type for the most similar args - for run_type in fn._run_types: - if repr_args in fn._benchmarks[run_type]: - run_info = fn._benchmarks[run_type][repr_args][1:] - else: - # if the repr_args are not in the benchmarks, find the most similar repr_args - best_score = np.inf - best_repr_args = None - for repr_args_ in fn._benchmarks[run_type]: - score = np.abs(fn._benchmarks[run_type][repr_args_][0] - repr_norm) - if score < best_score: - best_score = score - best_repr_args = repr_args_ - # What happens if there are no benchmarks for this runtype? 
- if best_repr_args is None: - run_info = [0] - else: - run_info = fn._benchmarks[run_type][best_repr_args][1:] - - if None in run_info: # yamls null are read into None python objects - continue - - if len(run_info) < 2: - # Fall back to default values - if "OpenCL" in run_type: - rt = "OpenCL" - else: - rt = run_type - - best_score = np.inf - best_repr_args = None - for repr_args_ in fn._default_benchmarks[rt]: - score = np.abs(fn._default_benchmarks[rt][repr_args_][0] - repr_norm) - if score < best_score: - best_score = score - best_repr_args = repr_args_ - run_info = fn._default_benchmarks[rt][best_repr_args][1:] - - run_info = np.array(run_info) - if len(run_info) > 50: - run_info = run_info[-50:] - - fast_values = np.partition(run_info, len(run_info) // 2)[: len(run_info) // 2] - slow_values = np.partition(run_info, len(run_info) // 2)[len(run_info) // 2 :] - fast_avg_speed[run_type] = np.average(fast_values) - fast_std_speed[run_type] = np.std(fast_values) - slow_avg_speed[run_type] = np.average(slow_values) - slow_std_speed[run_type] = np.std(slow_values) - - return fast_avg_speed, fast_std_speed, slow_avg_speed, slow_std_speed - - def _calculate_prob_of_delay(self, runtimes_history, avg, std): - """@public - Calculates the probability that the given run_type is still delayed using historical data - """ - - # Boolean array, True if delay, False if not - delays = runtimes_history > avg + 4 * std - - model = LogisticRegression() - model.fit([[state] for state in delays[:-1]], delays[1:]) - - return model.predict_proba([[True]])[:, model.classes_.tolist().index(True)][0] - - def _check_delay(self, run_type, runtime, runtimes_history, verbose=True): - """@public - Checks if the given run_type ran delayed in the previous run when compared with historical data - If delayed: - 1. Calculates a probability that this delay is maintained - 2. 
Stores the delay factor and the probability - """ - - threaded_runtypes = ["Threaded", "Threaded_static", "Threaded_dynamic", "Threaded_guided"] - - runtimes_history = np.array(runtimes_history) - if len(runtimes_history) > 50: - runtimes_history = runtimes_history[-50:] - fast_values = np.partition(runtimes_history, len(runtimes_history) // 2)[: len(runtimes_history) // 2] - slow_values = np.partition(runtimes_history, len(runtimes_history) // 2)[len(runtimes_history) // 2 :] - - fast_avg_speed = np.average(fast_values) - fast_std_speed = np.std(fast_values) - slow_avg_speed = np.average(slow_values) - slow_std_speed = np.std(slow_values) - - if run_type in self.delayed_runtypes: - if runtime < (slow_avg_speed - slow_std_speed) or runtime < (fast_avg_speed + fast_std_speed): - if "Threaded" in run_type: - for threaded_run_type in threaded_runtypes: - self.delayed_runtypes.pop(threaded_run_type, None) - else: - if run_type in self.delayed_runtypes: - self.delayed_runtypes.pop(run_type, None) - return "Delay off" - - if runtime > fast_avg_speed + 4 * fast_std_speed: - runtimes_history = np.append(runtimes_history, runtime) - delay_factor = runtime / fast_avg_speed - try: - delay_prob = self._calculate_prob_of_delay(runtimes_history, fast_avg_speed, fast_std_speed) - except ValueError: - delay_prob = 0.01 - if verbose: - print( - f"Run type {run_type} was delayed in the previous run. 
Delay factor: {delay_factor}, Delay probability: {delay_prob}" - ) - - if "Threaded" in run_type: - for threaded_run_type in threaded_runtypes: - self.delayed_runtypes[threaded_run_type] = (delay_factor, delay_prob) - else: - self.delayed_runtypes[run_type] = (delay_factor, delay_prob) - - def _adjust_times(self, fast_device_times, slow_device_times): - """@public - Adjusts the historic avg time of a run_type if it was delayed in previous runs - """ - adjusted_times = fast_device_times.copy() - for runtype in self.delayed_runtypes.keys(): - if runtype in fast_device_times.keys(): - delay_factor, delay_prob = self.delayed_runtypes[runtype] - # Weighted avg by the probability the run_type is still delayed - # expected_time * P(~delay) + delayed_time * P(delay) - adjusted_times[runtype] = ( - fast_device_times[runtype] * (1 - delay_prob) - + fast_device_times[runtype] * delay_factor * delay_prob - ) - - return adjusted_times - - def get_run_type(self, fn, args, kwargs): - """ - Returns the best run_type for the given args and kwargs - """ - - # Get list of run types - fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs) - - # Penalize the average time a run_type had if that run_type was delayed in previous runs - if len(self.delayed_runtypes.keys()) > 0: - adjusted_avg = self._adjust_times(fast_avg, slow_avg) - - if sorted(fast_avg, key=fast_avg.get)[0] == sorted(adjusted_avg, key=adjusted_avg.get)[0]: - return sorted(fast_avg, key=fast_avg.get)[0] - - weights = [(1 / adjusted_avg[k]) ** 2 for k in adjusted_avg] - weights = weights / np.sum(weights) - - # failsafe - if sum(weights) == 0: - weights = [1 for k in adjusted_avg] - - return random.choices(list(adjusted_avg.keys()), weights=weights, k=1)[0] - else: - return sorted(fast_avg, key=fast_avg.get)[0] - - def _inform(self, fn, verbose=True): - """@public - Informs the Agent that a LE object finished running - """ - - repr_args = fn._last_args - run_type = fn._last_runtype - - 
historical_data = fn._benchmarks[run_type][repr_args][1:] - - assert historical_data[-1] == fn._last_time, "Historical data is not consistent with the last runtime" - - if verbose: - print(f"Agent: {fn._designation} using {run_type} ran in {fn._last_time} seconds") - - if len(historical_data) > 19: - self._check_delay(run_type, historical_data[-1], historical_data[:-1], verbose=verbose) - - -Agent = Agent_() +from liquid_engine import Agent \ No newline at end of file diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py index 56b7b692..6638e067 100644 --- a/src/nanopyx/__liquid_engine__.py +++ b/src/nanopyx/__liquid_engine__.py @@ -1,531 +1 @@ -import os -import timeit -import yaml -import datetime -import inspect -import warnings -from functools import partial, reduce -from itertools import combinations -from pathlib import Path - -from importlib_resources import files - -import numpy as np - -# This will in the future come from the Agent -from .__njit__ import njit_works -from .__dask__ import dask_works -from .__transonic__ import transonic_works -from .__cuda__ import cuda_works -from .__opencl__ import opencl_works, devices, cl - -__home_folder__ = os.path.expanduser("~") -__benchmark_folder__ = os.path.join(__home_folder__, ".nanopyx") -if not os.path.exists(__benchmark_folder__): - os.makedirs(__benchmark_folder__) - -from .__agent__ import Agent # noqa: E402 - -from .core.analysis.pearson_correlation import pearson_correlation - - -class LiquidEngine: - """@public - Base class for parts of the Nanopyx Liquid Engine - Vroom Vroom - """ - - def __init__( - self, - testing: bool = False, - opencl_: bool = False, - unthreaded_: bool = False, - threaded_: bool = False, - threaded_static_: bool = False, - threaded_dynamic_: bool = False, - threaded_guided_: bool = False, - python_: bool = False, - njit_: bool = False, - dask_: bool = False, - transonic_: bool = False, - cuda_: bool = False, - clear_benchmarks: bool = False, - verbose: 
bool = True, - ) -> None: - """@public - Initialize the Liquid Engine - The Liquid Engine base class is inherited by children classes that implement specific methods - - Engine responsabilities: - 1. Store implemented run types; - 2. Handle previous benchmarks and I/O; - 2. When queried, benchmark all available run types; - 3. Run a specific method using a selected run type; - - Benchmark files have the following format: - The benchmark file is read as dict of dicts. - BENCHMARK DICT FOR A SPECIFIC METHOD - |- RUN_TYPE #1 - | |- ARGS_REPR #1 - | | |- [score, t2run#1, t2run#2, t2run#3, ...] last are newer. nan means fail - | |- ARGS_REPR #2 - | | |- [score, t2run#1, t2run#2, t2run#3, ...] last are newer. nan means fail - | (...) - |- RUN_TYPE #2 - (...) - """ - - # Start by checking available run types - self._run_types = {} - if opencl_ and opencl_works(): - for d in devices: - self._run_types[f"OpenCL_{d['device'].name}"] = partial(self._run_opencl, device=d) - if threaded_: - self._run_types["Threaded"] = self._run_threaded - if unthreaded_: - self._run_types["Unthreaded"] = self._run_unthreaded - if threaded_static_: - self._run_types["Threaded_static"] = self._run_threaded_static - if threaded_dynamic_: - self._run_types["Threaded_dynamic"] = self._run_threaded_dynamic - if threaded_guided_: - self._run_types["Threaded_guided"] = self._run_threaded_guided - if python_: - self._run_types["Python"] = self._run_python - if njit_ and njit_works(): - self._run_types["Numba"] = self._run_njit - # Try to trigger early compilation - try: - self._run_njit() - except TypeError: - print("Consider adding default arguments to the njit implementation to trigger early compilation") - if dask_ and dask_works(): - self._run_types["Dask"] = self._run_dask - if transonic_ and transonic_works(): - self._run_types["Transonic"] = self._run_transonic - if cuda_ and cuda_works(): - self._run_types["Cuda"] = self._run_cuda - - self.testing = testing - self.mem_div = 1 - - # benchmarks 
file path - # e.g.: ~/.nanopyx/liquid/_le_interpolation_nearest_neighbor.cpython-310-darwin/ShiftAndMagnify.yml - base_path = os.path.join( - __benchmark_folder__, "liquid", os.path.split(os.path.splitext(inspect.getfile(self.__class__))[0])[1] - ) - os.makedirs(base_path, exist_ok=True) - self._benchmark_filepath = os.path.join(base_path, self.__class__.__name__ + ".yml") - - # Load config file if it exists, otherwise create an empty config - if not clear_benchmarks and os.path.exists(self._benchmark_filepath): - with open(self._benchmark_filepath) as f: - self._benchmarks = yaml.load(f, Loader=yaml.FullLoader) - else: - self._benchmarks = {} - - # check if the cfg dictionary has a key for every available run type - for run_type_designation in self._run_types.keys(): - if run_type_designation not in self._benchmarks: - self._benchmarks[run_type_designation] = {} - - # helper attribute for benchmarking function - self._last_args = None - self._last_runtype = None - self._last_time = None - - self.Agent = Agent - - # load defaults - try: - self._default_benchmarks = yaml.safe_load( - files(f'liquid_benchmarks.{inspect.getmodule(self.__class__).__name__.split(".")[-1]}') - .joinpath(self.__class__.__name__ + ".yml") - .read_text() - ) - except: - self._default_benchmarks = [] - - self.verbose = verbose - - def _run(self, *args, run_type=None, **kwargs): - """@public - Runs the function with the given args and kwargs - - The code above does the following: - 1. Check the specified run_type - - if str checks if the run type exists otherwise raise a NotImplementedError - 2. It will run the _run_{run_type} function - 3. 
It will return the result and the time taken to run - - :param args: args for the function - :param run_type: the run type to use - :param kwargs: kwargs for the function - :return: the result and time taken - """ - - if run_type is None and self.verbose: - print("Querying the Agent...") - run_type = self.Agent.get_run_type(self, args, kwargs) - print(f"Agent chose: {run_type}") - elif run_type is None: - run_type = self.Agent.get_run_type(self, args, kwargs) - elif run_type not in self._run_types: - print(f"Unexpected run type {run_type}") - print("Querying the Agent...") - run_type = self.Agent.get_run_type(self, args, kwargs) - print(f"Agent chose: {run_type}") - - # try to run - try: - if self.mem_div > 999: - raise ValueError( - f"Maxmimum memory division factor achieved, can not try any longer with {run_type}. Use a smaller input or a different run_type" - ) - t_start = timeit.default_timer() - result = self._run_types[run_type](*args, **kwargs) - t2run = timeit.default_timer() - t_start - arg_repr, arg_score = self._get_args_repr_score(*args, **kwargs) - self._store_results(arg_repr, arg_score, run_type, t2run) - - self._last_time = t2run - self._last_args = arg_repr - self._last_runtype = run_type - - self.Agent._inform(self, verbose=self.verbose) - - except (cl.MemoryError, cl.LogicError) as e: - print("Found: ", e) - print("Reducing maximum buffer size and trying again...") - self.mem_div += 1 - kwargs["mem_div"] = self.mem_div - result = self._run(*args, run_type=run_type, **kwargs) - except cl.Error as e: - if e.__str__() == "Buffer size is larger than device maximum memory allocation size": - print("Found: ", e) - print("Reducing maximum buffer size and trying again...") - self.mem_div += 1 - kwargs["mem_div"] = self.mem_div - result = self._run(*args, run_type=run_type, **kwargs) - else: - print(f"Unexpected error while trying to run {run_type}") - print(e) - print("Please try again with another run type") - result = None - except Exception as e: - 
print(f"Unexpected error while trying to run {run_type}") - print(e) - print("Please try again with another run type") - result = None - - self.mem_div = 1 - return result - - def benchmark(self, *args, **kwargs): - """ - 1. Run each available run type and record the run time and return value - 2. Sort the run times from fastest to slowest - 3. Compare each run type against each other, sorted by speed - - :param args: args for the run method - :param kwargs: kwargs for the run method - :return: a list of tuples containing the run time, run type name and optionally the return values - :rtype: [[run_time, run_type_name, return_value], ...] - """ - - # Create some lists to store runtimes and return values of run types - run_times = {} - returns = {} - - # Run each run type and record the run time and return value - for run_type in self._run_types: - r = self._run(*args, run_type=run_type, **kwargs) - - run_times[run_type] = self._last_time - - if self.testing: # Store return values if testing - returns[run_type] = r - else: - returns[run_type] = None - - # Sort run_times by value - speed_sort = [] - for run_type in sorted(run_times, key=run_times.get, reverse=False): - speed_sort.append( - ( - run_times[run_type], - run_type, - returns[run_type], - ) - ) - - print(f"Fastest run type: {speed_sort[0][1]}") - print(f"Slowest run type: {speed_sort[-1][1]}") - - # Compare each run type against each other, sorted by speed - different_runtypes = [] - for pair in combinations(speed_sort, 2): - print(f"{pair[0][1]} is {pair[1][0]/pair[0][0]:.2f}x faster than {pair[1][1]}") - if self.testing: - if self._compare_runs(pair[0][2], pair[1][2]): - print(f"{pair[0][1]} and {pair[1][1]} have similar outputs!") - else: - warnings.warn(f"WARNING: outputs of {pair[0][1]} and {pair[1][1]} don't match!") - different_runtypes.append(set([pair[0][1], pair[1][1]])) - if len(different_runtypes) <= len(self._run_types) - 1: - try: - common_runtype = reduce(lambda a, b: a & b, 
different_runtypes) - except TypeError: - common_runtype = {} - if common_runtype: - warnings.warn(f"WARNING: disabling {list(common_runtype)[0]} for this set of arguments!") - arg_repr, arg_score = self._get_args_repr_score(*args, **kwargs) - self._store_results(arg_repr, arg_score, list(common_runtype)[0], None) # None saves to null in yamls - - return speed_sort - - def _compare_runs(self, output_1, output_2): - """@public""" - if output_1.ndim > 2: - pcc = 0 - for i in range(output_1.shape[0]): - pcc += pearson_correlation(output_1[i, :, :], output_2[i, :, :]) - pcc /= output_1.shape[0] - else: - pcc = pearson_correlation(output_1, output_2) - - if pcc > 0.8: - return True - else: - return False - - def _get_cl_code(self, file_name, cl_dp): - """ - Retrieves the OpenCL code from the corresponding .cl file - """ - cl_file = os.path.splitext(file_name)[0] + ".cl" - - if not os.path.exists(cl_file): - cl_file = Path(os.path.abspath(inspect.getfile(self.__class__))).parent / file_name - - assert os.path.exists(cl_file), "Could not find OpenCL file: " + str(cl_file) - - kernel_str = open(cl_file).read() - - if not cl_dp: - kernel_str = kernel_str.replace("double", "float") - - return kernel_str - - def _store_results(self, arg_repr, arg_score, run_type, t2run): - """@public - Stores the results of a run - """ - - # Check if the run type has been run, and if not create empty info - run_type_benchs = self._benchmarks[run_type] - if arg_repr not in run_type_benchs: - run_type_benchs[arg_repr] = [arg_score] - - # Get the run info - c = run_type_benchs[arg_repr] - - assert c[0] == arg_score, "arg_score mismatch" - - c.append(t2run) - - self._dump_run_times() - - def _dump_run_times( - self, - ): - """@public""" - # TODO We might need to wrap this into a multiprocessing.Queue if we find it blocking - with open(self._benchmark_filepath, "w") as f: - yaml.dump(self._benchmarks, f) - - def _get_args_repr_score(self, *args, **kwargs): - """@public - Get a string 
representation of the args and kwargs and corresponding 'score' / 'norm' - The idea is that similar args have closer 'score'. Fuzzy logic - - The code does the following: - 1. It converts any args that are floats or ints to "number()" strings, and any args that are tensors to "shape()" strings - 2. It converts any kwargs that are floats or ints to "number()" strings, and any kwargs that are tensors to "shape()" strings - 3. The 'score' is given by the product of all the floats or ints and all the shape sizes. - - :return: the string representation of the args and kwargs - :rtype: str - """ - _norm = 1 - _args = [] - for arg in args: - if type(arg) in (float, int): - _args.append(f"number({arg})") - if arg == 0: - arg = 1 - _norm *= arg - elif hasattr(arg, "shape"): - _args.append(f"shape{arg.shape}") - _norm *= arg.size - else: - _args.append(arg) - - _kwargs = {} - for k, v in kwargs.items(): - if type(v) in (float, int): - _kwargs[k] = f"number({v})" - if v == 0: - v = 1 - _norm *= v - if hasattr(v, "shape"): - _kwargs[k] = f"shape{arg.shape}" - _norm *= v.size - else: - _kwargs[k] = v - - return repr((_args, _kwargs)), _norm - - def get_highest_divisor(self, size_, max_): - """ - Returns the highest divisor of size_ that is still lower than max_ - """ - value = 1 - for i in range(1, int(np.sqrt(size_) + 1)): - if size_ % i == 0: - if i * i != size_: - div2 = size_ / i - - if i < max_: - value = max(value, i) - if div2 < max_: - value = max(value, div2) - return int(value) - - def get_work_group(self, device, shape): - """ - Calculates work group size for a given device and shape of global work space - """ - - max_wg_dims = device.max_work_item_sizes[0:3] - max_glo_dims = device.max_work_group_size - - three = self.get_highest_divisor(shape[2], max_wg_dims[2]) - max_two = max_glo_dims / three - two = self.get_highest_divisor(shape[1], max_two) - one = 1 - return (one, two, three) - - def _check_max_slices(self, input, number_of_max_slices): - """@public - Checks 
if number of maximum slices is greater than 0 - """ - if number_of_max_slices < 1: - raise ValueError("This device doesn't have enough memory to run this function with this input") - elif input.shape[0] < number_of_max_slices: - return input.shape[0] - else: - return number_of_max_slices - - def _check_max_buffer_size(self, size, device, n_slices): - """@public - Checks if buffer size is larger than device maximum memory allocation size and n_slices is 1 and raises appropriate errors that are handled in the _run function. - """ - if size > device.max_mem_alloc_size and n_slices == 1: - raise ValueError( - "This device cannot handle this input size with these parameters, try using a smaller input or other parameters" - ) - - if size > device.max_mem_alloc_size: - raise cl.Error("Buffer size is larger than device maximum memory allocation size") - - return size - - ##################################################### - # RUN METHODS # - # THESE SHOULD ALWAYS BE OVERRIDEN BY CHILD CLASSES # - ##################################################### - - def run(self, *args, **kwargs): - """ - Runs the function with the given args and kwargs - Should be overridden by the any class that inherits from this class - """ - return self._run(*args, **kwargs) - - def _run_opencl(*args, **kwargs): - """@public - Runs the OpenCL version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_unthreaded(*args, **kwargs): - """@public - Runs the cython unthreaded version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_threaded(*args, **kwargs): - """@public - Runs the cython threaded version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_threaded_static(*args, **kwargs): - """@public - Runs the cython threaded static version of the function - Should be overridden by the any class that inherits from 
this class - """ - pass - - def _run_threaded_dynamic(*args, **kwargs): - """@public - Runs the cython threaded dynamic version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_threaded_guided(*args, **kwargs): - """@public - Runs the cython threaded guided version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_python(*args, **kwargs): - """@public - Runs the python version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_njit(*args, **kwargs): - """@public - Runs the njit version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_dask(*args, **kwargs): - """@public - Runs the dask version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_transonic(*args, **kwargs): - """@public - Runs the transonic version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_cuda(*args, **kwargs): - """@public - Runs the cuda version of the function - Should be overridden by the any class that inherits from this class - """ - pass +from liquid_engine import LiquidEngine \ No newline at end of file diff --git a/src/nanopyx/__opencl__.py b/src/nanopyx/__opencl__.py index 8eeacfbe..b8830ded 100644 --- a/src/nanopyx/__opencl__.py +++ b/src/nanopyx/__opencl__.py @@ -8,8 +8,11 @@ import pyopencl.array as cl_array devices = [] + _fastest_device = None + max_perf = 0 + for platform in cl.get_platforms(): - if "Microsoft" in platform.vendor: # TODO this takes out integrated graphics + if "Microsoft" in platform.vendor: # TODO this takes out emulated GPUs continue for dev in platform.get_devices(): # check if the device is a GPU @@ -19,7 +22,11 @@ cl_dp = False else: cl_dp = False - + + perf = dev.max_compute_units * 
dev.max_clock_frequency + if perf>max_perf: + max_perf = perf + _fastest_device = {"device": dev, "DP": cl_dp} devices.append({"device": dev, "DP": cl_dp}) @@ -28,6 +35,7 @@ cl = None cl_array = None devices = None + _fastest_device = None def print_opencl_info(): diff --git a/src/nanopyx/core/analysis/_le_channel_registration.pyx b/src/nanopyx/core/analysis/_le_channel_registration.pyx index 0ecff710..09c6e4c0 100644 --- a/src/nanopyx/core/analysis/_le_channel_registration.pyx +++ b/src/nanopyx/core/analysis/_le_channel_registration.pyx @@ -7,7 +7,7 @@ from cython.parallel import parallel, prange from libc.math cimport sqrt,pow from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from .ccm cimport _calculate_slice_ccm from .estimate_shift import GetMaxOptimizer @@ -97,8 +97,7 @@ class ChannelRegistrationEstimator(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ChannelRegistrationEstimator" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=None): return self._run(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=run_type) @@ -107,6 +106,10 @@ class ChannelRegistrationEstimator(LiquidEngine): return super().benchmark(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity) def _run_unthreaded(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @cython + """ _runtype = "unthreaded".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -241,6 +244,11 @@ class ChannelRegistrationEstimator(LiquidEngine): return 
np.array(translation_masks) def _run_threaded(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @threaded + @cython + """ _runtype = "threaded".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -375,6 +383,11 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) def _run_threaded_guided(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @threaded + @cython + """ _runtype = "threaded_guided".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -509,6 +522,11 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) def _run_threaded_dynamic(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @threaded + @cython + """ _runtype = "threaded_dynamic".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -643,6 +661,11 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) def _run_threaded_static(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @threaded + @cython + """ _runtype = "threaded_static".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -777,8 +800,14 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) - def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device): - _runtype = "OpenCL_" + device["device"].name + def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None): + """ + @gpu + """ + if device is None: + device = _fastest_device + + _runtype = "opencl" crsm = ShiftAndMagnify(verbose=False) cdef float[:, :] img_ref = np.asarray(img_stack[ref_index], dtype=np.float32) diff --git 
a/src/nanopyx/core/analysis/_le_drift_calculator.pyx b/src/nanopyx/core/analysis/_le_drift_calculator.pyx index 8c0658e0..25165f7f 100644 --- a/src/nanopyx/core/analysis/_le_drift_calculator.pyx +++ b/src/nanopyx/core/analysis/_le_drift_calculator.pyx @@ -25,10 +25,7 @@ class DriftEstimator(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "DriftEstimator" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=False, - threaded_dynamic_=False, threaded_guided_=False, - njit_=False, python_=False, transonic_=False, cuda_=False, dask_=False, verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, time_averaging: int = 2, max_drift: int = 5, ref_option: int = 0, run_type=None): return self._run(np.asarray(image).astype(np.float32), time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option, run_type=run_type) @@ -37,7 +34,10 @@ class DriftEstimator(LiquidEngine): return super().benchmark(image, time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option) def _run_unthreaded(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - + """ + @cpu + @cython + """ if not _check_even_square(image): image = _make_even_square(image) @@ -126,7 +126,11 @@ class DriftEstimator(LiquidEngine): return np.asarray(output).astype(np.float32) def _run_threaded(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - + """ + @cpu + @threaded + @cython + """ if not _check_even_square(image): image = _make_even_square(image) @@ -214,280 +218,3 @@ class DriftEstimator(LiquidEngine): output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) return np.asarray(output).astype(np.float32) - def _run_threaded_guided(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int 
ref_option=0): - - if not _check_even_square(image): - image = _make_even_square(image) - - # get image dimensions, should already be an even square - cdef int n_slices = image.shape[0] - cdef int n_rows = image.shape[1] - cdef int n_cols = image.shape[2] - - # ensures time averaging has an acceptable value - if time_averaging < 1: - time_averaging = 1 - elif time_averaging > (n_slices//2): - time_averaging = n_slices//2 - - cdef int n_blocks = n_slices // time_averaging - - averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32) - - cdef int idx - if time_averaging == 1: - averaged = image - else: - for idx in range(n_blocks): - averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0) - - cdef float[:, :, :] ccm - cdef int row_start - cdef int col_start - if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols: - row_start = int(n_rows / 2 - max_drift) - col_start = int(n_cols / 2 - max_drift) - ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)] - else: - ccm = _calculate_ccm(averaged, ref_option) - - cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32) - - cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32) - - cdef float bias_row = 0.0 - cdef float bias_col = 0.0 - cdef float shift_x, shift_y - - cdef int i - for i in range(n_blocks): - - optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32)) - shift_y, shift_x = optimizer.get_max() - - drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3) - drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3) - - if i == 0: - bias_row = drift_table[i, 0] - bias_col = drift_table[i, 1] - drift_table[i, 0] = drift_table[i, 0] - bias_row - drift_table[i, 1] = drift_table[i, 1] - bias_col - - if ref_option == 1 and i > 0: - drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0] - drift_table[i, 1] = 
drift_table[i, 1] + drift_table[i-1, 1] - - cdef float[:] drift_x, drift_y - if time_averaging > 1: - lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int) - x_interpolator = interp1d( - lin, np.array(drift_table[:, 1]), kind="cubic" - ) - y_interpolator = interp1d( - lin, np.array(drift_table[:, 0]), kind="cubic" - ) - - drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 1] = drift_x - drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 2] = drift_y - - else: - output[:, 1] = drift_table[:, 1] # switch order of rows and cols - output[:, 2] = drift_table[:, 0] # switch order of rows and cols - - cdef int s - with nogil: - for s in prange(n_slices,schedule="guided"): - output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) - - return np.asarray(output).astype(np.float32) - def _run_threaded_dynamic(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - - if not _check_even_square(image): - image = _make_even_square(image) - - # get image dimensions, should already be an even square - cdef int n_slices = image.shape[0] - cdef int n_rows = image.shape[1] - cdef int n_cols = image.shape[2] - - # ensures time averaging has an acceptable value - if time_averaging < 1: - time_averaging = 1 - elif time_averaging > (n_slices//2): - time_averaging = n_slices//2 - - cdef int n_blocks = n_slices // time_averaging - - averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32) - - cdef int idx - if time_averaging == 1: - averaged = image - else: - for idx in range(n_blocks): - averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0) - - cdef float[:, :, :] ccm - cdef int row_start - cdef int col_start - if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols: - row_start = int(n_rows / 2 - max_drift) 
- col_start = int(n_cols / 2 - max_drift) - ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)] - else: - ccm = _calculate_ccm(averaged, ref_option) - - cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32) - - cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32) - - cdef float bias_row = 0.0 - cdef float bias_col = 0.0 - cdef float shift_x, shift_y - - cdef int i - for i in range(n_blocks): - - optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32)) - shift_y, shift_x = optimizer.get_max() - - drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3) - drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3) - - if i == 0: - bias_row = drift_table[i, 0] - bias_col = drift_table[i, 1] - drift_table[i, 0] = drift_table[i, 0] - bias_row - drift_table[i, 1] = drift_table[i, 1] - bias_col - - if ref_option == 1 and i > 0: - drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0] - drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1] - - cdef float[:] drift_x, drift_y - if time_averaging > 1: - lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int) - x_interpolator = interp1d( - lin, np.array(drift_table[:, 1]), kind="cubic" - ) - y_interpolator = interp1d( - lin, np.array(drift_table[:, 0]), kind="cubic" - ) - - drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 1] = drift_x - drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 2] = drift_y - - else: - output[:, 1] = drift_table[:, 1] # switch order of rows and cols - output[:, 2] = drift_table[:, 0] # switch order of rows and cols - - cdef int s - with nogil: - for s in prange(n_slices,schedule="dynamic"): - output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) - - return 
np.asarray(output).astype(np.float32) - def _run_threaded_static(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - - if not _check_even_square(image): - image = _make_even_square(image) - - # get image dimensions, should already be an even square - cdef int n_slices = image.shape[0] - cdef int n_rows = image.shape[1] - cdef int n_cols = image.shape[2] - - # ensures time averaging has an acceptable value - if time_averaging < 1: - time_averaging = 1 - elif time_averaging > (n_slices//2): - time_averaging = n_slices//2 - - cdef int n_blocks = n_slices // time_averaging - - averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32) - - cdef int idx - if time_averaging == 1: - averaged = image - else: - for idx in range(n_blocks): - averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0) - - cdef float[:, :, :] ccm - cdef int row_start - cdef int col_start - if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols: - row_start = int(n_rows / 2 - max_drift) - col_start = int(n_cols / 2 - max_drift) - ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)] - else: - ccm = _calculate_ccm(averaged, ref_option) - - cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32) - - cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32) - - cdef float bias_row = 0.0 - cdef float bias_col = 0.0 - cdef float shift_x, shift_y - - cdef int i - for i in range(n_blocks): - - optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32)) - shift_y, shift_x = optimizer.get_max() - - drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3) - drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3) - - if i == 0: - bias_row = drift_table[i, 0] - bias_col = drift_table[i, 1] - drift_table[i, 0] = drift_table[i, 0] - bias_row - drift_table[i, 1] = 
drift_table[i, 1] - bias_col - - if ref_option == 1 and i > 0: - drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0] - drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1] - - cdef float[:] drift_x, drift_y - if time_averaging > 1: - lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int) - x_interpolator = interp1d( - lin, np.array(drift_table[:, 1]), kind="cubic" - ) - y_interpolator = interp1d( - lin, np.array(drift_table[:, 0]), kind="cubic" - ) - - drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 1] = drift_x - drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 2] = drift_y - - else: - output[:, 1] = drift_table[:, 1] # switch order of rows and cols - output[:, 2] = drift_table[:, 0] # switch order of rows and cols - - cdef int s - with nogil: - for s in prange(n_slices,schedule="static"): - output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) - - return np.asarray(output).astype(np.float32) - - -# % if sch=='unthreaded': -# for i in range(n_blocks): -# % elif sch=='threaded': -# for i in prange(n_blocks): -# % else: -# for i in prange(n_blocks,schedule="static"): -# %endif -# average[i] = np.mean(image[i*time_averaging:(i+1)*time_averaging, :, :], axis=0) \ No newline at end of file diff --git a/src/nanopyx/core/transform/_le_convolution.pyx b/src/nanopyx/core/transform/_le_convolution.pyx index 5b3b541b..ce2fde47 100644 --- a/src/nanopyx/core/transform/_le_convolution.pyx +++ b/src/nanopyx/core/transform/_le_convolution.pyx @@ -13,7 +13,7 @@ from libc.math cimport cos, sin from .__interpolation_tools__ import check_image, value2array from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array 
+from ...__opencl__ import cl, cl_array, _fastest_device class Convolution(LiquidEngine): @@ -24,11 +24,7 @@ class Convolution(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "Conv2D" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - njit_=True, python_=True, transonic_=True, cuda_=True, dask_=True, - verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, kernel, run_type=None): image = check_array(image) @@ -38,7 +34,10 @@ class Convolution(LiquidEngine): return super().benchmark(image, kernel) def _run_unthreaded(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -73,7 +72,11 @@ class Convolution(LiquidEngine): return conv_out def _run_threaded(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @threaded + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -108,7 +111,11 @@ class Convolution(LiquidEngine): return conv_out def _run_threaded_guided(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @threaded + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -143,7 +150,11 @@ class Convolution(LiquidEngine): return conv_out def _run_threaded_dynamic(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @threaded + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -178,7 +189,11 @@ class Convolution(LiquidEngine): return conv_out def _run_threaded_static(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @threaded + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -213,8 +228,13 @@ class Convolution(LiquidEngine): return conv_out - def _run_opencl(self, image, 
kernel, device): - + def _run_opencl(self, image, kernel, device=None): + """ + @gpu + """ + if device is None: + device = _fastest_device + # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -245,16 +265,35 @@ class Convolution(LiquidEngine): return image_out def _run_python(self, image, kernel): + """ + @cpu + """ return convolution2D_python(image, kernel).astype(np.float32) def _run_transonic(self, image, kernel): + """ + @cpu + @threaded + """ return convolution2D_transonic(image, kernel).astype(np.float32) def _run_dask(self, image, kernel): + """ + @cpu + @threaded + """ return convolution2D_dask(image, kernel).astype(np.float32) def _run_cuda(self, image, kernel): + """ + @gpu + """ return convolution2D_cuda(image, kernel).astype(np.float32) def _run_njit(self, image, kernel): + """ + @cpu + @threaded + @numba + """ return convolution2D_numba(image, kernel).astype(np.float32) diff --git a/src/nanopyx/core/transform/_le_esrrf.pyx b/src/nanopyx/core/transform/_le_esrrf.pyx index 4c25f36b..df40fa2e 100644 --- a/src/nanopyx/core/transform/_le_esrrf.pyx +++ b/src/nanopyx/core/transform/_le_esrrf.pyx @@ -10,7 +10,7 @@ from libc.math cimport cos, sin from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ._le_interpolation_catmull_rom import ShiftAndMagnify from ._le_roberts_cross_gradients import GradientRobertsCross @@ -24,10 +24,7 @@ class eSRRF(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "eSRRF_ST" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, 
magnification: int = 5, radius: float = 1.5, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type=None): image = check_image(image) @@ -38,6 +35,12 @@ class eSRRF(LiquidEngine): return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device + # TODO doIntensityWeighting is irrelevant on gpu2 cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -148,6 +151,11 @@ class eSRRF(LiquidEngine): return output_image def _run_threaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "threaded".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -161,6 +169,11 @@ class eSRRF(LiquidEngine): return radial_gradients def _run_threaded_guided(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "threaded_guided".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -174,6 +187,11 @@ class eSRRF(LiquidEngine): return radial_gradients def _run_threaded_dynamic(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "threaded_dynamic".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -187,6 +205,11 @@ class eSRRF(LiquidEngine): return radial_gradients def _run_threaded_static(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "threaded_static".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = 
GradientRobertsCross(verbose=False) @@ -201,6 +224,10 @@ class eSRRF(LiquidEngine): return radial_gradients def _run_unthreaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @cython + """ runtype = "Unthreaded" crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) diff --git a/src/nanopyx/core/transform/_le_esrrf3d.pyx b/src/nanopyx/core/transform/_le_esrrf3d.pyx index b3c2c5a7..e1f27f7b 100644 --- a/src/nanopyx/core/transform/_le_esrrf3d.pyx +++ b/src/nanopyx/core/transform/_le_esrrf3d.pyx @@ -26,9 +26,7 @@ class eSRRF3D(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "eSRRF_3D" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) self._gradients_s_interpolated = None self._gradients_r_interpolated = None self._gradients_c_interpolated = None @@ -43,10 +41,10 @@ class eSRRF3D(LiquidEngine): if image.dtype != np.float32: image = image.astype(np.float32) if len(image.shape) == 4: - return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type) + return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, radius_z=radius_z, ratio_px=ratio_px, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type) elif len(image.shape) == 3: image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2])) - return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, 
run_type=run_type) + return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, radius_z=radius_z, ratio_px=ratio_px, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type) def benchmark(self, image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): if image.dtype != np.float32: @@ -57,8 +55,12 @@ class eSRRF3D(LiquidEngine): image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2])) return super().benchmark(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) - def _run_threaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_threaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -121,8 +123,12 @@ class eSRRF3D(LiquidEngine): rgc_map[f, sM, rM, cM] = rgc_val return np.asarray(rgc_map) - def _run_threaded_guided(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_threaded_guided(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: 
bool = True): + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -185,8 +191,12 @@ class eSRRF3D(LiquidEngine): rgc_map[f, sM, rM, cM] = rgc_val return np.asarray(rgc_map) - def _run_threaded_dynamic(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_threaded_dynamic(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -249,8 +259,12 @@ class eSRRF3D(LiquidEngine): rgc_map[f, sM, rM, cM] = rgc_val return np.asarray(rgc_map) - def _run_threaded_static(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_threaded_static(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -313,8 +327,11 @@ class eSRRF3D(LiquidEngine): rgc_map[f, sM, rM, cM] = rgc_val return np.asarray(rgc_map) - def _run_unthreaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def 
_run_unthreaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma diff --git a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx index 7a348921..d5f62230 100644 --- a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx @@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_bicubic.h": @@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_bicubic" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -63,8 +60,12 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, 
int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -112,6 +113,10 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -136,6 +141,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -160,6 +170,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -184,6 +199,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -208,6 +228,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -238,11 +263,9 @@ class 
ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_bicubic" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -285,7 +308,12 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -337,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -371,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -405,6 +442,11 
@@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -439,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -473,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -512,11 +564,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_bicubic" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -553,7 +603,12 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str 
scale, dict device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -609,7 +664,10 @@ class PolarTransform(LiquidEngine): return output def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -643,7 +701,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -677,7 +739,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -711,7 +777,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -745,7 +815,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx index 99ca8d07..f080fd06 100644 --- a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx @@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from 
.__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_catmull_rom.h": @@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_catmull_rom" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -63,8 +60,12 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -112,6 +113,10 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -136,6 +141,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, 
float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -160,6 +170,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -184,6 +199,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -208,6 +228,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -238,11 +263,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_catmull_rom" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ 
-285,7 +308,12 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -337,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -371,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -405,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -439,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = 
image.shape[2] @@ -473,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -512,11 +564,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_catmull_rom" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -553,7 +603,12 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -609,7 +664,10 @@ class PolarTransform(LiquidEngine): return output def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -643,7 +701,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = 
image.shape[1] cdef int cols = image.shape[2] @@ -677,7 +739,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -711,7 +777,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -745,7 +815,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx index bf1cd551..4537579d 100644 --- a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx @@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_lanczos.h": @@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_lanczos" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -63,8 +60,12 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -112,6 +113,10 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -136,6 +141,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -160,6 +170,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -184,6 +199,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float 
magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -208,6 +228,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -238,11 +263,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_lanczos" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -285,7 +308,12 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -337,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float 
shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -371,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -405,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -439,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -473,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -512,11 +564,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_lanczos" - super().__init__(clear_benchmarks=clear_benchmarks, 
testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -553,7 +603,12 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -609,7 +664,10 @@ class PolarTransform(LiquidEngine): return output def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -643,7 +701,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -677,7 +739,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -711,7 +777,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -745,7 +815,11 @@ class PolarTransform(LiquidEngine): return image_out def 
_run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx index 0df765ae..d53ee3d4 100644 --- a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx @@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_nearest_neighbor.h": @@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_nearest_neighbor" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -63,8 +60,12 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + 
device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -112,6 +113,10 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -136,6 +141,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -160,6 +170,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -184,6 +199,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -208,6 +228,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -238,11 +263,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) 
using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_nearest_neighbor" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -285,7 +308,12 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -337,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -371,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -405,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def 
_run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -439,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -473,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -512,11 +564,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_nearest_neighbor" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -553,7 +603,12 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + """ + @gpu + 
""" + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -609,7 +664,10 @@ class PolarTransform(LiquidEngine): return output def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -643,7 +701,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -677,7 +739,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -711,7 +777,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -745,7 +815,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_nlm_denoising.pyx index d13a94de..b5aab716 100644 --- a/src/nanopyx/core/transform/_le_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_nlm_denoising.pyx @@ -12,7 +12,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ 
import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device import os os.environ['PYOPENCL_NO_CACHE']='1' @@ -36,9 +36,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -74,6 +71,9 @@ class NLMDenoising(LiquidEngine): def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) @@ -81,6 +81,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ cdef float distance_cutoff = 5.0 cdef float var = sigma * sigma @@ -158,6 +162,11 @@ class NLMDenoising(LiquidEngine): pad_size: -pad_size]).astype(np.float32)) def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -218,6 +227,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -278,6 +292,11 @@ class 
NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -338,6 +357,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -399,7 +423,13 @@ class NLMDenoising(LiquidEngine): - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device + cl_ctx = cl.Context([device['device']]) dc = device['device'] cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx index 506ce316..f16d28a1 100644 --- a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx @@ -10,7 +10,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_integral_image.h": @@ -29,9 +29,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising_patch" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, 
threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -66,6 +63,9 @@ class NLMDenoising(LiquidEngine): return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) @@ -73,6 +73,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ cdef float distance_cutoff = 5.0 cdef float var = sigma * sigma @@ -150,6 +154,11 @@ class NLMDenoising(LiquidEngine): pad_size: -pad_size]).astype(np.float32)) def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -218,6 +227,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32)) def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -286,6 +300,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32)) def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + 
@cython + """ cdef float distance_cutoff = 5.0 @@ -354,6 +373,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32)) def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -423,7 +447,12 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32)) - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] diff --git a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx index 206d8824..b0358c27 100644 --- a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx @@ -12,7 +12,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_patch_distance.h": @@ -28,9 +28,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising_pixel" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: 
@@ -65,6 +62,9 @@ class NLMDenoising(LiquidEngine): return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False) @@ -72,6 +72,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -132,6 +136,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -192,6 +201,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -252,6 +266,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -312,6 +331,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_static(self, 
float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -373,7 +397,13 @@ class NLMDenoising(LiquidEngine): - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + @cython + """ + if device is None: + device = _fastest_device cl_ctx = cl.Context([device['device']]) dc = device['device'] cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx index 69f4f656..0e9d1ab1 100644 --- a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx +++ b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx @@ -6,7 +6,7 @@ cimport numpy as np from cython.parallel import parallel, prange from libc.math cimport sqrt, pow -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ...__liquid_engine__ import LiquidEngine from .__interpolation_tools__ import check_image @@ -22,8 +22,6 @@ class RadialGradientConvergence(LiquidEngine): self._designation = "RadialGradientConvergence" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) @@ -41,7 +39,10 @@ class RadialGradientConvergence(LiquidEngine): return super().benchmark(gradient_col_interp, gradient_row_interp, image_interp, magnification, radius, sensitivity, doIntensityWeighting) def _run_unthreaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, 
magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -70,7 +71,11 @@ class RadialGradientConvergence(LiquidEngine): return np.asarray(rgc_map,dtype=np.float32) def _run_threaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -97,7 +102,11 @@ class RadialGradientConvergence(LiquidEngine): rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION, fwhm, tSO, tSS, _sensitivity) return np.asarray(rgc_map,dtype=np.float32) def _run_threaded_guided(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -124,7 +133,11 @@ class RadialGradientConvergence(LiquidEngine): rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION, fwhm, tSO, tSS, _sensitivity) return np.asarray(rgc_map,dtype=np.float32) def _run_threaded_dynamic(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -151,7 +164,11 @@ class RadialGradientConvergence(LiquidEngine): rgc_map[f, rM, cM] = 
_c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION, fwhm, tSO, tSS, _sensitivity) return np.asarray(rgc_map,dtype=np.float32) def _run_threaded_static(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -180,6 +197,11 @@ class RadialGradientConvergence(LiquidEngine): def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device # gradient gxgymag*mag*size # image_interp = mag*size diff --git a/src/nanopyx/core/transform/_le_radiality.pyx b/src/nanopyx/core/transform/_le_radiality.pyx index 76a5c313..bad66375 100644 --- a/src/nanopyx/core/transform/_le_radiality.pyx +++ b/src/nanopyx/core/transform/_le_radiality.pyx @@ -7,7 +7,7 @@ from cython.parallel import parallel, prange from libc.math cimport sqrt, pi, fabs, cos, sin from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from .__interpolation_tools__ import check_image from ._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify @@ -33,8 +33,6 @@ class Radiality(LiquidEngine): self._designation = "Radiality" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=False, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) def run(self, image, image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = 
True, run_type = None): @@ -47,7 +45,7 @@ class Radiality(LiquidEngine): image_interp = check_image(image_interp) return super().benchmark(image, image_interp, magnification, ringRadius, border, radialityPositivityConstraint, doIntensityWeighting) - def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): + """def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): cdef int _magnification = magnification cdef int _border = border @@ -83,10 +81,14 @@ class Radiality(LiquidEngine): else: imRad[f,j,i] = _c_calculate_radiality_per_subpixel(i, j, &imGx[f,0,0], &imGy[f,0,0], xRingCoordinates, yRingCoordinates, _magnification, _ringRadius, nRingCoordinates, _radialityPositivityConstraint, h, w) - return np.asarray(imRad) + return np.asarray(imRad)""" def _run_threaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -122,7 +124,11 @@ class Radiality(LiquidEngine): return np.asarray(imRad) def _run_threaded_guided(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -158,7 +164,11 @@ class Radiality(LiquidEngine): return np.asarray(imRad) def 
_run_threaded_dynamic(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -194,7 +204,11 @@ class Radiality(LiquidEngine): return np.asarray(imRad) def _run_threaded_static(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -232,6 +246,11 @@ class Radiality(LiquidEngine): def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device cl_ctx = cl.Context([device['device']]) cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx index 28fd22f5..002423ed 100644 --- a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx +++ b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx @@ -2,7 +2,7 @@ import numpy as np cimport numpy as np -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ...__liquid_engine__ import LiquidEngine from cython.parallel import prange @@ -17,8 +17,6 @@ class GradientRobertsCross(LiquidEngine): self._designation = "GradientRobertsCross" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - 
threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) def run(self, image, run_type = None): @@ -30,7 +28,10 @@ class GradientRobertsCross(LiquidEngine): return super().benchmark(image) def _run_unthreaded(self, float[:,:,:] image): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) cdef float [:,:,:] gradient_row = np.zeros_like(image) @@ -43,6 +44,11 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_threaded(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -55,6 +61,11 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_threaded_guided(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -67,6 +78,11 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_threaded_dynamic(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -79,6 +95,11 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_threaded_static(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -91,7 +112,12 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row - def _run_opencl(self, float[:,:,:] image, dict device, int mem_div=1): + def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1): + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx 
b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx index a61fca34..d28896a9 100644 --- a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx +++ b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx @@ -7,7 +7,7 @@ cimport numpy as np from cython.parallel import prange from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ._le_mandelbrot_benchmark_ import mandelbrot as _py_mandelbrot from ._le_mandelbrot_benchmark_ import njit_mandelbrot as _njit_mandelbrot @@ -23,9 +23,7 @@ class MandelbrotBenchmark(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "Mandelbrot_Benchmark" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, python_=True, njit_=True, + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, int size=1000, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1, run_type=None) -> np.ndarray: @@ -43,7 +41,12 @@ class MandelbrotBenchmark(LiquidEngine): def benchmark(self, int size, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1): return super().benchmark(size, r_start, r_end, c_start, c_end) - def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device) -> np.ndarray: + def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray: + """ + @gpu + """ + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -75,6 +78,10 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot.get() def _run_unthreaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @cython + """ im_mandelbrot = 
np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -91,6 +98,11 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_threaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -106,6 +118,11 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_threaded_guided(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -121,6 +138,11 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_threaded_dynamic(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -136,6 +158,11 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_threaded_static(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -152,11 +179,19 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_python(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) _py_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end) return im_mandelbrot def _run_njit(self, int size=10, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1) -> np.ndarray: + """ + @cpu + @threaded + @numba + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) 
_njit_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end) return im_mandelbrot