From b3b6409dc45663c77b64d82f9eee84a8121b82f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Mon, 13 May 2024 16:27:38 +0100 Subject: [PATCH 01/14] Refactoring named runtypes --- src/nanopyx/__agent__.py | 10 +-- src/nanopyx/__liquid_engine__.py | 143 ++++++------------------------- 2 files changed, 30 insertions(+), 123 deletions(-) diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py index 946e893a..683255c6 100644 --- a/src/nanopyx/__agent__.py +++ b/src/nanopyx/__agent__.py @@ -90,8 +90,8 @@ def _get_ordered_run_types(self, fn, args, kwargs): if len(run_info) < 2: # Fall back to default values - if "OpenCL" in run_type: - rt = "OpenCL" + if "opencl" in run_type: + rt = "opencl" else: rt = run_type @@ -138,7 +138,7 @@ def _check_delay(self, run_type, runtime, runtimes_history, verbose=True): 2. Stores the delay factor and the probability """ - threaded_runtypes = ["Threaded", "Threaded_static", "Threaded_dynamic", "Threaded_guided"] + threaded_runtypes = ["threaded", "threaded_static", "threaded_dynamic", "threaded_guided"] runtimes_history = np.array(runtimes_history) if len(runtimes_history) > 50: @@ -153,7 +153,7 @@ def _check_delay(self, run_type, runtime, runtimes_history, verbose=True): if run_type in self.delayed_runtypes: if runtime < (slow_avg_speed - slow_std_speed) or runtime < (fast_avg_speed + fast_std_speed): - if "Threaded" in run_type: + if "threaded" in run_type: for threaded_run_type in threaded_runtypes: self.delayed_runtypes.pop(threaded_run_type, None) else: @@ -173,7 +173,7 @@ def _check_delay(self, run_type, runtime, runtimes_history, verbose=True): f"Run type {run_type} was delayed in the previous run. Delay factor: {delay_factor}, Delay probability: {delay_prob}" ) - if "Threaded" in run_type: + if "threaded" in run_type: for threaded_run_type in threaded_runtypes: self.delayed_runtypes[threaded_run_type] = (delay_factor, delay_prob) else: diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py index 56b7b692..d911f872 100644 --- a/src/nanopyx/__liquid_engine__.py +++ b/src/nanopyx/__liquid_engine__.py @@ -38,17 +38,6 @@ class LiquidEngine: def __init__( self, testing: bool = False, - opencl_: bool = False, - unthreaded_: bool = False, - threaded_: bool = False, - threaded_static_: bool = False, - threaded_dynamic_: bool = False, - threaded_guided_: bool = False, - python_: bool = False, - njit_: bool = False, - dask_: bool = False, - transonic_: bool = False, - cuda_: bool = False, clear_benchmarks: bool = False, verbose: bool = True, ) -> None: @@ -77,34 +66,22 @@ def __init__( # Start by checking available run types self._run_types = {} - if opencl_ and opencl_works(): - for d in devices: - self._run_types[f"OpenCL_{d['device'].name}"] = partial(self._run_opencl, device=d) - if threaded_: - self._run_types["Threaded"] = self._run_threaded - if unthreaded_: - self._run_types["Unthreaded"] = self._run_unthreaded - if threaded_static_: - self._run_types["Threaded_static"] = self._run_threaded_static - if threaded_dynamic_: - self._run_types["Threaded_dynamic"] = self._run_threaded_dynamic - if threaded_guided_: - self._run_types["Threaded_guided"] = self._run_threaded_guided - if python_: - self._run_types["Python"] = self._run_python - if njit_ and njit_works(): - self._run_types["Numba"] = self._run_njit - # Try to trigger early compilation - try: - self._run_njit() - except TypeError: - print("Consider adding default arguments to the njit implementation to trigger early compilation") - if dask_ and dask_works(): - self._run_types["Dask"] = self._run_dask - if transonic_ and transonic_works(): - self._run_types["Transonic"] = self._run_transonic - if cuda_ and cuda_works(): - self._run_types["Cuda"] = self._run_cuda + for rt in inspect.getmembers(self,inspect.ismethod): + if rt[0].startswith('_run_'): + runtypename = '_'.join(rt[0].split('_')[2:]).lower() + # TODO Recheck this logic TODO + if 'numba' in runtypename and not njit_works: + continue + elif 'dask' in runtypename and not dask_works: + continue + elif 'transonic' in runtypename and not transonic_works: + continue + elif 'cuda' in runtypename and not cuda_works: + continue + elif 'opencl' in runtypename and not opencl_works: + continue + else: + self._run_types[runtypename] = rt[1] self.testing = testing self.mem_div = 1 @@ -117,14 +94,17 @@ def __init__( os.makedirs(base_path, exist_ok=True) self._benchmark_filepath = os.path.join(base_path, self.__class__.__name__ + ".yml") - # Load config file if it exists, otherwise create an empty config + # Load benchmark file if it exists, otherwise create an empty config if not clear_benchmarks and os.path.exists(self._benchmark_filepath): with open(self._benchmark_filepath) as f: self._benchmarks = yaml.load(f, Loader=yaml.FullLoader) else: self._benchmarks = {} - # check if the cfg dictionary has a key for every available run type + # Lowercase everything for backwards compatibility + self._benchmarks = {k.lower(): v for k, v in self._benchmarks.items()} + + # check if the benchmark dictionary has a key for every available run type for run_type_designation in self._run_types.keys(): if run_type_designation not in self._benchmarks: self._benchmarks[run_type_designation] = {} @@ -143,6 +123,8 @@ def __init__( .joinpath(self.__class__.__name__ + ".yml") .read_text() ) + # Lowercase everything for backwards compatibility + self._default_benchmarks = {k.lower(): v for k, v in self._default_benchmarks.items()} except: self._default_benchmarks = [] @@ -164,6 +146,8 @@ def _run(self, *args, run_type=None, **kwargs): :return: the result and time taken """ + run_type = run_type.lower() + if run_type is None and self.verbose: print("Querying the Agent...") run_type = self.Agent.get_run_type(self, args, kwargs) @@ -452,80 +436,3 @@ def run(self, *args, **kwargs): Should be overridden by the any class that inherits from this class """ return self._run(*args, **kwargs) - - def _run_opencl(*args, **kwargs): - """@public - Runs the OpenCL version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_unthreaded(*args, **kwargs): - """@public - Runs the cython unthreaded version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_threaded(*args, **kwargs): - """@public - Runs the cython threaded version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_threaded_static(*args, **kwargs): - """@public - Runs the cython threaded static version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_threaded_dynamic(*args, **kwargs): - """@public - Runs the cython threaded dynamic version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_threaded_guided(*args, **kwargs): - """@public - Runs the cython threaded guided version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_python(*args, **kwargs): - """@public - Runs the python version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_njit(*args, **kwargs): - """@public - Runs the njit version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_dask(*args, **kwargs): - """@public - Runs the dask version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_transonic(*args, **kwargs): - """@public - Runs the transonic version of the function - Should be overridden by the any class that inherits from this class - """ - pass - - def _run_cuda(*args, **kwargs): - """@public - Runs the cuda version of the function - Should be overridden by the any class that inherits from this class - """ - pass From cf7afc6a2f55c7a08739d8091d494abc33ad9734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Mon, 13 May 2024 16:27:58 +0100 Subject: [PATCH 02/14] added todo --- src/nanopyx/__agent__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py index 683255c6..29e123f3 100644 --- a/src/nanopyx/__agent__.py +++ b/src/nanopyx/__agent__.py @@ -137,7 +137,7 @@ def _check_delay(self, run_type, runtime, runtimes_history, verbose=True): 1. Calculates a probability that this delay is maintained 2. Stores the delay factor and the probability """ - + # TODO test threaded_runtypes = ["threaded", "threaded_static", "threaded_dynamic", "threaded_guided"] runtimes_history = np.array(runtimes_history) From 35e35ca77cefed95fbe6e9e81a4871ba955d1c5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Mon, 13 May 2024 16:28:12 +0100 Subject: [PATCH 03/14] estimator of fastest device --- src/nanopyx/__opencl__.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/nanopyx/__opencl__.py b/src/nanopyx/__opencl__.py index 8eeacfbe..b8830ded 100644 --- a/src/nanopyx/__opencl__.py +++ b/src/nanopyx/__opencl__.py @@ -8,8 +8,11 @@ import pyopencl.array as cl_array devices = [] + _fastest_device = None + max_perf = 0 + for platform in cl.get_platforms(): - if "Microsoft" in platform.vendor: # TODO this takes out integrated graphics + if "Microsoft" in platform.vendor: # TODO this takes out emulated GPUs continue for dev in platform.get_devices(): # check if the device is a GPU @@ -19,7 +22,11 @@ cl_dp = False else: cl_dp = False - + + perf = dev.max_compute_units * dev.max_clock_frequency + if perf>max_perf: + max_perf = perf + _fastest_device = {"device": dev, "DP": cl_dp} devices.append({"device": dev, "DP": cl_dp}) @@ -28,6 +35,7 @@ cl = None cl_array = None devices = None + _fastest_device = None def print_opencl_info(): From 6379a8d21cdfd5e63eb2e6abca1d5dff2b2fa6d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Mon, 13 May 2024 16:28:23 +0100 Subject: [PATCH 04/14] Testing new runtype naming --- .../nanopyx.core.transform._le_convolution.pyx | 13 ++++++------- src/nanopyx/core/transform/_le_convolution.pyx | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx index 2cbc1ddf..e304597a 100644 --- a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx @@ -15,7 +15,7 @@ from libc.math cimport cos, sin from .__interpolation_tools__ import check_image, value2array from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device class Convolution(LiquidEngine): @@ -26,11 +26,7 @@ class Convolution(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "Conv2D" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - njit_=True, python_=True, transonic_=True, cuda_=True, dask_=True, - verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, kernel, run_type=None): image = check_array(image) @@ -85,8 +81,11 @@ class Convolution(LiquidEngine): % endfor - def _run_opencl(self, image, kernel, device): + def _run_opencl(self, image, kernel, device=None): + if device is None: + device = _fastest_device + # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] diff --git a/src/nanopyx/core/transform/_le_convolution.pyx b/src/nanopyx/core/transform/_le_convolution.pyx index 5b3b541b..1f4d5fa8 100644 --- a/src/nanopyx/core/transform/_le_convolution.pyx +++ b/src/nanopyx/core/transform/_le_convolution.pyx @@ -13,7 +13,7 @@ from libc.math cimport cos, sin from .__interpolation_tools__ import check_image, value2array from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device class Convolution(LiquidEngine): @@ -24,11 +24,7 @@ class Convolution(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "Conv2D" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - njit_=True, python_=True, transonic_=True, cuda_=True, dask_=True, - verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, kernel, run_type=None): image = check_array(image) @@ -213,8 +209,11 @@ class Convolution(LiquidEngine): return conv_out - def _run_opencl(self, image, kernel, device): + def _run_opencl(self, image, kernel, device=None): + if device is None: + device = _fastest_device + # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] From b80ce16875ab47d7ff391b4b21be3f6fb2315dd8 Mon Sep 17 00:00:00 2001 From: Bruno Saraiva Date: Tue, 14 May 2024 14:53:50 +0100 Subject: [PATCH 05/14] updated all LE classes --- src/mako_templates/_le_interpolation_base.pyx | 34 +-- ...core.analysis._le_channel_registration.pyx | 13 +- ...pyx.core.analysis._le_drift_calculator.pyx | 19 +- .../nanopyx.core.transform._le_esrrf.pyx | 11 +- .../nanopyx.core.transform._le_esrrf3d.pyx | 4 +- ...nopyx.core.transform._le_nlm_denoising.pyx | 11 +- ...core.transform._le_patch_nlm_denoising.pyx | 9 +- ...core.transform._le_pixel_nlm_denoising.pyx | 9 +- ...nsform._le_radial_gradient_convergence.pyx | 7 +- .../nanopyx.core.transform._le_radiality.pyx | 11 +- ....transform._le_roberts_cross_gradients.pyx | 9 +- ...yx.core.utils._le_mandelbrot_benchmark.pyx | 11 +- src/nanopyx/__liquid_engine__.py | 15 +- .../analysis/_le_channel_registration.pyx | 13 +- .../core/analysis/_le_drift_calculator.pyx | 282 +----------------- src/nanopyx/core/transform/_le_esrrf.pyx | 11 +- src/nanopyx/core/transform/_le_esrrf3d.pyx | 8 +- .../transform/_le_interpolation_bicubic.pyx | 34 +-- .../_le_interpolation_catmull_rom.pyx | 34 +-- .../transform/_le_interpolation_lanczos.pyx | 34 +-- .../_le_interpolation_nearest_neighbor.pyx | 34 +-- .../core/transform/_le_nlm_denoising.pyx | 11 +- .../transform/_le_patch_nlm_denoising.pyx | 9 +- .../transform/_le_pixel_nlm_denoising.pyx | 9 +- .../_le_radial_gradient_convergence.pyx | 7 +- src/nanopyx/core/transform/_le_radiality.pyx | 11 +- .../transform/_le_roberts_cross_gradients.pyx | 9 +- .../core/utils/_le_mandelbrot_benchmark.pyx | 11 +- 28 files changed, 200 insertions(+), 480 deletions(-) diff --git a/src/mako_templates/_le_interpolation_base.pyx b/src/mako_templates/_le_interpolation_base.pyx index 62bc88bf..73806713 100644 --- a/src/mako_templates/_le_interpolation_base.pyx +++ b/src/mako_templates/_le_interpolation_base.pyx @@ -9,7 +9,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_${self.attr.inter_name}.h": @@ -23,10 +23,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_${self.attr.inter_name}" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -65,8 +62,9 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -152,11 +150,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_${self.attr.inter_name}" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -199,7 +195,10 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -298,11 +297,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_${self.attr.inter_name}" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -339,7 +336,10 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx index 4adae44d..7d8ef454 100644 --- a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx +++ b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx @@ -9,7 +9,7 @@ from cython.parallel import parallel, prange from libc.math cimport sqrt,pow from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from .ccm cimport _calculate_slice_ccm from .estimate_shift import GetMaxOptimizer @@ -99,8 +99,7 @@ class ChannelRegistrationEstimator(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ChannelRegistrationEstimator" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=None): return self._run(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=run_type) @@ -251,8 +250,12 @@ class ChannelRegistrationEstimator(LiquidEngine): % endfor - def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device): - _runtype = "OpenCL_" + device["device"].name + def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None): + + if device is None: + device = _fastest_device + + _runtype = "opencl" crsm = ShiftAndMagnify(verbose=False) cdef float[:, :] img_ref = np.asarray(img_stack[ref_index], dtype=np.float32) diff --git a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx index de121ac0..4b83448d 100644 --- a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx +++ b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx @@ -1,5 +1,5 @@ <%! -schedulers = ['unthreaded','threaded','threaded_guided','threaded_dynamic','threaded_static'] +schedulers = ['unthreaded','threaded'] %># cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=False import time import scipy @@ -27,10 +27,7 @@ class DriftEstimator(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "DriftEstimator" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=False, - threaded_dynamic_=False, threaded_guided_=False, - njit_=False, python_=False, transonic_=False, cuda_=False, dask_=False, verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, time_averaging: int = 2, max_drift: int = 5, ref_option: int = 0, run_type=None): return self._run(np.asarray(image).astype(np.float32), time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option, run_type=run_type) @@ -129,19 +126,9 @@ class DriftEstimator(LiquidEngine): % elif sch=='threaded': for s in prange(n_slices): % else: - for s in prange(n_slices,schedule="${sch.split('_')[1]}"): + for s in prange(n_slices): %endif output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) return np.asarray(output).astype(np.float32) %endfor - - -# % if sch=='unthreaded': -# for i in range(n_blocks): -# % elif sch=='threaded': -# for i in prange(n_blocks): -# % else: -# for i in prange(n_blocks,schedule="${sch.split('_')[1]}"): -# %endif -# average[i] = np.mean(image[i*time_averaging:(i+1)*time_averaging, :, :], axis=0) \ No newline at end of file diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx index 9e40749e..ddfe4d05 100644 --- a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx @@ -12,7 +12,7 @@ from libc.math cimport cos, sin from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ._le_interpolation_catmull_rom import ShiftAndMagnify from ._le_roberts_cross_gradients import GradientRobertsCross @@ -26,10 +26,7 @@ class eSRRF(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "eSRRF_ST" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, magnification: int = 5, radius: float = 1.5, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type=None): image = check_image(image) @@ -40,6 +37,10 @@ class eSRRF(LiquidEngine): return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1): + + if device is None: + device = _fastest_device + # TODO doIntensityWeighting is irrelevant on gpu2 cl_ctx = cl.Context([device['device']]) dc = device['device'] diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx index 2f244a0b..2660ed1f 100644 --- a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx @@ -28,9 +28,7 @@ class eSRRF3D(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "eSRRF_3D" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) self._gradients_s_interpolated = None self._gradients_r_interpolated = None self._gradients_c_interpolated = None diff --git a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx index d49153be..2f523166 100644 --- a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx @@ -14,7 +14,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device import os os.environ['PYOPENCL_NO_CACHE']='1' @@ -38,9 +38,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -227,7 +224,11 @@ class NLMDenoising(LiquidEngine): %endfor - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + + if device is None: + device = _fastest_device + cl_ctx = cl.Context([device['device']]) dc = device['device'] cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx index d828e973..e31a3102 100644 --- a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx @@ -12,7 +12,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_integral_image.h": @@ -31,9 +31,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising_patch" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -231,7 +228,9 @@ class NLMDenoising(LiquidEngine): %endfor - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray: + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] diff --git a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx index 32ba7c96..750cd7b4 100644 --- a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx @@ -14,7 +14,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_patch_distance.h": @@ -30,9 +30,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising_pixel" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -143,7 +140,9 @@ class NLMDenoising(LiquidEngine): %endfor - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + if device is None: + device = _fastest_device cl_ctx = cl.Context([device['device']]) dc = device['device'] cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx index 651e6e50..a8896ffc 100644 --- a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx @@ -8,7 +8,7 @@ cimport numpy as np from cython.parallel import parallel, prange from libc.math cimport sqrt, pow -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ...__liquid_engine__ import LiquidEngine from .__interpolation_tools__ import check_image @@ -24,8 +24,6 @@ class RadialGradientConvergence(LiquidEngine): self._designation = "RadialGradientConvergence" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) @@ -108,6 +106,9 @@ class RadialGradientConvergence(LiquidEngine): def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1): + if device is None: + device = _fastest_device + # gradient gxgymag*mag*size # image_interp = mag*size # output = image_interp diff --git a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx index 14aa2fb5..89dd24c7 100644 --- a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx @@ -9,7 +9,7 @@ from cython.parallel import parallel, prange from libc.math cimport sqrt, pi, fabs, cos, sin from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from .__interpolation_tools__ import check_image from ._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify @@ -35,8 +35,6 @@ class Radiality(LiquidEngine): self._designation = "Radiality" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=False, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) def run(self, image, image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True, run_type = None): @@ -49,7 +47,7 @@ class Radiality(LiquidEngine): image_interp = check_image(image_interp) return super().benchmark(image, image_interp, magnification, ringRadius, border, radialityPositivityConstraint, doIntensityWeighting) - def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): + """def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): cdef int _magnification = magnification cdef int _border = border @@ -85,7 +83,7 @@ class Radiality(LiquidEngine): else: imRad[f,j,i] = _c_calculate_radiality_per_subpixel(i, j, &imGx[f,0,0], &imGy[f,0,0], xRingCoordinates, yRingCoordinates, _magnification, _ringRadius, nRingCoordinates, _radialityPositivityConstraint, h, w) - return np.asarray(imRad) + return np.asarray(imRad)""" % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): @@ -133,6 +131,9 @@ class Radiality(LiquidEngine): def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1): + if device is None: + device = _fastest_device + cl_ctx = cl.Context([device['device']]) cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx index 48967a41..b91d7d53 100644 --- a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx @@ -4,7 +4,7 @@ schedulers = ['threaded','threaded_guided','threaded_dynamic','threaded_static'] import numpy as np cimport numpy as np -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ...__liquid_engine__ import LiquidEngine from cython.parallel import prange @@ -19,8 +19,6 @@ class GradientRobertsCross(LiquidEngine): self._designation = "GradientRobertsCross" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) def run(self, image, run_type = None): @@ -63,7 +61,10 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row % endfor - def _run_opencl(self, float[:,:,:] image, dict device, int mem_div=1): + def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1): + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx index 0718f968..df126033 100644 --- a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx +++ b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx @@ -9,7 +9,7 @@ cimport numpy as np from cython.parallel import prange from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ._le_mandelbrot_benchmark_ import mandelbrot as _py_mandelbrot from ._le_mandelbrot_benchmark_ import njit_mandelbrot as _njit_mandelbrot @@ -25,9 +25,7 @@ class MandelbrotBenchmark(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "Mandelbrot_Benchmark" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, python_=True, njit_=True, + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, int size=1000, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1, run_type=None) -> np.ndarray: @@ -45,7 +43,10 @@ class MandelbrotBenchmark(LiquidEngine): def benchmark(self, int size, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1): return super().benchmark(size, r_start, r_end, c_start, c_end) - def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device) -> np.ndarray: + def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray: + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py index d911f872..42f80282 100644 --- a/src/nanopyx/__liquid_engine__.py +++ b/src/nanopyx/__liquid_engine__.py @@ -70,15 +70,15 @@ def __init__( if rt[0].startswith('_run_'): runtypename = '_'.join(rt[0].split('_')[2:]).lower() # TODO Recheck this logic TODO - if 'numba' in runtypename and not njit_works: + if 'numba' in runtypename and not njit_works(): continue - elif 'dask' in runtypename and not dask_works: + elif 'dask' in runtypename and not dask_works(): continue - elif 'transonic' in runtypename and not transonic_works: + elif 'transonic' in runtypename and not transonic_works(): continue - elif 'cuda' in runtypename and not cuda_works: + elif 'cuda' in runtypename and not cuda_works(): continue - elif 'opencl' in runtypename and not opencl_works: + elif 'opencl' in runtypename and not opencl_works(): continue else: self._run_types[runtypename] = rt[1] @@ -103,6 +103,7 @@ def __init__( # Lowercase everything for backwards compatibility self._benchmarks = {k.lower(): v for k, v in self._benchmarks.items()} + print(self._benchmarks.keys()) # check if the benchmark dictionary has a key for every available run type for run_type_designation in self._run_types.keys(): @@ -125,6 +126,7 @@ def __init__( ) # Lowercase everything for backwards compatibility self._default_benchmarks = {k.lower(): v for k, v in self._default_benchmarks.items()} + print(self._default_benchmarks.keys()) except: self._default_benchmarks = [] @@ -146,7 +148,8 @@ def _run(self, *args, run_type=None, **kwargs): :return: the result and time taken """ - run_type = run_type.lower() + if run_type is not None: + run_type = run_type.lower() if run_type is None and self.verbose: print("Querying the Agent...") diff --git a/src/nanopyx/core/analysis/_le_channel_registration.pyx b/src/nanopyx/core/analysis/_le_channel_registration.pyx index 0ecff710..8d6945e3 100644 --- a/src/nanopyx/core/analysis/_le_channel_registration.pyx +++ b/src/nanopyx/core/analysis/_le_channel_registration.pyx @@ -7,7 +7,7 @@ from cython.parallel import parallel, prange from libc.math cimport sqrt,pow from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from .ccm cimport _calculate_slice_ccm from .estimate_shift import GetMaxOptimizer @@ -97,8 +97,7 @@ class ChannelRegistrationEstimator(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ChannelRegistrationEstimator" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=None): return self._run(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=run_type) @@ -777,8 +776,12 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) - def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device): - _runtype = "OpenCL_" + device["device"].name + def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None): + + if device is None: + device = _fastest_device + + _runtype = "opencl" crsm = ShiftAndMagnify(verbose=False) cdef float[:, :] img_ref = np.asarray(img_stack[ref_index], dtype=np.float32) diff --git a/src/nanopyx/core/analysis/_le_drift_calculator.pyx b/src/nanopyx/core/analysis/_le_drift_calculator.pyx index 8c0658e0..d8f0154a 100644 --- a/src/nanopyx/core/analysis/_le_drift_calculator.pyx +++ b/src/nanopyx/core/analysis/_le_drift_calculator.pyx @@ -25,10 +25,7 @@ class DriftEstimator(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "DriftEstimator" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=False, - threaded_dynamic_=False, threaded_guided_=False, - njit_=False, python_=False, transonic_=False, cuda_=False, dask_=False, verbose=verbose) + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, time_averaging: int = 2, max_drift: int = 5, ref_option: int = 0, run_type=None): return self._run(np.asarray(image).astype(np.float32), time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option, run_type=run_type) @@ -214,280 +211,3 @@ class DriftEstimator(LiquidEngine): output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) return np.asarray(output).astype(np.float32) - def _run_threaded_guided(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - - if not _check_even_square(image): - image = _make_even_square(image) - - # get image dimensions, should already be an even square - cdef int n_slices = image.shape[0] - cdef int n_rows = image.shape[1] - cdef int n_cols = image.shape[2] - - # ensures time averaging has an acceptable value - if time_averaging < 1: - time_averaging = 1 - elif time_averaging > (n_slices//2): - time_averaging = n_slices//2 - - cdef int n_blocks = n_slices // time_averaging - - averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32) - - cdef int idx - if time_averaging == 1: - averaged = image - else: - for idx in range(n_blocks): - averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0) - - cdef float[:, :, :] ccm - cdef int row_start - cdef int col_start - if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols: - row_start = int(n_rows / 2 - max_drift) - col_start = int(n_cols / 2 - max_drift) - ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)] - else: - ccm = _calculate_ccm(averaged, ref_option) - - cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32) - - cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32) - - cdef float bias_row = 0.0 - cdef float bias_col = 0.0 - cdef float shift_x, shift_y - - cdef int i - for i in range(n_blocks): - - optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32)) - shift_y, shift_x = optimizer.get_max() - - drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3) - drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3) - - if i == 0: - bias_row = drift_table[i, 0] - bias_col = drift_table[i, 1] - drift_table[i, 0] = drift_table[i, 0] - bias_row - drift_table[i, 1] = drift_table[i, 1] - bias_col - - if ref_option == 1 and i > 0: - drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0] - drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1] - - cdef float[:] drift_x, drift_y - if time_averaging > 1: - lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int) - x_interpolator = interp1d( - lin, np.array(drift_table[:, 1]), kind="cubic" - ) - y_interpolator = interp1d( - lin, np.array(drift_table[:, 0]), kind="cubic" - ) - - drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 1] = drift_x - drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 2] = drift_y - - else: - output[:, 1] = drift_table[:, 1] # switch order of rows and cols - output[:, 2] = drift_table[:, 0] # switch order of rows and cols - - cdef int s - with nogil: - for s in prange(n_slices,schedule="guided"): - output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) - - return np.asarray(output).astype(np.float32) - def _run_threaded_dynamic(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - - if not _check_even_square(image): - image = _make_even_square(image) - - # get image dimensions, should already be an even square - cdef int n_slices = image.shape[0] - cdef int n_rows = image.shape[1] - cdef int n_cols = image.shape[2] - - # ensures time averaging has an acceptable value - if time_averaging < 1: - time_averaging = 1 - elif time_averaging > (n_slices//2): - time_averaging = n_slices//2 - - cdef int n_blocks = n_slices // time_averaging - - averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32) - - cdef int idx - if time_averaging == 1: - averaged = image - else: - for idx in range(n_blocks): - averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0) - - cdef float[:, :, :] ccm - cdef int row_start - cdef int col_start - if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols: - row_start = int(n_rows / 2 - max_drift) - col_start = int(n_cols / 2 - max_drift) - ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)] - else: - ccm = _calculate_ccm(averaged, ref_option) - - cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32) - - cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32) - - cdef float bias_row = 0.0 - cdef float bias_col = 0.0 - cdef float shift_x, shift_y - - cdef int i - for i in range(n_blocks): - - optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32)) - shift_y, shift_x = optimizer.get_max() - - drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3) - drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3) - - if i == 0: - bias_row = drift_table[i, 0] - bias_col = drift_table[i, 1] - drift_table[i, 0] = drift_table[i, 0] - bias_row - drift_table[i, 1] = drift_table[i, 1] - bias_col - - if ref_option == 1 and i > 0: - drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0] - drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1] - - cdef float[:] drift_x, drift_y - if time_averaging > 1: - lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int) - x_interpolator = interp1d( - lin, np.array(drift_table[:, 1]), kind="cubic" - ) - y_interpolator = interp1d( - lin, np.array(drift_table[:, 0]), kind="cubic" - ) - - drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 1] = drift_x - drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 2] = drift_y - - else: - output[:, 1] = drift_table[:, 1] # switch order of rows and cols - output[:, 2] = drift_table[:, 0] # switch order of rows and cols - - cdef int s - with nogil: - for s in prange(n_slices,schedule="dynamic"): - output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) - - return np.asarray(output).astype(np.float32) - def _run_threaded_static(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - - if not _check_even_square(image): - image = _make_even_square(image) - - # get image dimensions, should already be an even square - cdef int n_slices = image.shape[0] - cdef int n_rows = image.shape[1] - cdef int n_cols = image.shape[2] - - # ensures time averaging has an acceptable value - if time_averaging < 1: - time_averaging = 1 - elif time_averaging > (n_slices//2): - time_averaging = n_slices//2 - - cdef int n_blocks = n_slices // time_averaging - - averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32) - - cdef int idx - if time_averaging == 1: - averaged = image - else: - for idx in range(n_blocks): - averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0) - - cdef float[:, :, :] ccm - cdef int row_start - cdef int col_start - if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols: - row_start = int(n_rows / 2 - max_drift) - col_start = int(n_cols / 2 - max_drift) - ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)] - else: - ccm = _calculate_ccm(averaged, ref_option) - - cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32) - - cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32) - - cdef float bias_row = 0.0 - cdef float bias_col = 0.0 - cdef float shift_x, shift_y - - cdef int i - for i in range(n_blocks): - - optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32)) - shift_y, shift_x = optimizer.get_max() - - drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3) - drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3) - - if i == 0: - bias_row = drift_table[i, 0] - bias_col = drift_table[i, 1] - drift_table[i, 0] = drift_table[i, 0] - bias_row - drift_table[i, 1] = drift_table[i, 1] - bias_col - - if ref_option == 1 and i > 0: - drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0] - drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1] - - cdef float[:] drift_x, drift_y - if time_averaging > 1: - lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int) - x_interpolator = interp1d( - lin, np.array(drift_table[:, 1]), kind="cubic" - ) - y_interpolator = interp1d( - lin, np.array(drift_table[:, 0]), kind="cubic" - ) - - drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 1] = drift_x - drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices) - output[:, 2] = drift_y - - else: - output[:, 1] = drift_table[:, 1] # switch order of rows and cols - output[:, 2] = drift_table[:, 0] # switch order of rows and cols - - cdef int s - with nogil: - for s in prange(n_slices,schedule="static"): - output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2])) - - return np.asarray(output).astype(np.float32) - - -# % if sch=='unthreaded': -# for i in range(n_blocks): -# % elif sch=='threaded': -# for i in prange(n_blocks): -# % else: -# for i in prange(n_blocks,schedule="static"): -# %endif -# average[i] = np.mean(image[i*time_averaging:(i+1)*time_averaging, :, :], axis=0) \ No newline at end of file diff --git a/src/nanopyx/core/transform/_le_esrrf.pyx b/src/nanopyx/core/transform/_le_esrrf.pyx index 4c25f36b..bc22ee34 100644 --- a/src/nanopyx/core/transform/_le_esrrf.pyx +++ b/src/nanopyx/core/transform/_le_esrrf.pyx @@ -10,7 +10,7 @@ from libc.math cimport cos, sin from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ._le_interpolation_catmull_rom import ShiftAndMagnify from ._le_roberts_cross_gradients import GradientRobertsCross @@ -24,10 +24,7 @@ class eSRRF(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "eSRRF_ST" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, magnification: int = 5, radius: float = 1.5, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type=None): image = check_image(image) @@ -38,6 +35,10 @@ class eSRRF(LiquidEngine): return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1): + + if device is None: + device = _fastest_device + # TODO doIntensityWeighting is irrelevant on gpu2 cl_ctx = cl.Context([device['device']]) dc = device['device'] diff --git a/src/nanopyx/core/transform/_le_esrrf3d.pyx b/src/nanopyx/core/transform/_le_esrrf3d.pyx index b3c2c5a7..662c1568 100644 --- a/src/nanopyx/core/transform/_le_esrrf3d.pyx +++ b/src/nanopyx/core/transform/_le_esrrf3d.pyx @@ -26,9 +26,7 @@ class eSRRF3D(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "eSRRF_3D" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) self._gradients_s_interpolated = None self._gradients_r_interpolated = None self._gradients_c_interpolated = None @@ -43,10 +41,10 @@ class eSRRF3D(LiquidEngine): if image.dtype != np.float32: image = image.astype(np.float32) if len(image.shape) == 4: - return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type) + return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, radius_z=radius_z, ratio_px=ratio_px, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type) elif len(image.shape) == 3: image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2])) - return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type) + return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, radius_z=radius_z, ratio_px=ratio_px, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type) def benchmark(self, image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): if image.dtype != np.float32: diff --git a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx index 7a348921..81d3a0d2 100644 --- a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx @@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_bicubic.h": @@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_bicubic" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -63,8 +60,9 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -238,11 +236,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_bicubic" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -285,7 +281,10 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -512,11 +511,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_bicubic" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -553,7 +550,10 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx index 99ca8d07..72c0b0f7 100644 --- a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx @@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_catmull_rom.h": @@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_catmull_rom" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -63,8 +60,9 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -238,11 +236,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_catmull_rom" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -285,7 +281,10 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -512,11 +511,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_catmull_rom" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -553,7 +550,10 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx index bf1cd551..4c696f48 100644 --- a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx @@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_lanczos.h": @@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_lanczos" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -63,8 +60,9 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -238,11 +236,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_lanczos" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -285,7 +281,10 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -512,11 +511,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_lanczos" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -553,7 +550,10 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx index 0df765ae..ecf3510b 100644 --- a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx @@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log from .__interpolation_tools__ import check_image, value2array from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_interpolation_nearest_neighbor.h": @@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftMagnify_nearest_neighbor" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, - verbose=verbose) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray: """ @@ -63,8 +60,9 @@ class ShiftAndMagnify(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) - def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray: - + def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] @@ -238,11 +236,9 @@ class ShiftScaleRotate(LiquidEngine): Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "ShiftScaleRotate_nearest_neighbor" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray: """ @@ -285,7 +281,10 @@ class ShiftScaleRotate(LiquidEngine): image = check_image(image) return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) - def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) @@ -512,11 +511,9 @@ class PolarTransform(LiquidEngine): Polar Transformations using the NanoPyx Liquid Engine """ - def __init__(self, clear_benchmarks=False, testing=False): + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "PolarTransform_nearest_neighbor" - super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True) + super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray: """ @@ -553,7 +550,10 @@ class PolarTransform(LiquidEngine): scale = 'linear' return super().benchmark(image, nrow, ncol, scale) - def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1): + def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/nanopyx/core/transform/_le_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_nlm_denoising.pyx index d13a94de..d7a97382 100644 --- a/src/nanopyx/core/transform/_le_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_nlm_denoising.pyx @@ -12,7 +12,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device import os os.environ['PYOPENCL_NO_CACHE']='1' @@ -36,9 +36,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -399,7 +396,11 @@ class NLMDenoising(LiquidEngine): - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + + if device is None: + device = _fastest_device + cl_ctx = cl.Context([device['device']]) dc = device['device'] cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx index 506ce316..c9537358 100644 --- a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx @@ -10,7 +10,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_integral_image.h": @@ -29,9 +29,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising_patch" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -423,7 +420,9 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32)) - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray: + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) dc = device['device'] diff --git a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx index 206d8824..94908e91 100644 --- a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx @@ -12,7 +12,7 @@ from cython.parallel import parallel, prange from .__interpolation_tools__ import check_image from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device cdef extern from "_c_patch_distance.h": @@ -28,9 +28,6 @@ class NLMDenoising(LiquidEngine): self._designation = "NLMDenoising_pixel" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, - python_=True, verbose=verbose) def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray: @@ -373,7 +370,9 @@ class NLMDenoising(LiquidEngine): - def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray: + def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + if device is None: + device = _fastest_device cl_ctx = cl.Context([device['device']]) dc = device['device'] cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx index 69f4f656..464ae495 100644 --- a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx +++ b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx @@ -6,7 +6,7 @@ cimport numpy as np from cython.parallel import parallel, prange from libc.math cimport sqrt, pow -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ...__liquid_engine__ import LiquidEngine from .__interpolation_tools__ import check_image @@ -22,8 +22,6 @@ class RadialGradientConvergence(LiquidEngine): self._designation = "RadialGradientConvergence" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) @@ -181,6 +179,9 @@ class RadialGradientConvergence(LiquidEngine): def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1): + if device is None: + device = _fastest_device + # gradient gxgymag*mag*size # image_interp = mag*size # output = image_interp diff --git a/src/nanopyx/core/transform/_le_radiality.pyx b/src/nanopyx/core/transform/_le_radiality.pyx index 76a5c313..663a7e30 100644 --- a/src/nanopyx/core/transform/_le_radiality.pyx +++ b/src/nanopyx/core/transform/_le_radiality.pyx @@ -7,7 +7,7 @@ from cython.parallel import parallel, prange from libc.math cimport sqrt, pi, fabs, cos, sin from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from .__interpolation_tools__ import check_image from ._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify @@ -33,8 +33,6 @@ class Radiality(LiquidEngine): self._designation = "Radiality" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=False, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) def run(self, image, image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True, run_type = None): @@ -47,7 +45,7 @@ class Radiality(LiquidEngine): image_interp = check_image(image_interp) return super().benchmark(image, image_interp, magnification, ringRadius, border, radialityPositivityConstraint, doIntensityWeighting) - def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): + """def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): cdef int _magnification = magnification cdef int _border = border @@ -83,7 +81,7 @@ class Radiality(LiquidEngine): else: imRad[f,j,i] = _c_calculate_radiality_per_subpixel(i, j, &imGx[f,0,0], &imGy[f,0,0], xRingCoordinates, yRingCoordinates, _magnification, _ringRadius, nRingCoordinates, _radialityPositivityConstraint, h, w) - return np.asarray(imRad) + return np.asarray(imRad)""" def _run_threaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): @@ -233,6 +231,9 @@ class Radiality(LiquidEngine): def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1): + if device is None: + device = _fastest_device + cl_ctx = cl.Context([device['device']]) cl_queue = cl.CommandQueue(cl_ctx) diff --git a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx index 28fd22f5..6a25034e 100644 --- a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx +++ b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx @@ -2,7 +2,7 @@ import numpy as np cimport numpy as np -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ...__liquid_engine__ import LiquidEngine from cython.parallel import prange @@ -17,8 +17,6 @@ class GradientRobertsCross(LiquidEngine): self._designation = "GradientRobertsCross" super().__init__( clear_benchmarks=clear_benchmarks, testing=testing, - unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose) def run(self, image, run_type = None): @@ -91,7 +89,10 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row - def _run_opencl(self, float[:,:,:] image, dict device, int mem_div=1): + def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1): + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) diff --git a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx index a61fca34..f221584c 100644 --- a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx +++ b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx @@ -7,7 +7,7 @@ cimport numpy as np from cython.parallel import prange from ...__liquid_engine__ import LiquidEngine -from ...__opencl__ import cl, cl_array +from ...__opencl__ import cl, cl_array, _fastest_device from ._le_mandelbrot_benchmark_ import mandelbrot as _py_mandelbrot from ._le_mandelbrot_benchmark_ import njit_mandelbrot as _njit_mandelbrot @@ -23,9 +23,7 @@ class MandelbrotBenchmark(LiquidEngine): def __init__(self, clear_benchmarks=False, testing=False, verbose=True): self._designation = "Mandelbrot_Benchmark" super().__init__( - clear_benchmarks=clear_benchmarks, testing=testing, - opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, - threaded_dynamic_=True, threaded_guided_=True, python_=True, njit_=True, + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) def run(self, int size=1000, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1, run_type=None) -> np.ndarray: @@ -43,7 +41,10 @@ class MandelbrotBenchmark(LiquidEngine): def benchmark(self, int size, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1): return super().benchmark(size, r_start, r_end, c_start, c_end) - def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device) -> np.ndarray: + def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray: + + if device is None: + device = _fastest_device # QUEUE AND CONTEXT cl_ctx = cl.Context([device['device']]) From 7ffabfafa55feb740a0d17a5bcfd60720fe8a2b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Thu, 16 May 2024 17:51:43 +0100 Subject: [PATCH 06/14] Proof of concept testing for run_type classification --- src/mako_templates/_le_interpolation_base.pyx | 8 ++++++ src/nanopyx/__agent__.py | 11 ++++++-- src/nanopyx/__liquid_engine__.py | 20 ++++++++++--- .../transform/_le_interpolation_bicubic.pyx | 28 +++++++++++++++++++ .../_le_interpolation_catmull_rom.pyx | 28 +++++++++++++++++++ .../transform/_le_interpolation_lanczos.pyx | 28 +++++++++++++++++++ .../_le_interpolation_nearest_neighbor.pyx | 28 +++++++++++++++++++ 7 files changed, 144 insertions(+), 7 deletions(-) diff --git a/src/mako_templates/_le_interpolation_base.pyx b/src/mako_templates/_le_interpolation_base.pyx index 73806713..824d65de 100644 --- a/src/mako_templates/_le_interpolation_base.pyx +++ b/src/mako_templates/_le_interpolation_base.pyx @@ -63,6 +63,9 @@ class ShiftAndMagnify(LiquidEngine): return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ if device is None: device = _fastest_device # QUEUE AND CONTEXT @@ -113,6 +116,11 @@ class ShiftAndMagnify(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py index 29e123f3..b3827be5 100644 --- a/src/nanopyx/__agent__.py +++ b/src/nanopyx/__agent__.py @@ -53,11 +53,14 @@ def __init__( self.delayed_runtypes = {} # Store runtypes as keys and their values as (delay_factor, delay_prob) - def _get_ordered_run_types(self, fn, args, kwargs): + def _get_ordered_run_types(self, fn, args, kwargs,_possible_runtypes=[]): """@public Retrieves an ordered list of run_types for the given args and kwargs """ + if not _possible_runtypes: + _possible_runtypes = fn.run_types.keys() + # str representation of the arguments and their corresponding 'norm' repr_args, repr_norm = fn._get_args_repr_score(*args, **kwargs) # dictionary to hold speeds @@ -68,6 +71,8 @@ def _get_ordered_run_types(self, fn, args, kwargs): # fn._benchmarks is a dictionary of dictionaries. The first key is the run_type, the second key is the repr_args # Check every run_type for the most similar args for run_type in fn._run_types: + if run_type not in _possible_runtypes: + continue if repr_args in fn._benchmarks[run_type]: run_info = fn._benchmarks[run_type][repr_args][1:] else: @@ -196,13 +201,13 @@ def _adjust_times(self, fast_device_times, slow_device_times): return adjusted_times - def get_run_type(self, fn, args, kwargs): + def get_run_type(self, fn, args, kwargs,_possible_runtypes=[]): """ Returns the best run_type for the given args and kwargs """ # Get list of run types - fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs) + fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs,_possible_runtypes) # Penalize the average time a run_type had if that run_type was delayed in previous runs if len(self.delayed_runtypes.keys()) > 0: diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py index 42f80282..c40eabb5 100644 --- a/src/nanopyx/__liquid_engine__.py +++ b/src/nanopyx/__liquid_engine__.py @@ -158,10 +158,22 @@ def _run(self, *args, run_type=None, **kwargs): elif run_type is None: run_type = self.Agent.get_run_type(self, args, kwargs) elif run_type not in self._run_types: - print(f"Unexpected run type {run_type}") - print("Querying the Agent...") - run_type = self.Agent.get_run_type(self, args, kwargs) - print(f"Agent chose: {run_type}") + + # Check if the tags in the run_types + _possible_runtypes = [rt for rt in self._run_types.keys() if f"@{run_type}" in self._run_types[rt].__doc__] + + if not _possible_runtypes: + + print(f"Unexpected run type {run_type}") + print("Querying the Agent...") + run_type = self.Agent.get_run_type(self, args, kwargs) + print(f"Agent chose: {run_type}") + + else: + + print(f"Choosing between all {run_type} implementations") + run_type = self.Agent.get_run_type(self, args, kwargs,_possible_runtypes) + print(f"Agent chose: {run_type}") # try to run try: diff --git a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx index 81d3a0d2..8018d8a7 100644 --- a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx @@ -61,6 +61,9 @@ class ShiftAndMagnify(LiquidEngine): return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ if device is None: device = _fastest_device # QUEUE AND CONTEXT @@ -110,6 +113,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -134,6 +142,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -158,6 +171,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -182,6 +200,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -206,6 +229,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx index 72c0b0f7..d225e3b4 100644 --- a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx @@ -61,6 +61,9 @@ class ShiftAndMagnify(LiquidEngine): return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ if device is None: device = _fastest_device # QUEUE AND CONTEXT @@ -110,6 +113,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -134,6 +142,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -158,6 +171,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -182,6 +200,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -206,6 +229,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx index 4c696f48..d95c55de 100644 --- a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx @@ -61,6 +61,9 @@ class ShiftAndMagnify(LiquidEngine): return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ if device is None: device = _fastest_device # QUEUE AND CONTEXT @@ -110,6 +113,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -134,6 +142,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -158,6 +171,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -182,6 +200,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -206,6 +229,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx index ecf3510b..76dd3f0e 100644 --- a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx @@ -61,6 +61,9 @@ class ShiftAndMagnify(LiquidEngine): return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col) def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + """ if device is None: device = _fastest_device # QUEUE AND CONTEXT @@ -110,6 +113,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -134,6 +142,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -158,6 +171,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -182,6 +200,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -206,6 +229,11 @@ class ShiftAndMagnify(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] From d04ca3018615daaf3dc6fe37b8e47cf014d210db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Fri, 17 May 2024 11:05:34 +0100 Subject: [PATCH 07/14] Mssing underscore in var name --- src/nanopyx/__agent__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py index b3827be5..4939a1c0 100644 --- a/src/nanopyx/__agent__.py +++ b/src/nanopyx/__agent__.py @@ -59,7 +59,7 @@ def _get_ordered_run_types(self, fn, args, kwargs,_possible_runtypes=[]): """ if not _possible_runtypes: - _possible_runtypes = fn.run_types.keys() + _possible_runtypes = fn._run_types.keys() # str representation of the arguments and their corresponding 'norm' repr_args, repr_norm = fn._get_args_repr_score(*args, **kwargs) From 4d9e5208762a90c993bfa073252dfd77f08f17ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Fri, 17 May 2024 11:32:40 +0100 Subject: [PATCH 08/14] Added tags to all methods --- src/mako_templates/_le_interpolation_base.pyx | 25 +++++++- ...core.analysis._le_channel_registration.pyx | 11 +++- ...pyx.core.analysis._le_drift_calculator.pyx | 8 ++- ...nanopyx.core.transform._le_convolution.pyx | 31 +++++++++- .../nanopyx.core.transform._le_esrrf.pyx | 13 +++- .../nanopyx.core.transform._le_esrrf3d.pyx | 10 ++- ...nopyx.core.transform._le_nlm_denoising.pyx | 16 ++++- ...core.transform._le_patch_nlm_denoising.pyx | 15 +++++ ...core.transform._le_pixel_nlm_denoising.pyx | 14 +++++ ...nsform._le_radial_gradient_convergence.pyx | 15 ++++- .../nanopyx.core.transform._le_radiality.pyx | 10 ++- ....transform._le_roberts_cross_gradients.pyx | 14 ++++- ...yx.core.utils._le_mandelbrot_benchmark.pyx | 21 ++++++- .../analysis/_le_channel_registration.pyx | 28 ++++++++- .../core/analysis/_le_drift_calculator.pyx | 11 +++- .../core/transform/_le_convolution.pyx | 52 ++++++++++++++-- src/nanopyx/core/transform/_le_esrrf.pyx | 28 ++++++++- src/nanopyx/core/transform/_le_esrrf3d.pyx | 39 +++++++++--- .../transform/_le_interpolation_bicubic.pyx | 62 ++++++++++++++++--- .../_le_interpolation_catmull_rom.pyx | 62 ++++++++++++++++--- .../transform/_le_interpolation_lanczos.pyx | 62 ++++++++++++++++--- .../_le_interpolation_nearest_neighbor.pyx | 62 ++++++++++++++++--- .../core/transform/_le_nlm_denoising.pyx | 31 +++++++++- .../transform/_le_patch_nlm_denoising.pyx | 30 +++++++++ .../transform/_le_pixel_nlm_denoising.pyx | 31 ++++++++++ .../_le_radial_gradient_convergence.pyx | 33 ++++++++-- src/nanopyx/core/transform/_le_radiality.pyx | 28 +++++++-- .../transform/_le_roberts_cross_gradients.pyx | 29 ++++++++- .../core/utils/_le_mandelbrot_benchmark.pyx | 36 ++++++++++- 29 files changed, 741 insertions(+), 86 deletions(-) diff --git a/src/mako_templates/_le_interpolation_base.pyx b/src/mako_templates/_le_interpolation_base.pyx index 824d65de..1503d573 100644 --- a/src/mako_templates/_le_interpolation_base.pyx +++ b/src/mako_templates/_le_interpolation_base.pyx @@ -118,7 +118,9 @@ class ShiftAndMagnify(LiquidEngine): def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: """ @cpu + % if sch!='unthreaded': @threaded + % endif @cython """ cdef int nFrames = image.shape[0] @@ -204,7 +206,9 @@ class ShiftScaleRotate(LiquidEngine): return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device @@ -259,6 +263,13 @@ class ShiftScaleRotate(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -345,7 +356,9 @@ class PolarTransform(LiquidEngine): return super().benchmark(image, nrow, ncol, scale) def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -404,7 +417,13 @@ class PolarTransform(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx index 7d8ef454..ec8db173 100644 --- a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx +++ b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx @@ -109,6 +109,13 @@ class ChannelRegistrationEstimator(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ _runtype = "${sch}".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -251,7 +258,9 @@ class ChannelRegistrationEstimator(LiquidEngine): % endfor def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None): - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx index 4b83448d..1c928af6 100644 --- a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx +++ b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx @@ -37,7 +37,13 @@ class DriftEstimator(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ if not _check_even_square(image): image = _make_even_square(image) diff --git a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx index e304597a..fa72d800 100644 --- a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx @@ -37,7 +37,13 @@ class Convolution(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -82,7 +88,9 @@ class Convolution(LiquidEngine): % endfor def _run_opencl(self, image, kernel, device=None): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -116,16 +124,35 @@ class Convolution(LiquidEngine): return image_out def _run_python(self, image, kernel): + """ + @cpu + """ return convolution2D_python(image, kernel).astype(np.float32) def _run_transonic(self, image, kernel): + """ + @cpu + @threaded + """ return convolution2D_transonic(image, kernel).astype(np.float32) def _run_dask(self, image, kernel): + """ + @cpu + @threaded + """ return convolution2D_dask(image, kernel).astype(np.float32) def _run_cuda(self, image, kernel): + """ + @gpu + """ return convolution2D_cuda(image, kernel).astype(np.float32) def _run_njit(self, image, kernel): + """ + @cpu + @threaded + @numba + """ return convolution2D_numba(image, kernel).astype(np.float32) diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx index ddfe4d05..09710307 100644 --- a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx @@ -37,7 +37,9 @@ class eSRRF(LiquidEngine): return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -152,6 +154,11 @@ class eSRRF(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "${sch}".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -167,6 +174,10 @@ class eSRRF(LiquidEngine): % endfor def _run_unthreaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @cython + """ runtype = "Unthreaded" crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx index 2660ed1f..e10253b3 100644 --- a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx @@ -58,8 +58,14 @@ class eSRRF3D(LiquidEngine): return super().benchmark(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) % for sch in schedulers: - def _run_${sch}(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_${sch}(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma diff --git a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx index 2f523166..e854107f 100644 --- a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx @@ -73,6 +73,9 @@ class NLMDenoising(LiquidEngine): def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) @@ -80,6 +83,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ cdef float distance_cutoff = 5.0 cdef float var = sigma * sigma @@ -158,6 +165,11 @@ class NLMDenoising(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -225,7 +237,9 @@ class NLMDenoising(LiquidEngine): def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx index e31a3102..40ceeacf 100644 --- a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx @@ -65,6 +65,9 @@ class NLMDenoising(LiquidEngine): return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) @@ -72,6 +75,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ cdef float distance_cutoff = 5.0 cdef float var = sigma * sigma @@ -150,6 +157,11 @@ class NLMDenoising(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -229,6 +241,9 @@ class NLMDenoising(LiquidEngine): def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray: + """ + @gpu + """ if device is None: device = _fastest_device # QUEUE AND CONTEXT diff --git a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx index 750cd7b4..0d90e2a4 100644 --- a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx @@ -64,6 +64,9 @@ class NLMDenoising(LiquidEngine): return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False) @@ -72,6 +75,13 @@ class NLMDenoising(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + % if sch!='unthreaded': + @threaded + % endif + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -141,6 +151,10 @@ class NLMDenoising(LiquidEngine): def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + @cython + """ if device is None: device = _fastest_device cl_ctx = cl.Context([device['device']]) diff --git a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx index a8896ffc..3ad05ca6 100644 --- a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx @@ -41,7 +41,10 @@ class RadialGradientConvergence(LiquidEngine): return super().benchmark(gradient_col_interp, gradient_row_interp, image_interp, magnification, radius, sensitivity, doIntensityWeighting) def _run_unthreaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -71,7 +74,11 @@ class RadialGradientConvergence(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -105,7 +112,9 @@ class RadialGradientConvergence(LiquidEngine): def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx index 89dd24c7..4b4ac4b5 100644 --- a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx @@ -87,7 +87,11 @@ class Radiality(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -130,7 +134,9 @@ class Radiality(LiquidEngine): def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx index b91d7d53..d3e8ae1c 100644 --- a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx +++ b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx @@ -30,7 +30,10 @@ class GradientRobertsCross(LiquidEngine): return super().benchmark(image) def _run_unthreaded(self, float[:,:,:] image): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) cdef float [:,:,:] gradient_row = np.zeros_like(image) @@ -44,6 +47,11 @@ class GradientRobertsCross(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -62,7 +70,9 @@ class GradientRobertsCross(LiquidEngine): % endfor def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx index df126033..a8f4fcf9 100644 --- a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx +++ b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx @@ -44,7 +44,9 @@ class MandelbrotBenchmark(LiquidEngine): return super().benchmark(size, r_start, r_end, c_start, c_end) def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device @@ -78,6 +80,10 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot.get() def _run_unthreaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -95,6 +101,11 @@ class MandelbrotBenchmark(LiquidEngine): % for sch in schedulers: def _run_${sch}(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -116,11 +127,19 @@ class MandelbrotBenchmark(LiquidEngine): % endfor def _run_python(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) _py_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end) return im_mandelbrot def _run_njit(self, int size=10, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1) -> np.ndarray: + """ + @cpu + @threaded + @numba + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) _njit_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end) return im_mandelbrot diff --git a/src/nanopyx/core/analysis/_le_channel_registration.pyx b/src/nanopyx/core/analysis/_le_channel_registration.pyx index 8d6945e3..09c6e4c0 100644 --- a/src/nanopyx/core/analysis/_le_channel_registration.pyx +++ b/src/nanopyx/core/analysis/_le_channel_registration.pyx @@ -106,6 +106,10 @@ class ChannelRegistrationEstimator(LiquidEngine): return super().benchmark(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity) def _run_unthreaded(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @cython + """ _runtype = "unthreaded".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -240,6 +244,11 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) def _run_threaded(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @threaded + @cython + """ _runtype = "threaded".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -374,6 +383,11 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) def _run_threaded_guided(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @threaded + @cython + """ _runtype = "threaded_guided".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -508,6 +522,11 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) def _run_threaded_dynamic(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @threaded + @cython + """ _runtype = "threaded_dynamic".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -642,6 +661,11 @@ class ChannelRegistrationEstimator(LiquidEngine): return np.array(translation_masks) def _run_threaded_static(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity): + """ + @cpu + @threaded + @cython + """ _runtype = "threaded_static".capitalize() crsm = ShiftAndMagnify(verbose=False) @@ -777,7 +801,9 @@ class ChannelRegistrationEstimator(LiquidEngine): def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None): - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/nanopyx/core/analysis/_le_drift_calculator.pyx b/src/nanopyx/core/analysis/_le_drift_calculator.pyx index d8f0154a..25165f7f 100644 --- a/src/nanopyx/core/analysis/_le_drift_calculator.pyx +++ b/src/nanopyx/core/analysis/_le_drift_calculator.pyx @@ -34,7 +34,10 @@ class DriftEstimator(LiquidEngine): return super().benchmark(image, time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option) def _run_unthreaded(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - + """ + @cpu + @cython + """ if not _check_even_square(image): image = _make_even_square(image) @@ -123,7 +126,11 @@ class DriftEstimator(LiquidEngine): return np.asarray(output).astype(np.float32) def _run_threaded(self, float[:, :, :] image, int time_averaging=2, int max_drift=5, int ref_option=0): - + """ + @cpu + @threaded + @cython + """ if not _check_even_square(image): image = _make_even_square(image) diff --git a/src/nanopyx/core/transform/_le_convolution.pyx b/src/nanopyx/core/transform/_le_convolution.pyx index 1f4d5fa8..ce2fde47 100644 --- a/src/nanopyx/core/transform/_le_convolution.pyx +++ b/src/nanopyx/core/transform/_le_convolution.pyx @@ -34,7 +34,10 @@ class Convolution(LiquidEngine): return super().benchmark(image, kernel) def _run_unthreaded(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -69,7 +72,11 @@ class Convolution(LiquidEngine): return conv_out def _run_threaded(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @threaded + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -104,7 +111,11 @@ class Convolution(LiquidEngine): return conv_out def _run_threaded_guided(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @threaded + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -139,7 +150,11 @@ class Convolution(LiquidEngine): return conv_out def _run_threaded_dynamic(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @threaded + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -174,7 +189,11 @@ class Convolution(LiquidEngine): return conv_out def _run_threaded_static(self, float[:,:] image, float[:,:] kernel): - + """ + @cpu + @threaded + @cython + """ cdef int nRows = image.shape[0] cdef int nCols = image.shape[1] @@ -210,7 +229,9 @@ class Convolution(LiquidEngine): def _run_opencl(self, image, kernel, device=None): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -244,16 +265,35 @@ class Convolution(LiquidEngine): return image_out def _run_python(self, image, kernel): + """ + @cpu + """ return convolution2D_python(image, kernel).astype(np.float32) def _run_transonic(self, image, kernel): + """ + @cpu + @threaded + """ return convolution2D_transonic(image, kernel).astype(np.float32) def _run_dask(self, image, kernel): + """ + @cpu + @threaded + """ return convolution2D_dask(image, kernel).astype(np.float32) def _run_cuda(self, image, kernel): + """ + @gpu + """ return convolution2D_cuda(image, kernel).astype(np.float32) def _run_njit(self, image, kernel): + """ + @cpu + @threaded + @numba + """ return convolution2D_numba(image, kernel).astype(np.float32) diff --git a/src/nanopyx/core/transform/_le_esrrf.pyx b/src/nanopyx/core/transform/_le_esrrf.pyx index bc22ee34..df40fa2e 100644 --- a/src/nanopyx/core/transform/_le_esrrf.pyx +++ b/src/nanopyx/core/transform/_le_esrrf.pyx @@ -35,7 +35,9 @@ class eSRRF(LiquidEngine): return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -149,6 +151,11 @@ class eSRRF(LiquidEngine): return output_image def _run_threaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "threaded".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -162,6 +169,11 @@ class eSRRF(LiquidEngine): return radial_gradients def _run_threaded_guided(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "threaded_guided".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -175,6 +187,11 @@ class eSRRF(LiquidEngine): return radial_gradients def _run_threaded_dynamic(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "threaded_dynamic".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -188,6 +205,11 @@ class eSRRF(LiquidEngine): return radial_gradients def _run_threaded_static(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @threaded + @cython + """ runtype = "threaded_static".capitalize() crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) @@ -202,6 +224,10 @@ class eSRRF(LiquidEngine): return radial_gradients def _run_unthreaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): + """ + @cpu + @cython + """ runtype = "Unthreaded" crsm = ShiftAndMagnify(verbose=False) rbc = GradientRobertsCross(verbose=False) diff --git a/src/nanopyx/core/transform/_le_esrrf3d.pyx b/src/nanopyx/core/transform/_le_esrrf3d.pyx index 662c1568..e1f27f7b 100644 --- a/src/nanopyx/core/transform/_le_esrrf3d.pyx +++ b/src/nanopyx/core/transform/_le_esrrf3d.pyx @@ -55,8 +55,12 @@ class eSRRF3D(LiquidEngine): image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2])) return super().benchmark(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting) - def _run_threaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_threaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -119,8 +123,12 @@ class eSRRF3D(LiquidEngine): rgc_map[f, sM, rM, cM] = rgc_val return np.asarray(rgc_map) - def _run_threaded_guided(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_threaded_guided(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -183,8 +191,12 @@ class eSRRF3D(LiquidEngine): rgc_map[f, sM, rM, cM] = rgc_val return np.asarray(rgc_map) - def _run_threaded_dynamic(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_threaded_dynamic(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -247,8 +259,12 @@ class eSRRF3D(LiquidEngine): rgc_map[f, sM, rM, cM] = rgc_val return np.asarray(rgc_map) - def _run_threaded_static(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_threaded_static(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -311,8 +327,11 @@ class eSRRF3D(LiquidEngine): rgc_map[f, sM, rM, cM] = rgc_val return np.asarray(rgc_map) - def _run_unthreaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"): - + def _run_unthreaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True): + """ + @cpu + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma diff --git a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx index 8018d8a7..d5f62230 100644 --- a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx @@ -115,7 +115,6 @@ class ShiftAndMagnify(LiquidEngine): def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: """ @cpu - @threaded @cython """ cdef int nFrames = image.shape[0] @@ -310,7 +309,9 @@ class ShiftScaleRotate(LiquidEngine): return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device @@ -364,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -398,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -432,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -466,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -500,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -579,7 +604,9 @@ class PolarTransform(LiquidEngine): return super().benchmark(image, nrow, ncol, scale) def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -637,7 +664,10 @@ class PolarTransform(LiquidEngine): return output def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -671,7 +701,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -705,7 +739,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -739,7 +777,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -773,7 +815,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx index d225e3b4..f080fd06 100644 --- a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx @@ -115,7 +115,6 @@ class ShiftAndMagnify(LiquidEngine): def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: """ @cpu - @threaded @cython """ cdef int nFrames = image.shape[0] @@ -310,7 +309,9 @@ class ShiftScaleRotate(LiquidEngine): return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device @@ -364,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -398,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -432,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -466,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -500,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -579,7 +604,9 @@ class PolarTransform(LiquidEngine): return super().benchmark(image, nrow, ncol, scale) def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -637,7 +664,10 @@ class PolarTransform(LiquidEngine): return output def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -671,7 +701,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -705,7 +739,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -739,7 +777,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -773,7 +815,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx index d95c55de..4537579d 100644 --- a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx @@ -115,7 +115,6 @@ class ShiftAndMagnify(LiquidEngine): def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: """ @cpu - @threaded @cython """ cdef int nFrames = image.shape[0] @@ -310,7 +309,9 @@ class ShiftScaleRotate(LiquidEngine): return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device @@ -364,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -398,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -432,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -466,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -500,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -579,7 +604,9 @@ class PolarTransform(LiquidEngine): return super().benchmark(image, nrow, ncol, scale) def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -637,7 +664,10 @@ class PolarTransform(LiquidEngine): return output def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -671,7 +701,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -705,7 +739,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -739,7 +777,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -773,7 +815,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx index 76dd3f0e..d53ee3d4 100644 --- a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx +++ b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx @@ -115,7 +115,6 @@ class ShiftAndMagnify(LiquidEngine): def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray: """ @cpu - @threaded @cython """ cdef int nFrames = image.shape[0] @@ -310,7 +309,9 @@ class ShiftScaleRotate(LiquidEngine): return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle) def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device @@ -364,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -398,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -432,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -466,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -500,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -579,7 +604,9 @@ class PolarTransform(LiquidEngine): return super().benchmark(image, nrow, ncol, scale) def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device @@ -637,7 +664,10 @@ class PolarTransform(LiquidEngine): return output def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -671,7 +701,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -705,7 +739,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -739,7 +777,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] @@ -773,7 +815,11 @@ class PolarTransform(LiquidEngine): return image_out def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale): - + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef int rows = image.shape[1] cdef int cols = image.shape[2] diff --git a/src/nanopyx/core/transform/_le_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_nlm_denoising.pyx index d7a97382..b5aab716 100644 --- a/src/nanopyx/core/transform/_le_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_nlm_denoising.pyx @@ -71,6 +71,9 @@ class NLMDenoising(LiquidEngine): def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) @@ -78,6 +81,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ cdef float distance_cutoff = 5.0 cdef float var = sigma * sigma @@ -155,6 +162,11 @@ class NLMDenoising(LiquidEngine): pad_size: -pad_size]).astype(np.float32)) def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -215,6 +227,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -275,6 +292,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -335,6 +357,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -397,7 +424,9 @@ class NLMDenoising(LiquidEngine): def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx index c9537358..f16d28a1 100644 --- a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx @@ -63,6 +63,9 @@ class NLMDenoising(LiquidEngine): return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) @@ -70,6 +73,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ cdef float distance_cutoff = 5.0 cdef float var = sigma * sigma @@ -147,6 +154,11 @@ class NLMDenoising(LiquidEngine): pad_size: -pad_size]).astype(np.float32)) def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -215,6 +227,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32)) def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -283,6 +300,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32)) def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -351,6 +373,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32)) def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ cdef float distance_cutoff = 5.0 @@ -421,6 +448,9 @@ class NLMDenoising(LiquidEngine): def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray: + """ + @gpu + """ if device is None: device = _fastest_device # QUEUE AND CONTEXT diff --git a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx index 94908e91..b0358c27 100644 --- a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx +++ b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx @@ -62,6 +62,9 @@ class NLMDenoising(LiquidEngine): return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + """ out = np.zeros_like(image) for i in range(image.shape[0]): out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False) @@ -69,6 +72,10 @@ class NLMDenoising(LiquidEngine): return np.squeeze(out) def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -129,6 +136,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -189,6 +201,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -249,6 +266,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -309,6 +331,11 @@ class NLMDenoising(LiquidEngine): return np.squeeze(np.asarray(result)) def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ if patch_size % 2 == 0: patch_size = patch_size + 1 # odd value for symmetric patch @@ -371,6 +398,10 @@ class NLMDenoising(LiquidEngine): def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray: + """ + @gpu + @cython + """ if device is None: device = _fastest_device cl_ctx = cl.Context([device['device']]) diff --git a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx index 464ae495..0e9d1ab1 100644 --- a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx +++ b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx @@ -39,7 +39,10 @@ class RadialGradientConvergence(LiquidEngine): return super().benchmark(gradient_col_interp, gradient_row_interp, image_interp, magnification, radius, sensitivity, doIntensityWeighting) def _run_unthreaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -68,7 +71,11 @@ class RadialGradientConvergence(LiquidEngine): return np.asarray(rgc_map,dtype=np.float32) def _run_threaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -95,7 +102,11 @@ class RadialGradientConvergence(LiquidEngine): rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION, fwhm, tSO, tSS, _sensitivity) return np.asarray(rgc_map,dtype=np.float32) def _run_threaded_guided(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -122,7 +133,11 @@ class RadialGradientConvergence(LiquidEngine): rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION, fwhm, tSO, tSS, _sensitivity) return np.asarray(rgc_map,dtype=np.float32) def _run_threaded_dynamic(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -149,7 +164,11 @@ class RadialGradientConvergence(LiquidEngine): rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION, fwhm, tSO, tSS, _sensitivity) return np.asarray(rgc_map,dtype=np.float32) def _run_threaded_static(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True): - + """ + @cpu + @threaded + @cython + """ cdef float sigma = radius / 2.355 cdef float fwhm = radius cdef float tSS = 2 * sigma * sigma @@ -178,7 +197,9 @@ class RadialGradientConvergence(LiquidEngine): def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/nanopyx/core/transform/_le_radiality.pyx b/src/nanopyx/core/transform/_le_radiality.pyx index 663a7e30..bad66375 100644 --- a/src/nanopyx/core/transform/_le_radiality.pyx +++ b/src/nanopyx/core/transform/_le_radiality.pyx @@ -84,7 +84,11 @@ class Radiality(LiquidEngine): return np.asarray(imRad)""" def _run_threaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -120,7 +124,11 @@ class Radiality(LiquidEngine): return np.asarray(imRad) def _run_threaded_guided(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -156,7 +164,11 @@ class Radiality(LiquidEngine): return np.asarray(imRad) def _run_threaded_dynamic(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -192,7 +204,11 @@ class Radiality(LiquidEngine): return np.asarray(imRad) def _run_threaded_static(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True): - + """ + @cpu + @threaded + @cython + """ cdef int _magnification = magnification cdef int _border = border cdef float _ringRadius = ringRadius * magnification @@ -230,7 +246,9 @@ class Radiality(LiquidEngine): def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx index 6a25034e..002423ed 100644 --- a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx +++ b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx @@ -28,7 +28,10 @@ class GradientRobertsCross(LiquidEngine): return super().benchmark(image) def _run_unthreaded(self, float[:,:,:] image): - + """ + @cpu + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) cdef float [:,:,:] gradient_row = np.zeros_like(image) @@ -41,6 +44,11 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_threaded(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -53,6 +61,11 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_threaded_guided(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -65,6 +78,11 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_threaded_dynamic(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -77,6 +95,11 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_threaded_static(self, float[:,:,:] image): + """ + @cpu + @threaded + @cython + """ cdef int nFrames = image.shape[0] cdef float [:,:,:] gradient_col = np.zeros_like(image) @@ -90,7 +113,9 @@ class GradientRobertsCross(LiquidEngine): return gradient_col, gradient_row def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1): - + """ + @gpu + """ if device is None: device = _fastest_device diff --git a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx index f221584c..d28896a9 100644 --- a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx +++ b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx @@ -42,7 +42,9 @@ class MandelbrotBenchmark(LiquidEngine): return super().benchmark(size, r_start, r_end, c_start, c_end) def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray: - + """ + @gpu + """ if device is None: device = _fastest_device @@ -76,6 +78,10 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot.get() def _run_unthreaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -92,6 +98,11 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_threaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -107,6 +118,11 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_threaded_guided(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -122,6 +138,11 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_threaded_dynamic(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -137,6 +158,11 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_threaded_static(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + @threaded + @cython + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) cdef int[:,:] _im_mandelbrot = im_mandelbrot @@ -153,11 +179,19 @@ class MandelbrotBenchmark(LiquidEngine): return im_mandelbrot def _run_python(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray: + """ + @cpu + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) _py_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end) return im_mandelbrot def _run_njit(self, int size=10, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1) -> np.ndarray: + """ + @cpu + @threaded + @numba + """ im_mandelbrot = np.empty((size, size), dtype=np.int32) _njit_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end) return im_mandelbrot From 6fa0666ddcd9a1c44f333ad8151e8e90373c962a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Fri, 17 May 2024 11:35:40 +0100 Subject: [PATCH 09/14] Removing obsolete prints --- src/nanopyx/__liquid_engine__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py index c40eabb5..a044d037 100644 --- a/src/nanopyx/__liquid_engine__.py +++ b/src/nanopyx/__liquid_engine__.py @@ -103,7 +103,6 @@ def __init__( # Lowercase everything for backwards compatibility self._benchmarks = {k.lower(): v for k, v in self._benchmarks.items()} - print(self._benchmarks.keys()) # check if the benchmark dictionary has a key for every available run type for run_type_designation in self._run_types.keys(): @@ -126,7 +125,6 @@ def __init__( ) # Lowercase everything for backwards compatibility self._default_benchmarks = {k.lower(): v for k, v in self._default_benchmarks.items()} - print(self._default_benchmarks.keys()) except: self._default_benchmarks = [] From fcdcf43acdac4c57257d3adece5c10288103b72a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B3nio=20Brito?= <50997716+antmsbrito@users.noreply.github.com> Date: Fri, 17 May 2024 13:01:23 +0100 Subject: [PATCH 10/14] Added further error handling to the agent --- src/nanopyx/__agent__.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py index 4939a1c0..be03b53c 100644 --- a/src/nanopyx/__agent__.py +++ b/src/nanopyx/__agent__.py @@ -207,7 +207,18 @@ def get_run_type(self, fn, args, kwargs,_possible_runtypes=[]): """ # Get list of run types - fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs,_possible_runtypes) + try: + fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs,_possible_runtypes) + except TypeError: + print(f"There seems to be an error regarding your benchmarks. \n\ +To give full control to the agent please ensure that one of the following is true: \n\ +\t - You have at least 3 benchmarks for all runtypes using any set of args,kwargs \n\ +\t - Provide a set of default benchmarks during the Liquid Engine class creation \n\ +Otherwise explicity choose one of the following run_types:") + print('\t-','\n\t- '.join(fn._run_types.keys())) + + print("The agent will choose a random run_type") + return random.choices(list(fn._run_types.keys()), k=1)[0] # Penalize the average time a run_type had if that run_type was delayed in previous runs if len(self.delayed_runtypes.keys()) > 0: From 699f3e2d30e887d70cff6b01161997cc9eab4946 Mon Sep 17 00:00:00 2001 From: Bruno Saraiva Date: Fri, 17 May 2024 14:05:04 +0100 Subject: [PATCH 11/14] example of implementing custom le class with switch --- .../LiquidEngineImplementationExample.ipynb | 139 ++++++++++++++++++ notebooks/myliquidengineclass.py | 22 +++ 2 files changed, 161 insertions(+) create mode 100644 notebooks/LiquidEngineImplementationExample.ipynb create mode 100644 notebooks/myliquidengineclass.py diff --git a/notebooks/LiquidEngineImplementationExample.ipynb b/notebooks/LiquidEngineImplementationExample.ipynb new file mode 100644 index 00000000..ad8df266 --- /dev/null +++ b/notebooks/LiquidEngineImplementationExample.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create a Liquid Engine Class implementing the two modes of Scikit-image NLM denoising as two different implementations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a random image array to be processed" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "img = np.random.random((1, 100, 100)).astype(np.float32)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmark the two implementations" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cupy implementation is not available. Make sure you have the right version of Cupy and CUDA installed.\n", + "Agent: MyLiquidEngineClass using ski_nlm_fast ran in 21.019929375033826 seconds\n", + "Agent: MyLiquidEngineClass using ski_nlm_nonfast ran in 0.3058308749459684 seconds\n", + "Fastest run type: ski_nlm_nonfast\n", + "Slowest run type: ski_nlm_fast\n", + "ski_nlm_nonfast is 68.73x faster than ski_nlm_fast\n" + ] + }, + { + "data": { + "text/plain": [ + "[(0.3058308749459684, 'ski_nlm_nonfast', None),\n", + " (21.019929375033826, 'ski_nlm_fast', None)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from myliquidengineclass import MyLiquidEngineClass\n", + "my_liquid = MyLiquidEngineClass()\n", + "my_liquid.benchmark(img, patch_size=5, patch_distance=11, h=0.1, sigma=0.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmark the two implementations with different image size" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent: MyLiquidEngineClass using ski_nlm_fast ran in 0.013037208002060652 seconds\n", + "Agent: MyLiquidEngineClass using ski_nlm_nonfast ran in 0.06136862491257489 seconds\n", + "Fastest run type: ski_nlm_fast\n", + "Slowest run type: ski_nlm_nonfast\n", + "ski_nlm_fast is 4.71x faster than ski_nlm_nonfast\n" + ] + }, + { + "data": { + "text/plain": [ + "[(0.013037208002060652, 'ski_nlm_fast', None),\n", + " (0.06136862491257489, 'ski_nlm_nonfast', None)]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_liquid = MyLiquidEngineClass()\n", + "my_liquid.benchmark(np.random.random((500, 500)).astype(np.float32), patch_size=5, patch_distance=1, h=0.1, sigma=0.0)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ocb_dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/myliquidengineclass.py b/notebooks/myliquidengineclass.py new file mode 100644 index 00000000..91ab3b1d --- /dev/null +++ b/notebooks/myliquidengineclass.py @@ -0,0 +1,22 @@ +import numpy as np +from nanopyx.__liquid_engine__ import LiquidEngine +from skimage.restoration import denoise_nl_means + + +class MyLiquidEngineClass(LiquidEngine): + + def __init__(self, clear_benchmarks=False, testing=False, verbose=True): + self._designation = "MyLiquidEngineClass" + super().__init__( + clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose) + + def run(self, image: np.ndarray, patch_size: int, patch_distance: int, h: float, sigma: float, run_type:bool=None): + if image.dtype != "np.float32": + image = image.astype("np.float32") + return self._run(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) + + def _run_ski_nlm_fast(self, image, patch_size, patch_distance, h, sigma): + return denoise_nl_means(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True) + + def _run_ski_nlm_nonfast(self, image, patch_size, patch_distance, h, sigma): + return denoise_nl_means(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False) \ No newline at end of file From 27c233cc726d29dc48847e0d1a11795e194a2bf4 Mon Sep 17 00:00:00 2001 From: Bruno Saraiva Date: Fri, 17 May 2024 14:05:13 +0100 Subject: [PATCH 12/14] update to pytest settings --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d9047f8f..0f35f3ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,7 +139,7 @@ reportUndefinedVariable = false reportMissingImports = false [tool.pytest.ini_options] -addopts = "--cov=nanopyx --plots --doctest-modules --doctest-cython --ignore-glob=run*Tools.py --ignore=setup.py --ignore=notebooks/ --ignore=src/scripts --ignore=src/notebookchef --ignore=tests/notebooks" +addopts = "--cov=nanopyx --plots --doctest-modules --doctest-cython --ignore-glob=run*Tools.py --ignore=setup.py --ignore=notebooks/ --ignore=src/scripts --ignore=src/notebookchef --ignore=tests/notebooks --cov-report term-missing" timeout = 6001 plt_dirname = "tests_plots" doctest_encoding = "latin1" From 11ada1b4fa8078a4b793e265cc271d9b6e879491 Mon Sep 17 00:00:00 2001 From: Bruno Saraiva Date: Wed, 22 May 2024 12:13:11 +0100 Subject: [PATCH 13/14] changed to use external liquid_engine package --- pyproject.toml | 1 + src/nanopyx/__agent__.py | 261 +----------------- src/nanopyx/__liquid_engine__.py | 452 +------------------------------ 3 files changed, 3 insertions(+), 711 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0f35f3ef..fb2b1eee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ + "liquid_engine", "mako>=1.3.0", "cython>=0.29.32", "numpy>=1.22,<2", diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py index be03b53c..641a80a1 100644 --- a/src/nanopyx/__agent__.py +++ b/src/nanopyx/__agent__.py @@ -1,260 +1 @@ -import platform -import random - -import numpy as np -from sklearn.linear_model import LogisticRegression -from scipy.stats import norm - -from .__njit__ import njit_works -from .__opencl__ import opencl_works, devices - - -class Agent_: - - """ - Base class for the Agent of the Nanopyx Liquid Engine - Pond, James Pond - """ - - def __init__( - self, - ) -> None: - """ - Initialize the Agent - The agent is supposed to work as a singleton object, initialized only once in the __init__.py of nanopyx - PS: (Is this good enough or is it necessary to implement the singleton design pattern?) - - Agent responsabilities: - 1. Store the current state of the machine (e.g. OS, CPU, RAM, GPU, Python version etc.); - 2. Store the current state of ALL initialized LE objects (e.g. anything that is currently running, anything that is scheduled to run, - runs previously executed in the current session etc.); - 3. Whenever a LE object wants to run, it must query the Agent on what is the best implementation for it; - 4. Tests whether there was an unexpected delay and adjust following paths based on it; - """ - - ### MACHINE INFO ### - self.os_info = {"OS": platform.platform(), "Architecture": platform.machine()} - self.cpu_info = {"CPU": platform.processor()} - self.ram_info = {"RAM": "TBD"} - self.py_info = { - "Version": platform.python_version(), - "Implementation": platform.python_implementation(), - "Compiler": platform.python_compiler(), - } - - self.numba_info = {"Numba": njit_works()} - self.pyopencl_info = {"PyOpenCL": opencl_works(), "Devices": devices} - self.cuda_info = {"CUDA": "TBD"} - ### MACHINE INFO ### - - self._current_runs = [] - self._scheduled_runs = [] - self._finished_runs = [] - - self.delayed_runtypes = {} # Store runtypes as keys and their values as (delay_factor, delay_prob) - - def _get_ordered_run_types(self, fn, args, kwargs,_possible_runtypes=[]): - """@public - Retrieves an ordered list of run_types for the given args and kwargs - """ - - if not _possible_runtypes: - _possible_runtypes = fn._run_types.keys() - - # str representation of the arguments and their corresponding 'norm' - repr_args, repr_norm = fn._get_args_repr_score(*args, **kwargs) - # dictionary to hold speeds - fast_avg_speed = {} - fast_std_speed = {} - slow_avg_speed = {} - slow_std_speed = {} - # fn._benchmarks is a dictionary of dictionaries. The first key is the run_type, the second key is the repr_args - # Check every run_type for the most similar args - for run_type in fn._run_types: - if run_type not in _possible_runtypes: - continue - if repr_args in fn._benchmarks[run_type]: - run_info = fn._benchmarks[run_type][repr_args][1:] - else: - # if the repr_args are not in the benchmarks, find the most similar repr_args - best_score = np.inf - best_repr_args = None - for repr_args_ in fn._benchmarks[run_type]: - score = np.abs(fn._benchmarks[run_type][repr_args_][0] - repr_norm) - if score < best_score: - best_score = score - best_repr_args = repr_args_ - # What happens if there are no benchmarks for this runtype? - if best_repr_args is None: - run_info = [0] - else: - run_info = fn._benchmarks[run_type][best_repr_args][1:] - - if None in run_info: # yamls null are read into None python objects - continue - - if len(run_info) < 2: - # Fall back to default values - if "opencl" in run_type: - rt = "opencl" - else: - rt = run_type - - best_score = np.inf - best_repr_args = None - for repr_args_ in fn._default_benchmarks[rt]: - score = np.abs(fn._default_benchmarks[rt][repr_args_][0] - repr_norm) - if score < best_score: - best_score = score - best_repr_args = repr_args_ - run_info = fn._default_benchmarks[rt][best_repr_args][1:] - - run_info = np.array(run_info) - if len(run_info) > 50: - run_info = run_info[-50:] - - fast_values = np.partition(run_info, len(run_info) // 2)[: len(run_info) // 2] - slow_values = np.partition(run_info, len(run_info) // 2)[len(run_info) // 2 :] - fast_avg_speed[run_type] = np.average(fast_values) - fast_std_speed[run_type] = np.std(fast_values) - slow_avg_speed[run_type] = np.average(slow_values) - slow_std_speed[run_type] = np.std(slow_values) - - return fast_avg_speed, fast_std_speed, slow_avg_speed, slow_std_speed - - def _calculate_prob_of_delay(self, runtimes_history, avg, std): - """@public - Calculates the probability that the given run_type is still delayed using historical data - """ - - # Boolean array, True if delay, False if not - delays = runtimes_history > avg + 4 * std - - model = LogisticRegression() - model.fit([[state] for state in delays[:-1]], delays[1:]) - - return model.predict_proba([[True]])[:, model.classes_.tolist().index(True)][0] - - def _check_delay(self, run_type, runtime, runtimes_history, verbose=True): - """@public - Checks if the given run_type ran delayed in the previous run when compared with historical data - If delayed: - 1. Calculates a probability that this delay is maintained - 2. Stores the delay factor and the probability - """ - # TODO test - threaded_runtypes = ["threaded", "threaded_static", "threaded_dynamic", "threaded_guided"] - - runtimes_history = np.array(runtimes_history) - if len(runtimes_history) > 50: - runtimes_history = runtimes_history[-50:] - fast_values = np.partition(runtimes_history, len(runtimes_history) // 2)[: len(runtimes_history) // 2] - slow_values = np.partition(runtimes_history, len(runtimes_history) // 2)[len(runtimes_history) // 2 :] - - fast_avg_speed = np.average(fast_values) - fast_std_speed = np.std(fast_values) - slow_avg_speed = np.average(slow_values) - slow_std_speed = np.std(slow_values) - - if run_type in self.delayed_runtypes: - if runtime < (slow_avg_speed - slow_std_speed) or runtime < (fast_avg_speed + fast_std_speed): - if "threaded" in run_type: - for threaded_run_type in threaded_runtypes: - self.delayed_runtypes.pop(threaded_run_type, None) - else: - if run_type in self.delayed_runtypes: - self.delayed_runtypes.pop(run_type, None) - return "Delay off" - - if runtime > fast_avg_speed + 4 * fast_std_speed: - runtimes_history = np.append(runtimes_history, runtime) - delay_factor = runtime / fast_avg_speed - try: - delay_prob = self._calculate_prob_of_delay(runtimes_history, fast_avg_speed, fast_std_speed) - except ValueError: - delay_prob = 0.01 - if verbose: - print( - f"Run type {run_type} was delayed in the previous run. Delay factor: {delay_factor}, Delay probability: {delay_prob}" - ) - - if "threaded" in run_type: - for threaded_run_type in threaded_runtypes: - self.delayed_runtypes[threaded_run_type] = (delay_factor, delay_prob) - else: - self.delayed_runtypes[run_type] = (delay_factor, delay_prob) - - def _adjust_times(self, fast_device_times, slow_device_times): - """@public - Adjusts the historic avg time of a run_type if it was delayed in previous runs - """ - adjusted_times = fast_device_times.copy() - for runtype in self.delayed_runtypes.keys(): - if runtype in fast_device_times.keys(): - delay_factor, delay_prob = self.delayed_runtypes[runtype] - # Weighted avg by the probability the run_type is still delayed - # expected_time * P(~delay) + delayed_time * P(delay) - adjusted_times[runtype] = ( - fast_device_times[runtype] * (1 - delay_prob) - + fast_device_times[runtype] * delay_factor * delay_prob - ) - - return adjusted_times - - def get_run_type(self, fn, args, kwargs,_possible_runtypes=[]): - """ - Returns the best run_type for the given args and kwargs - """ - - # Get list of run types - try: - fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs,_possible_runtypes) - except TypeError: - print(f"There seems to be an error regarding your benchmarks. \n\ -To give full control to the agent please ensure that one of the following is true: \n\ -\t - You have at least 3 benchmarks for all runtypes using any set of args,kwargs \n\ -\t - Provide a set of default benchmarks during the Liquid Engine class creation \n\ -Otherwise explicity choose one of the following run_types:") - print('\t-','\n\t- '.join(fn._run_types.keys())) - - print("The agent will choose a random run_type") - return random.choices(list(fn._run_types.keys()), k=1)[0] - - # Penalize the average time a run_type had if that run_type was delayed in previous runs - if len(self.delayed_runtypes.keys()) > 0: - adjusted_avg = self._adjust_times(fast_avg, slow_avg) - - if sorted(fast_avg, key=fast_avg.get)[0] == sorted(adjusted_avg, key=adjusted_avg.get)[0]: - return sorted(fast_avg, key=fast_avg.get)[0] - - weights = [(1 / adjusted_avg[k]) ** 2 for k in adjusted_avg] - weights = weights / np.sum(weights) - - # failsafe - if sum(weights) == 0: - weights = [1 for k in adjusted_avg] - - return random.choices(list(adjusted_avg.keys()), weights=weights, k=1)[0] - else: - return sorted(fast_avg, key=fast_avg.get)[0] - - def _inform(self, fn, verbose=True): - """@public - Informs the Agent that a LE object finished running - """ - - repr_args = fn._last_args - run_type = fn._last_runtype - - historical_data = fn._benchmarks[run_type][repr_args][1:] - - assert historical_data[-1] == fn._last_time, "Historical data is not consistent with the last runtime" - - if verbose: - print(f"Agent: {fn._designation} using {run_type} ran in {fn._last_time} seconds") - - if len(historical_data) > 19: - self._check_delay(run_type, historical_data[-1], historical_data[:-1], verbose=verbose) - - -Agent = Agent_() +from liquid_engine import Agent \ No newline at end of file diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py index a044d037..6638e067 100644 --- a/src/nanopyx/__liquid_engine__.py +++ b/src/nanopyx/__liquid_engine__.py @@ -1,451 +1 @@ -import os -import timeit -import yaml -import datetime -import inspect -import warnings -from functools import partial, reduce -from itertools import combinations -from pathlib import Path - -from importlib_resources import files - -import numpy as np - -# This will in the future come from the Agent -from .__njit__ import njit_works -from .__dask__ import dask_works -from .__transonic__ import transonic_works -from .__cuda__ import cuda_works -from .__opencl__ import opencl_works, devices, cl - -__home_folder__ = os.path.expanduser("~") -__benchmark_folder__ = os.path.join(__home_folder__, ".nanopyx") -if not os.path.exists(__benchmark_folder__): - os.makedirs(__benchmark_folder__) - -from .__agent__ import Agent # noqa: E402 - -from .core.analysis.pearson_correlation import pearson_correlation - - -class LiquidEngine: - """@public - Base class for parts of the Nanopyx Liquid Engine - Vroom Vroom - """ - - def __init__( - self, - testing: bool = False, - clear_benchmarks: bool = False, - verbose: bool = True, - ) -> None: - """@public - Initialize the Liquid Engine - The Liquid Engine base class is inherited by children classes that implement specific methods - - Engine responsabilities: - 1. Store implemented run types; - 2. Handle previous benchmarks and I/O; - 2. When queried, benchmark all available run types; - 3. Run a specific method using a selected run type; - - Benchmark files have the following format: - The benchmark file is read as dict of dicts. - BENCHMARK DICT FOR A SPECIFIC METHOD - |- RUN_TYPE #1 - | |- ARGS_REPR #1 - | | |- [score, t2run#1, t2run#2, t2run#3, ...] last are newer. nan means fail - | |- ARGS_REPR #2 - | | |- [score, t2run#1, t2run#2, t2run#3, ...] last are newer. nan means fail - | (...) - |- RUN_TYPE #2 - (...) - """ - - # Start by checking available run types - self._run_types = {} - for rt in inspect.getmembers(self,inspect.ismethod): - if rt[0].startswith('_run_'): - runtypename = '_'.join(rt[0].split('_')[2:]).lower() - # TODO Recheck this logic TODO - if 'numba' in runtypename and not njit_works(): - continue - elif 'dask' in runtypename and not dask_works(): - continue - elif 'transonic' in runtypename and not transonic_works(): - continue - elif 'cuda' in runtypename and not cuda_works(): - continue - elif 'opencl' in runtypename and not opencl_works(): - continue - else: - self._run_types[runtypename] = rt[1] - - self.testing = testing - self.mem_div = 1 - - # benchmarks file path - # e.g.: ~/.nanopyx/liquid/_le_interpolation_nearest_neighbor.cpython-310-darwin/ShiftAndMagnify.yml - base_path = os.path.join( - __benchmark_folder__, "liquid", os.path.split(os.path.splitext(inspect.getfile(self.__class__))[0])[1] - ) - os.makedirs(base_path, exist_ok=True) - self._benchmark_filepath = os.path.join(base_path, self.__class__.__name__ + ".yml") - - # Load benchmark file if it exists, otherwise create an empty config - if not clear_benchmarks and os.path.exists(self._benchmark_filepath): - with open(self._benchmark_filepath) as f: - self._benchmarks = yaml.load(f, Loader=yaml.FullLoader) - else: - self._benchmarks = {} - - # Lowercase everything for backwards compatibility - self._benchmarks = {k.lower(): v for k, v in self._benchmarks.items()} - - # check if the benchmark dictionary has a key for every available run type - for run_type_designation in self._run_types.keys(): - if run_type_designation not in self._benchmarks: - self._benchmarks[run_type_designation] = {} - - # helper attribute for benchmarking function - self._last_args = None - self._last_runtype = None - self._last_time = None - - self.Agent = Agent - - # load defaults - try: - self._default_benchmarks = yaml.safe_load( - files(f'liquid_benchmarks.{inspect.getmodule(self.__class__).__name__.split(".")[-1]}') - .joinpath(self.__class__.__name__ + ".yml") - .read_text() - ) - # Lowercase everything for backwards compatibility - self._default_benchmarks = {k.lower(): v for k, v in self._default_benchmarks.items()} - except: - self._default_benchmarks = [] - - self.verbose = verbose - - def _run(self, *args, run_type=None, **kwargs): - """@public - Runs the function with the given args and kwargs - - The code above does the following: - 1. Check the specified run_type - - if str checks if the run type exists otherwise raise a NotImplementedError - 2. It will run the _run_{run_type} function - 3. It will return the result and the time taken to run - - :param args: args for the function - :param run_type: the run type to use - :param kwargs: kwargs for the function - :return: the result and time taken - """ - - if run_type is not None: - run_type = run_type.lower() - - if run_type is None and self.verbose: - print("Querying the Agent...") - run_type = self.Agent.get_run_type(self, args, kwargs) - print(f"Agent chose: {run_type}") - elif run_type is None: - run_type = self.Agent.get_run_type(self, args, kwargs) - elif run_type not in self._run_types: - - # Check if the tags in the run_types - _possible_runtypes = [rt for rt in self._run_types.keys() if f"@{run_type}" in self._run_types[rt].__doc__] - - if not _possible_runtypes: - - print(f"Unexpected run type {run_type}") - print("Querying the Agent...") - run_type = self.Agent.get_run_type(self, args, kwargs) - print(f"Agent chose: {run_type}") - - else: - - print(f"Choosing between all {run_type} implementations") - run_type = self.Agent.get_run_type(self, args, kwargs,_possible_runtypes) - print(f"Agent chose: {run_type}") - - # try to run - try: - if self.mem_div > 999: - raise ValueError( - f"Maxmimum memory division factor achieved, can not try any longer with {run_type}. Use a smaller input or a different run_type" - ) - t_start = timeit.default_timer() - result = self._run_types[run_type](*args, **kwargs) - t2run = timeit.default_timer() - t_start - arg_repr, arg_score = self._get_args_repr_score(*args, **kwargs) - self._store_results(arg_repr, arg_score, run_type, t2run) - - self._last_time = t2run - self._last_args = arg_repr - self._last_runtype = run_type - - self.Agent._inform(self, verbose=self.verbose) - - except (cl.MemoryError, cl.LogicError) as e: - print("Found: ", e) - print("Reducing maximum buffer size and trying again...") - self.mem_div += 1 - kwargs["mem_div"] = self.mem_div - result = self._run(*args, run_type=run_type, **kwargs) - except cl.Error as e: - if e.__str__() == "Buffer size is larger than device maximum memory allocation size": - print("Found: ", e) - print("Reducing maximum buffer size and trying again...") - self.mem_div += 1 - kwargs["mem_div"] = self.mem_div - result = self._run(*args, run_type=run_type, **kwargs) - else: - print(f"Unexpected error while trying to run {run_type}") - print(e) - print("Please try again with another run type") - result = None - except Exception as e: - print(f"Unexpected error while trying to run {run_type}") - print(e) - print("Please try again with another run type") - result = None - - self.mem_div = 1 - return result - - def benchmark(self, *args, **kwargs): - """ - 1. Run each available run type and record the run time and return value - 2. Sort the run times from fastest to slowest - 3. Compare each run type against each other, sorted by speed - - :param args: args for the run method - :param kwargs: kwargs for the run method - :return: a list of tuples containing the run time, run type name and optionally the return values - :rtype: [[run_time, run_type_name, return_value], ...] - """ - - # Create some lists to store runtimes and return values of run types - run_times = {} - returns = {} - - # Run each run type and record the run time and return value - for run_type in self._run_types: - r = self._run(*args, run_type=run_type, **kwargs) - - run_times[run_type] = self._last_time - - if self.testing: # Store return values if testing - returns[run_type] = r - else: - returns[run_type] = None - - # Sort run_times by value - speed_sort = [] - for run_type in sorted(run_times, key=run_times.get, reverse=False): - speed_sort.append( - ( - run_times[run_type], - run_type, - returns[run_type], - ) - ) - - print(f"Fastest run type: {speed_sort[0][1]}") - print(f"Slowest run type: {speed_sort[-1][1]}") - - # Compare each run type against each other, sorted by speed - different_runtypes = [] - for pair in combinations(speed_sort, 2): - print(f"{pair[0][1]} is {pair[1][0]/pair[0][0]:.2f}x faster than {pair[1][1]}") - if self.testing: - if self._compare_runs(pair[0][2], pair[1][2]): - print(f"{pair[0][1]} and {pair[1][1]} have similar outputs!") - else: - warnings.warn(f"WARNING: outputs of {pair[0][1]} and {pair[1][1]} don't match!") - different_runtypes.append(set([pair[0][1], pair[1][1]])) - if len(different_runtypes) <= len(self._run_types) - 1: - try: - common_runtype = reduce(lambda a, b: a & b, different_runtypes) - except TypeError: - common_runtype = {} - if common_runtype: - warnings.warn(f"WARNING: disabling {list(common_runtype)[0]} for this set of arguments!") - arg_repr, arg_score = self._get_args_repr_score(*args, **kwargs) - self._store_results(arg_repr, arg_score, list(common_runtype)[0], None) # None saves to null in yamls - - return speed_sort - - def _compare_runs(self, output_1, output_2): - """@public""" - if output_1.ndim > 2: - pcc = 0 - for i in range(output_1.shape[0]): - pcc += pearson_correlation(output_1[i, :, :], output_2[i, :, :]) - pcc /= output_1.shape[0] - else: - pcc = pearson_correlation(output_1, output_2) - - if pcc > 0.8: - return True - else: - return False - - def _get_cl_code(self, file_name, cl_dp): - """ - Retrieves the OpenCL code from the corresponding .cl file - """ - cl_file = os.path.splitext(file_name)[0] + ".cl" - - if not os.path.exists(cl_file): - cl_file = Path(os.path.abspath(inspect.getfile(self.__class__))).parent / file_name - - assert os.path.exists(cl_file), "Could not find OpenCL file: " + str(cl_file) - - kernel_str = open(cl_file).read() - - if not cl_dp: - kernel_str = kernel_str.replace("double", "float") - - return kernel_str - - def _store_results(self, arg_repr, arg_score, run_type, t2run): - """@public - Stores the results of a run - """ - - # Check if the run type has been run, and if not create empty info - run_type_benchs = self._benchmarks[run_type] - if arg_repr not in run_type_benchs: - run_type_benchs[arg_repr] = [arg_score] - - # Get the run info - c = run_type_benchs[arg_repr] - - assert c[0] == arg_score, "arg_score mismatch" - - c.append(t2run) - - self._dump_run_times() - - def _dump_run_times( - self, - ): - """@public""" - # TODO We might need to wrap this into a multiprocessing.Queue if we find it blocking - with open(self._benchmark_filepath, "w") as f: - yaml.dump(self._benchmarks, f) - - def _get_args_repr_score(self, *args, **kwargs): - """@public - Get a string representation of the args and kwargs and corresponding 'score' / 'norm' - The idea is that similar args have closer 'score'. Fuzzy logic - - The code does the following: - 1. It converts any args that are floats or ints to "number()" strings, and any args that are tensors to "shape()" strings - 2. It converts any kwargs that are floats or ints to "number()" strings, and any kwargs that are tensors to "shape()" strings - 3. The 'score' is given by the product of all the floats or ints and all the shape sizes. - - :return: the string representation of the args and kwargs - :rtype: str - """ - _norm = 1 - _args = [] - for arg in args: - if type(arg) in (float, int): - _args.append(f"number({arg})") - if arg == 0: - arg = 1 - _norm *= arg - elif hasattr(arg, "shape"): - _args.append(f"shape{arg.shape}") - _norm *= arg.size - else: - _args.append(arg) - - _kwargs = {} - for k, v in kwargs.items(): - if type(v) in (float, int): - _kwargs[k] = f"number({v})" - if v == 0: - v = 1 - _norm *= v - if hasattr(v, "shape"): - _kwargs[k] = f"shape{arg.shape}" - _norm *= v.size - else: - _kwargs[k] = v - - return repr((_args, _kwargs)), _norm - - def get_highest_divisor(self, size_, max_): - """ - Returns the highest divisor of size_ that is still lower than max_ - """ - value = 1 - for i in range(1, int(np.sqrt(size_) + 1)): - if size_ % i == 0: - if i * i != size_: - div2 = size_ / i - - if i < max_: - value = max(value, i) - if div2 < max_: - value = max(value, div2) - return int(value) - - def get_work_group(self, device, shape): - """ - Calculates work group size for a given device and shape of global work space - """ - - max_wg_dims = device.max_work_item_sizes[0:3] - max_glo_dims = device.max_work_group_size - - three = self.get_highest_divisor(shape[2], max_wg_dims[2]) - max_two = max_glo_dims / three - two = self.get_highest_divisor(shape[1], max_two) - one = 1 - return (one, two, three) - - def _check_max_slices(self, input, number_of_max_slices): - """@public - Checks if number of maximum slices is greater than 0 - """ - if number_of_max_slices < 1: - raise ValueError("This device doesn't have enough memory to run this function with this input") - elif input.shape[0] < number_of_max_slices: - return input.shape[0] - else: - return number_of_max_slices - - def _check_max_buffer_size(self, size, device, n_slices): - """@public - Checks if buffer size is larger than device maximum memory allocation size and n_slices is 1 and raises appropriate errors that are handled in the _run function. - """ - if size > device.max_mem_alloc_size and n_slices == 1: - raise ValueError( - "This device cannot handle this input size with these parameters, try using a smaller input or other parameters" - ) - - if size > device.max_mem_alloc_size: - raise cl.Error("Buffer size is larger than device maximum memory allocation size") - - return size - - ##################################################### - # RUN METHODS # - # THESE SHOULD ALWAYS BE OVERRIDEN BY CHILD CLASSES # - ##################################################### - - def run(self, *args, **kwargs): - """ - Runs the function with the given args and kwargs - Should be overridden by the any class that inherits from this class - """ - return self._run(*args, **kwargs) +from liquid_engine import LiquidEngine \ No newline at end of file From 0fb8da297d0806d29789999afc5466b6bf136e9f Mon Sep 17 00:00:00 2001 From: Bruno Saraiva Date: Tue, 4 Jun 2024 10:43:22 +0100 Subject: [PATCH 14/14] added maximum accepted version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fb2b1eee..4da05d08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ build-backend = "setuptools.build_meta" name = "nanopyx" description = "Nanoscopy Python library (NanoPyx, the successor to NanoJ) - focused on light microscopy and super-resolution imaging" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.9,<3.12" license = { file = "LICENSE.txt" } keywords = [ "NanoJ",