From b3b6409dc45663c77b64d82f9eee84a8121b82f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Mon, 13 May 2024 16:27:38 +0100
Subject: [PATCH 01/14] Refactoring named runtypes

---
 src/nanopyx/__agent__.py         |  10 +--
 src/nanopyx/__liquid_engine__.py | 143 ++++++-------------------------
 2 files changed, 30 insertions(+), 123 deletions(-)

diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py
index 946e893a..683255c6 100644
--- a/src/nanopyx/__agent__.py
+++ b/src/nanopyx/__agent__.py
@@ -90,8 +90,8 @@ def _get_ordered_run_types(self, fn, args, kwargs):
 
             if len(run_info) < 2:
                 # Fall back to default values
-                if "OpenCL" in run_type:
-                    rt = "OpenCL"
+                if "opencl" in run_type:
+                    rt = "opencl"
                 else:
                     rt = run_type
 
@@ -138,7 +138,7 @@ def _check_delay(self, run_type, runtime, runtimes_history, verbose=True):
             2. Stores the delay factor and the probability
         """
 
-        threaded_runtypes = ["Threaded", "Threaded_static", "Threaded_dynamic", "Threaded_guided"]
+        threaded_runtypes = ["threaded", "threaded_static", "threaded_dynamic", "threaded_guided"]
 
         runtimes_history = np.array(runtimes_history)
         if len(runtimes_history) > 50:
@@ -153,7 +153,7 @@ def _check_delay(self, run_type, runtime, runtimes_history, verbose=True):
 
         if run_type in self.delayed_runtypes:
             if runtime < (slow_avg_speed - slow_std_speed) or runtime < (fast_avg_speed + fast_std_speed):
-                if "Threaded" in run_type:
+                if "threaded" in run_type:
                     for threaded_run_type in threaded_runtypes:
                         self.delayed_runtypes.pop(threaded_run_type, None)
                 else:
@@ -173,7 +173,7 @@ def _check_delay(self, run_type, runtime, runtimes_history, verbose=True):
                     f"Run type {run_type} was delayed in the previous run. Delay factor: {delay_factor}, Delay probability: {delay_prob}"
                 )
 
-            if "Threaded" in run_type:
+            if "threaded" in run_type:
                 for threaded_run_type in threaded_runtypes:
                     self.delayed_runtypes[threaded_run_type] = (delay_factor, delay_prob)
             else:
diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py
index 56b7b692..d911f872 100644
--- a/src/nanopyx/__liquid_engine__.py
+++ b/src/nanopyx/__liquid_engine__.py
@@ -38,17 +38,6 @@ class LiquidEngine:
     def __init__(
         self,
         testing: bool = False,
-        opencl_: bool = False,
-        unthreaded_: bool = False,
-        threaded_: bool = False,
-        threaded_static_: bool = False,
-        threaded_dynamic_: bool = False,
-        threaded_guided_: bool = False,
-        python_: bool = False,
-        njit_: bool = False,
-        dask_: bool = False,
-        transonic_: bool = False,
-        cuda_: bool = False,
         clear_benchmarks: bool = False,
         verbose: bool = True,
     ) -> None:
@@ -77,34 +66,22 @@ def __init__(
 
         # Start by checking available run types
         self._run_types = {}
-        if opencl_ and opencl_works():
-            for d in devices:
-                self._run_types[f"OpenCL_{d['device'].name}"] = partial(self._run_opencl, device=d)
-        if threaded_:
-            self._run_types["Threaded"] = self._run_threaded
-        if unthreaded_:
-            self._run_types["Unthreaded"] = self._run_unthreaded
-        if threaded_static_:
-            self._run_types["Threaded_static"] = self._run_threaded_static
-        if threaded_dynamic_:
-            self._run_types["Threaded_dynamic"] = self._run_threaded_dynamic
-        if threaded_guided_:
-            self._run_types["Threaded_guided"] = self._run_threaded_guided
-        if python_:
-            self._run_types["Python"] = self._run_python
-        if njit_ and njit_works():
-            self._run_types["Numba"] = self._run_njit
-            # Try to trigger early compilation
-            try:
-                self._run_njit()
-            except TypeError:
-                print("Consider adding default arguments to the njit implementation to trigger early compilation")
-        if dask_ and dask_works():
-            self._run_types["Dask"] = self._run_dask
-        if transonic_ and transonic_works():
-            self._run_types["Transonic"] = self._run_transonic
-        if cuda_ and cuda_works():
-            self._run_types["Cuda"] = self._run_cuda
+        for rt in inspect.getmembers(self,inspect.ismethod):
+            if rt[0].startswith('_run_'):
+                runtypename = '_'.join(rt[0].split('_')[2:]).lower()
+                # TODO Recheck this logic TODO
+                if 'numba' in runtypename and not njit_works:
+                    continue
+                elif 'dask' in runtypename and not dask_works:
+                    continue
+                elif 'transonic' in runtypename and not transonic_works:
+                    continue
+                elif 'cuda' in runtypename and not cuda_works:
+                    continue
+                elif 'opencl' in runtypename and not opencl_works:
+                    continue
+                else:
+                    self._run_types[runtypename] = rt[1]
 
         self.testing = testing
         self.mem_div = 1
@@ -117,14 +94,17 @@ def __init__(
         os.makedirs(base_path, exist_ok=True)
         self._benchmark_filepath = os.path.join(base_path, self.__class__.__name__ + ".yml")
 
-        # Load config file if it exists, otherwise create an empty config
+        # Load benchmark file if it exists, otherwise create an empty config
         if not clear_benchmarks and os.path.exists(self._benchmark_filepath):
             with open(self._benchmark_filepath) as f:
                 self._benchmarks = yaml.load(f, Loader=yaml.FullLoader)
         else:
             self._benchmarks = {}
 
-        # check if the cfg dictionary has a key for every available run type
+        # Lowercase everything for backwards compatibility
+        self._benchmarks =  {k.lower(): v for k, v in self._benchmarks.items()}
+
+        # check if the benchmark dictionary has a key for every available run type
         for run_type_designation in self._run_types.keys():
             if run_type_designation not in self._benchmarks:
                 self._benchmarks[run_type_designation] = {}
@@ -143,6 +123,8 @@ def __init__(
                 .joinpath(self.__class__.__name__ + ".yml")
                 .read_text()
             )
+            # Lowercase everything for backwards compatibility
+            self._default_benchmarks =  {k.lower(): v for k, v in self._default_benchmarks.items()}
         except:
             self._default_benchmarks = []
 
@@ -164,6 +146,8 @@ def _run(self, *args, run_type=None, **kwargs):
         :return: the result and time taken
         """
 
+        run_type = run_type.lower()
+
         if run_type is None and self.verbose:
             print("Querying the Agent...")
             run_type = self.Agent.get_run_type(self, args, kwargs)
@@ -452,80 +436,3 @@ def run(self, *args, **kwargs):
         Should be overridden by the any class that inherits from this class
         """
         return self._run(*args, **kwargs)
-
-    def _run_opencl(*args, **kwargs):
-        """@public
-        Runs the OpenCL version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_unthreaded(*args, **kwargs):
-        """@public
-        Runs the cython unthreaded version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_threaded(*args, **kwargs):
-        """@public
-        Runs the cython threaded version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_threaded_static(*args, **kwargs):
-        """@public
-        Runs the cython threaded static version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_threaded_dynamic(*args, **kwargs):
-        """@public
-        Runs the cython threaded dynamic version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_threaded_guided(*args, **kwargs):
-        """@public
-        Runs the cython threaded guided version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_python(*args, **kwargs):
-        """@public
-        Runs the python version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_njit(*args, **kwargs):
-        """@public
-        Runs the njit version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_dask(*args, **kwargs):
-        """@public
-        Runs the dask version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_transonic(*args, **kwargs):
-        """@public
-        Runs the transonic version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_cuda(*args, **kwargs):
-        """@public
-        Runs the cuda version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass

From cf7afc6a2f55c7a08739d8091d494abc33ad9734 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Mon, 13 May 2024 16:27:58 +0100
Subject: [PATCH 02/14] added todo

---
 src/nanopyx/__agent__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py
index 683255c6..29e123f3 100644
--- a/src/nanopyx/__agent__.py
+++ b/src/nanopyx/__agent__.py
@@ -137,7 +137,7 @@ def _check_delay(self, run_type, runtime, runtimes_history, verbose=True):
             1. Calculates a probability that this delay is maintained
             2. Stores the delay factor and the probability
         """
-
+        # TODO test 
         threaded_runtypes = ["threaded", "threaded_static", "threaded_dynamic", "threaded_guided"]
 
         runtimes_history = np.array(runtimes_history)

From 35e35ca77cefed95fbe6e9e81a4871ba955d1c5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Mon, 13 May 2024 16:28:12 +0100
Subject: [PATCH 03/14] estimator of fastest device

---
 src/nanopyx/__opencl__.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/nanopyx/__opencl__.py b/src/nanopyx/__opencl__.py
index 8eeacfbe..b8830ded 100644
--- a/src/nanopyx/__opencl__.py
+++ b/src/nanopyx/__opencl__.py
@@ -8,8 +8,11 @@
     import pyopencl.array as cl_array
 
     devices = []
+    _fastest_device = None
+    max_perf = 0
+
     for platform in cl.get_platforms():
-        if "Microsoft" in platform.vendor:  # TODO this takes out integrated graphics
+        if "Microsoft" in platform.vendor:  # TODO this takes out emulated GPUs
             continue
         for dev in platform.get_devices():
             # check if the device is a GPU
@@ -19,7 +22,11 @@
                 cl_dp = False
             else:
                 cl_dp = False
-
+            
+            perf = dev.max_compute_units * dev.max_clock_frequency
+            if perf>max_perf:
+                max_perf = perf
+                _fastest_device = {"device": dev, "DP": cl_dp}
             devices.append({"device": dev, "DP": cl_dp})
 
 
@@ -28,6 +35,7 @@
     cl = None
     cl_array = None
     devices = None
+    _fastest_device = None
 
 
 def print_opencl_info():

From 6379a8d21cdfd5e63eb2e6abca1d5dff2b2fa6d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Mon, 13 May 2024 16:28:23 +0100
Subject: [PATCH 04/14] Testing new runtype naming

---
 .../nanopyx.core.transform._le_convolution.pyx      | 13 ++++++-------
 src/nanopyx/core/transform/_le_convolution.pyx      | 13 ++++++-------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
index 2cbc1ddf..e304597a 100644
--- a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
@@ -15,7 +15,7 @@ from libc.math cimport cos, sin
 from .__interpolation_tools__ import check_image, value2array
 from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 class Convolution(LiquidEngine):
@@ -26,11 +26,7 @@ class Convolution(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "Conv2D"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True,
-            njit_=True, python_=True, transonic_=True, cuda_=True, dask_=True,
-            verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, kernel, run_type=None):
         image = check_array(image)
@@ -85,8 +81,11 @@ class Convolution(LiquidEngine):
 
     % endfor
 
-    def _run_opencl(self, image, kernel, device):
+    def _run_opencl(self, image, kernel, device=None):
         
+        if device is None:
+            device = _fastest_device
+
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
diff --git a/src/nanopyx/core/transform/_le_convolution.pyx b/src/nanopyx/core/transform/_le_convolution.pyx
index 5b3b541b..1f4d5fa8 100644
--- a/src/nanopyx/core/transform/_le_convolution.pyx
+++ b/src/nanopyx/core/transform/_le_convolution.pyx
@@ -13,7 +13,7 @@ from libc.math cimport cos, sin
 from .__interpolation_tools__ import check_image, value2array
 from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 class Convolution(LiquidEngine):
@@ -24,11 +24,7 @@ class Convolution(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "Conv2D"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True,
-            njit_=True, python_=True, transonic_=True, cuda_=True, dask_=True,
-            verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, kernel, run_type=None):
         image = check_array(image)
@@ -213,8 +209,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
 
-    def _run_opencl(self, image, kernel, device):
+    def _run_opencl(self, image, kernel, device=None):
         
+        if device is None:
+            device = _fastest_device
+
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']

From b80ce16875ab47d7ff391b4b21be3f6fb2315dd8 Mon Sep 17 00:00:00 2001
From: Bruno Saraiva <bruno.msaraiva2@gmail.com>
Date: Tue, 14 May 2024 14:53:50 +0100
Subject: [PATCH 05/14] updated all LE classes

---
 src/mako_templates/_le_interpolation_base.pyx |  34 +--
 ...core.analysis._le_channel_registration.pyx |  13 +-
 ...pyx.core.analysis._le_drift_calculator.pyx |  19 +-
 .../nanopyx.core.transform._le_esrrf.pyx      |  11 +-
 .../nanopyx.core.transform._le_esrrf3d.pyx    |   4 +-
 ...nopyx.core.transform._le_nlm_denoising.pyx |  11 +-
 ...core.transform._le_patch_nlm_denoising.pyx |   9 +-
 ...core.transform._le_pixel_nlm_denoising.pyx |   9 +-
 ...nsform._le_radial_gradient_convergence.pyx |   7 +-
 .../nanopyx.core.transform._le_radiality.pyx  |  11 +-
 ....transform._le_roberts_cross_gradients.pyx |   9 +-
 ...yx.core.utils._le_mandelbrot_benchmark.pyx |  11 +-
 src/nanopyx/__liquid_engine__.py              |  15 +-
 .../analysis/_le_channel_registration.pyx     |  13 +-
 .../core/analysis/_le_drift_calculator.pyx    | 282 +-----------------
 src/nanopyx/core/transform/_le_esrrf.pyx      |  11 +-
 src/nanopyx/core/transform/_le_esrrf3d.pyx    |   8 +-
 .../transform/_le_interpolation_bicubic.pyx   |  34 +--
 .../_le_interpolation_catmull_rom.pyx         |  34 +--
 .../transform/_le_interpolation_lanczos.pyx   |  34 +--
 .../_le_interpolation_nearest_neighbor.pyx    |  34 +--
 .../core/transform/_le_nlm_denoising.pyx      |  11 +-
 .../transform/_le_patch_nlm_denoising.pyx     |   9 +-
 .../transform/_le_pixel_nlm_denoising.pyx     |   9 +-
 .../_le_radial_gradient_convergence.pyx       |   7 +-
 src/nanopyx/core/transform/_le_radiality.pyx  |  11 +-
 .../transform/_le_roberts_cross_gradients.pyx |   9 +-
 .../core/utils/_le_mandelbrot_benchmark.pyx   |  11 +-
 28 files changed, 200 insertions(+), 480 deletions(-)

diff --git a/src/mako_templates/_le_interpolation_base.pyx b/src/mako_templates/_le_interpolation_base.pyx
index 62bc88bf..73806713 100644
--- a/src/mako_templates/_le_interpolation_base.pyx
+++ b/src/mako_templates/_le_interpolation_base.pyx
@@ -9,7 +9,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_${self.attr.inter_name}.h":
@@ -23,10 +23,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_${self.attr.inter_name}"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -65,8 +62,9 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -152,11 +150,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_${self.attr.inter_name}"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -199,7 +195,10 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -298,11 +297,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_${self.attr.inter_name}"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -339,7 +336,10 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
index 4adae44d..7d8ef454 100644
--- a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
+++ b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
@@ -9,7 +9,7 @@ from cython.parallel import parallel, prange
 from libc.math cimport sqrt,pow
 
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from .ccm cimport _calculate_slice_ccm
 
 from .estimate_shift import GetMaxOptimizer
@@ -99,8 +99,7 @@ class ChannelRegistrationEstimator(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ChannelRegistrationEstimator"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            unthreaded_=True, threaded_=True, threaded_static_=True, threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=None):
         return self._run(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=run_type)
@@ -251,8 +250,12 @@ class ChannelRegistrationEstimator(LiquidEngine):
 
     % endfor
 
-    def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device):
-        _runtype = "OpenCL_" + device["device"].name
+    def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None):
+
+        if device is None:
+            device = _fastest_device
+
+        _runtype = "opencl"
         crsm = ShiftAndMagnify(verbose=False)
 
         cdef float[:, :] img_ref = np.asarray(img_stack[ref_index], dtype=np.float32)
diff --git a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
index de121ac0..4b83448d 100644
--- a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
+++ b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
@@ -1,5 +1,5 @@
 <%!
-schedulers = ['unthreaded','threaded','threaded_guided','threaded_dynamic','threaded_static']
+schedulers = ['unthreaded','threaded']
 %># cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=False
 import time
 import scipy
@@ -27,10 +27,7 @@ class DriftEstimator(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "DriftEstimator"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing,
-            opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=False, 
-            threaded_dynamic_=False, threaded_guided_=False,
-            njit_=False, python_=False, transonic_=False, cuda_=False, dask_=False, verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, time_averaging: int = 2, max_drift: int = 5, ref_option: int = 0, run_type=None):
         return self._run(np.asarray(image).astype(np.float32), time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option, run_type=run_type)
@@ -129,19 +126,9 @@ class DriftEstimator(LiquidEngine):
             % elif sch=='threaded':
             for s in prange(n_slices):
             % else:
-            for s in prange(n_slices,schedule="${sch.split('_')[1]}"): 
+            for s in prange(n_slices): 
             %endif
                 output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
 
         return np.asarray(output).astype(np.float32)
     %endfor
-
-
-# % if sch=='unthreaded':
-#     for i in range(n_blocks):
-#     % elif sch=='threaded':
-#     for i in prange(n_blocks):
-#     % else:
-#     for i in prange(n_blocks,schedule="${sch.split('_')[1]}"):
-#     %endif
-#         average[i] = np.mean(image[i*time_averaging:(i+1)*time_averaging, :, :], axis=0)
\ No newline at end of file
diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
index 9e40749e..ddfe4d05 100644
--- a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
@@ -12,7 +12,7 @@ from libc.math cimport cos, sin
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 from ._le_interpolation_catmull_rom import ShiftAndMagnify
 from ._le_roberts_cross_gradients import GradientRobertsCross
@@ -26,10 +26,7 @@ class eSRRF(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "eSRRF_ST"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, magnification: int = 5, radius: float = 1.5, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type=None):
         image = check_image(image)
@@ -40,6 +37,10 @@ class eSRRF(LiquidEngine):
         return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
     def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1):
+
+        if device is None:
+            device = _fastest_device
+
         # TODO doIntensityWeighting is irrelevant on gpu2
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
index 2f244a0b..2660ed1f 100644
--- a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
@@ -28,9 +28,7 @@ class eSRRF3D(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "eSRRF_3D"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True, verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         self._gradients_s_interpolated = None
         self._gradients_r_interpolated = None
         self._gradients_c_interpolated = None
diff --git a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
index d49153be..2f523166 100644
--- a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
@@ -14,7 +14,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 import os
 os.environ['PYOPENCL_NO_CACHE']='1'
@@ -38,9 +38,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -227,7 +224,11 @@ class NLMDenoising(LiquidEngine):
         %endfor
     
 
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        
+        if device is None:
+            device = _fastest_device
+        
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
index d828e973..e31a3102 100644
--- a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
@@ -12,7 +12,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_integral_image.h":
@@ -31,9 +31,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising_patch"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -231,7 +228,9 @@ class NLMDenoising(LiquidEngine):
     %endfor
     
         
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
diff --git a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
index 32ba7c96..750cd7b4 100644
--- a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
@@ -14,7 +14,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_patch_distance.h":
@@ -30,9 +30,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising_pixel"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -143,7 +140,9 @@ class NLMDenoising(LiquidEngine):
         %endfor
     
         
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
index 651e6e50..a8896ffc 100644
--- a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
@@ -8,7 +8,7 @@ cimport numpy as np
 from cython.parallel import parallel, prange
 
 from libc.math cimport sqrt, pow
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ...__liquid_engine__ import LiquidEngine
 from .__interpolation_tools__ import check_image
 
@@ -24,8 +24,6 @@ class RadialGradientConvergence(LiquidEngine):
         self._designation = "RadialGradientConvergence"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
 
@@ -108,6 +106,9 @@ class RadialGradientConvergence(LiquidEngine):
     
     def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1):
 
+        if device is None:
+            device = _fastest_device
+
         # gradient gxgymag*mag*size
         # image_interp = mag*size
         # output = image_interp
diff --git a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
index 14aa2fb5..89dd24c7 100644
--- a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
@@ -9,7 +9,7 @@ from cython.parallel import parallel, prange
 
 from libc.math cimport sqrt, pi, fabs, cos, sin
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from .__interpolation_tools__ import check_image
 
 from ._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify
@@ -35,8 +35,6 @@ class Radiality(LiquidEngine):
         self._designation = "Radiality"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=False, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
     def run(self, image, image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True, run_type = None): 
@@ -49,7 +47,7 @@ class Radiality(LiquidEngine):
         image_interp = check_image(image_interp)
         return super().benchmark(image, image_interp, magnification, ringRadius, border, radialityPositivityConstraint, doIntensityWeighting)
 
-    def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
+    """def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
 
         cdef int _magnification = magnification
         cdef int _border = border
@@ -85,7 +83,7 @@ class Radiality(LiquidEngine):
                         else:
                             imRad[f,j,i] = _c_calculate_radiality_per_subpixel(i, j, &imGx[f,0,0], &imGy[f,0,0], xRingCoordinates, yRingCoordinates, _magnification, _ringRadius, nRingCoordinates, _radialityPositivityConstraint, h, w)
 
-        return np.asarray(imRad)
+        return np.asarray(imRad)"""
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
@@ -133,6 +131,9 @@ class Radiality(LiquidEngine):
     
     def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1):
 
+        if device is None:
+            device = _fastest_device
+
         cl_ctx = cl.Context([device['device']])
         cl_queue = cl.CommandQueue(cl_ctx)
 
diff --git a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
index 48967a41..b91d7d53 100644
--- a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
@@ -4,7 +4,7 @@ schedulers = ['threaded','threaded_guided','threaded_dynamic','threaded_static']
 
 import numpy as np
 cimport numpy as np
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ...__liquid_engine__ import LiquidEngine
 
 from cython.parallel import prange
@@ -19,8 +19,6 @@ class GradientRobertsCross(LiquidEngine):
         self._designation = "GradientRobertsCross"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
     def run(self, image, run_type = None):
@@ -63,7 +61,10 @@ class GradientRobertsCross(LiquidEngine):
         return gradient_col, gradient_row
     % endfor
 
-    def _run_opencl(self, float[:,:,:] image, dict device, int mem_div=1):
+    def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1):
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
index 0718f968..df126033 100644
--- a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
+++ b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
@@ -9,7 +9,7 @@ cimport numpy as np
 from cython.parallel import prange
 
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ._le_mandelbrot_benchmark_ import mandelbrot as _py_mandelbrot
 from ._le_mandelbrot_benchmark_ import njit_mandelbrot as _njit_mandelbrot
 
@@ -25,9 +25,7 @@ class MandelbrotBenchmark(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "Mandelbrot_Benchmark"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, python_=True, njit_=True,
+            clear_benchmarks=clear_benchmarks, testing=testing,
             verbose=verbose)
 
     def run(self, int size=1000, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1, run_type=None) -> np.ndarray:
@@ -45,7 +43,10 @@ class MandelbrotBenchmark(LiquidEngine):
     def benchmark(self, int size, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1):
         return super().benchmark(size, r_start, r_end, c_start, c_end)
 
-    def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device) -> np.ndarray:
+    def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray:
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py
index d911f872..42f80282 100644
--- a/src/nanopyx/__liquid_engine__.py
+++ b/src/nanopyx/__liquid_engine__.py
@@ -70,15 +70,15 @@ def __init__(
             if rt[0].startswith('_run_'):
                 runtypename = '_'.join(rt[0].split('_')[2:]).lower()
                 # TODO Recheck this logic TODO
-                if 'numba' in runtypename and not njit_works:
+                if 'numba' in runtypename and not njit_works():
                     continue
-                elif 'dask' in runtypename and not dask_works:
+                elif 'dask' in runtypename and not dask_works():
                     continue
-                elif 'transonic' in runtypename and not transonic_works:
+                elif 'transonic' in runtypename and not transonic_works():
                     continue
-                elif 'cuda' in runtypename and not cuda_works:
+                elif 'cuda' in runtypename and not cuda_works():
                     continue
-                elif 'opencl' in runtypename and not opencl_works:
+                elif 'opencl' in runtypename and not opencl_works():
                     continue
                 else:
                     self._run_types[runtypename] = rt[1]
@@ -103,6 +103,7 @@ def __init__(
 
         # Lowercase everything for backwards compatibility
         self._benchmarks =  {k.lower(): v for k, v in self._benchmarks.items()}
+        print(self._benchmarks.keys())
 
         # check if the benchmark dictionary has a key for every available run type
         for run_type_designation in self._run_types.keys():
@@ -125,6 +126,7 @@ def __init__(
             )
             # Lowercase everything for backwards compatibility
             self._default_benchmarks =  {k.lower(): v for k, v in self._default_benchmarks.items()}
+            print(self._default_benchmarks.keys())
         except:
             self._default_benchmarks = []
 
@@ -146,7 +148,8 @@ def _run(self, *args, run_type=None, **kwargs):
         :return: the result and time taken
         """
 
-        run_type = run_type.lower()
+        if run_type is not None:
+            run_type = run_type.lower()
 
         if run_type is None and self.verbose:
             print("Querying the Agent...")
diff --git a/src/nanopyx/core/analysis/_le_channel_registration.pyx b/src/nanopyx/core/analysis/_le_channel_registration.pyx
index 0ecff710..8d6945e3 100644
--- a/src/nanopyx/core/analysis/_le_channel_registration.pyx
+++ b/src/nanopyx/core/analysis/_le_channel_registration.pyx
@@ -7,7 +7,7 @@ from cython.parallel import parallel, prange
 from libc.math cimport sqrt,pow
 
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from .ccm cimport _calculate_slice_ccm
 
 from .estimate_shift import GetMaxOptimizer
@@ -97,8 +97,7 @@ class ChannelRegistrationEstimator(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ChannelRegistrationEstimator"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            unthreaded_=True, threaded_=True, threaded_static_=True, threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=None):
         return self._run(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=run_type)
@@ -777,8 +776,12 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
 
-    def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device):
-        _runtype = "OpenCL_" + device["device"].name
+    def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None):
+
+        if device is None:
+            device = _fastest_device
+
+        _runtype = "opencl"
         crsm = ShiftAndMagnify(verbose=False)
 
         cdef float[:, :] img_ref = np.asarray(img_stack[ref_index], dtype=np.float32)
diff --git a/src/nanopyx/core/analysis/_le_drift_calculator.pyx b/src/nanopyx/core/analysis/_le_drift_calculator.pyx
index 8c0658e0..d8f0154a 100644
--- a/src/nanopyx/core/analysis/_le_drift_calculator.pyx
+++ b/src/nanopyx/core/analysis/_le_drift_calculator.pyx
@@ -25,10 +25,7 @@ class DriftEstimator(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "DriftEstimator"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing,
-            opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=False, 
-            threaded_dynamic_=False, threaded_guided_=False,
-            njit_=False, python_=False, transonic_=False, cuda_=False, dask_=False, verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, time_averaging: int = 2, max_drift: int = 5, ref_option: int = 0, run_type=None):
         return self._run(np.asarray(image).astype(np.float32), time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option, run_type=run_type)
@@ -214,280 +211,3 @@ class DriftEstimator(LiquidEngine):
                 output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
 
         return np.asarray(output).astype(np.float32)
-    def _run_threaded_guided(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
-        if not _check_even_square(image):
-            image = _make_even_square(image)
-
-        # get image dimensions, should already be an even square
-        cdef int n_slices = image.shape[0]
-        cdef int n_rows = image.shape[1]
-        cdef int n_cols = image.shape[2]
-
-        # ensures time averaging has an acceptable value
-        if time_averaging < 1:
-            time_averaging = 1
-        elif time_averaging > (n_slices//2):
-            time_averaging = n_slices//2
-
-        cdef int n_blocks = n_slices // time_averaging
-        
-        averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32)
-
-        cdef int idx
-        if time_averaging == 1:
-            averaged = image
-        else:
-            for idx in range(n_blocks):
-                averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0)
-
-        cdef float[:, :, :] ccm
-        cdef int row_start
-        cdef int col_start
-        if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols:
-            row_start = int(n_rows / 2 - max_drift)
-            col_start = int(n_cols / 2 - max_drift)
-            ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)]
-        else:
-            ccm = _calculate_ccm(averaged, ref_option)
-
-        cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32)
-        
-        cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32)
-
-        cdef float bias_row = 0.0
-        cdef float bias_col = 0.0
-        cdef float shift_x, shift_y
-
-        cdef int i
-        for i in range(n_blocks):
-
-            optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32))
-            shift_y, shift_x = optimizer.get_max()
-
-            drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3)
-            drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3)
-
-            if i == 0:
-                bias_row = drift_table[i, 0]
-                bias_col = drift_table[i, 1]
-            drift_table[i, 0] = drift_table[i, 0] - bias_row
-            drift_table[i, 1] = drift_table[i, 1] - bias_col
-
-            if ref_option == 1 and i > 0:
-                drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0]
-                drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1]
-
-        cdef float[:] drift_x, drift_y
-        if time_averaging > 1:
-            lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int)
-            x_interpolator = interp1d(
-                lin, np.array(drift_table[:, 1]), kind="cubic"
-            ) 
-            y_interpolator = interp1d(
-                lin, np.array(drift_table[:, 0]), kind="cubic"
-            )
-
-            drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 1] = drift_x
-            drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 2] = drift_y
-
-        else:
-            output[:, 1] = drift_table[:, 1] # switch order of rows and cols
-            output[:, 2] = drift_table[:, 0] # switch order of rows and cols
-
-        cdef int s
-        with nogil:
-            for s in prange(n_slices,schedule="guided"): 
-                output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
-
-        return np.asarray(output).astype(np.float32)
-    def _run_threaded_dynamic(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
-        if not _check_even_square(image):
-            image = _make_even_square(image)
-
-        # get image dimensions, should already be an even square
-        cdef int n_slices = image.shape[0]
-        cdef int n_rows = image.shape[1]
-        cdef int n_cols = image.shape[2]
-
-        # ensures time averaging has an acceptable value
-        if time_averaging < 1:
-            time_averaging = 1
-        elif time_averaging > (n_slices//2):
-            time_averaging = n_slices//2
-
-        cdef int n_blocks = n_slices // time_averaging
-        
-        averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32)
-
-        cdef int idx
-        if time_averaging == 1:
-            averaged = image
-        else:
-            for idx in range(n_blocks):
-                averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0)
-
-        cdef float[:, :, :] ccm
-        cdef int row_start
-        cdef int col_start
-        if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols:
-            row_start = int(n_rows / 2 - max_drift)
-            col_start = int(n_cols / 2 - max_drift)
-            ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)]
-        else:
-            ccm = _calculate_ccm(averaged, ref_option)
-
-        cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32)
-        
-        cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32)
-
-        cdef float bias_row = 0.0
-        cdef float bias_col = 0.0
-        cdef float shift_x, shift_y
-
-        cdef int i
-        for i in range(n_blocks):
-
-            optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32))
-            shift_y, shift_x = optimizer.get_max()
-
-            drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3)
-            drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3)
-
-            if i == 0:
-                bias_row = drift_table[i, 0]
-                bias_col = drift_table[i, 1]
-            drift_table[i, 0] = drift_table[i, 0] - bias_row
-            drift_table[i, 1] = drift_table[i, 1] - bias_col
-
-            if ref_option == 1 and i > 0:
-                drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0]
-                drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1]
-
-        cdef float[:] drift_x, drift_y
-        if time_averaging > 1:
-            lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int)
-            x_interpolator = interp1d(
-                lin, np.array(drift_table[:, 1]), kind="cubic"
-            ) 
-            y_interpolator = interp1d(
-                lin, np.array(drift_table[:, 0]), kind="cubic"
-            )
-
-            drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 1] = drift_x
-            drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 2] = drift_y
-
-        else:
-            output[:, 1] = drift_table[:, 1] # switch order of rows and cols
-            output[:, 2] = drift_table[:, 0] # switch order of rows and cols
-
-        cdef int s
-        with nogil:
-            for s in prange(n_slices,schedule="dynamic"): 
-                output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
-
-        return np.asarray(output).astype(np.float32)
-    def _run_threaded_static(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
-        if not _check_even_square(image):
-            image = _make_even_square(image)
-
-        # get image dimensions, should already be an even square
-        cdef int n_slices = image.shape[0]
-        cdef int n_rows = image.shape[1]
-        cdef int n_cols = image.shape[2]
-
-        # ensures time averaging has an acceptable value
-        if time_averaging < 1:
-            time_averaging = 1
-        elif time_averaging > (n_slices//2):
-            time_averaging = n_slices//2
-
-        cdef int n_blocks = n_slices // time_averaging
-        
-        averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32)
-
-        cdef int idx
-        if time_averaging == 1:
-            averaged = image
-        else:
-            for idx in range(n_blocks):
-                averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0)
-
-        cdef float[:, :, :] ccm
-        cdef int row_start
-        cdef int col_start
-        if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols:
-            row_start = int(n_rows / 2 - max_drift)
-            col_start = int(n_cols / 2 - max_drift)
-            ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)]
-        else:
-            ccm = _calculate_ccm(averaged, ref_option)
-
-        cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32)
-        
-        cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32)
-
-        cdef float bias_row = 0.0
-        cdef float bias_col = 0.0
-        cdef float shift_x, shift_y
-
-        cdef int i
-        for i in range(n_blocks):
-
-            optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32))
-            shift_y, shift_x = optimizer.get_max()
-
-            drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3)
-            drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3)
-
-            if i == 0:
-                bias_row = drift_table[i, 0]
-                bias_col = drift_table[i, 1]
-            drift_table[i, 0] = drift_table[i, 0] - bias_row
-            drift_table[i, 1] = drift_table[i, 1] - bias_col
-
-            if ref_option == 1 and i > 0:
-                drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0]
-                drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1]
-
-        cdef float[:] drift_x, drift_y
-        if time_averaging > 1:
-            lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int)
-            x_interpolator = interp1d(
-                lin, np.array(drift_table[:, 1]), kind="cubic"
-            ) 
-            y_interpolator = interp1d(
-                lin, np.array(drift_table[:, 0]), kind="cubic"
-            )
-
-            drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 1] = drift_x
-            drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 2] = drift_y
-
-        else:
-            output[:, 1] = drift_table[:, 1] # switch order of rows and cols
-            output[:, 2] = drift_table[:, 0] # switch order of rows and cols
-
-        cdef int s
-        with nogil:
-            for s in prange(n_slices,schedule="static"): 
-                output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
-
-        return np.asarray(output).astype(np.float32)
-
-
-# % if sch=='unthreaded':
-#     for i in range(n_blocks):
-#     % elif sch=='threaded':
-#     for i in prange(n_blocks):
-#     % else:
-#     for i in prange(n_blocks,schedule="static"):
-#     %endif
-#         average[i] = np.mean(image[i*time_averaging:(i+1)*time_averaging, :, :], axis=0)
\ No newline at end of file
diff --git a/src/nanopyx/core/transform/_le_esrrf.pyx b/src/nanopyx/core/transform/_le_esrrf.pyx
index 4c25f36b..bc22ee34 100644
--- a/src/nanopyx/core/transform/_le_esrrf.pyx
+++ b/src/nanopyx/core/transform/_le_esrrf.pyx
@@ -10,7 +10,7 @@ from libc.math cimport cos, sin
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 from ._le_interpolation_catmull_rom import ShiftAndMagnify
 from ._le_roberts_cross_gradients import GradientRobertsCross
@@ -24,10 +24,7 @@ class eSRRF(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "eSRRF_ST"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, magnification: int = 5, radius: float = 1.5, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type=None):
         image = check_image(image)
@@ -38,6 +35,10 @@ class eSRRF(LiquidEngine):
         return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
     def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1):
+
+        if device is None:
+            device = _fastest_device
+
         # TODO doIntensityWeighting is irrelevant on gpu2
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
diff --git a/src/nanopyx/core/transform/_le_esrrf3d.pyx b/src/nanopyx/core/transform/_le_esrrf3d.pyx
index b3c2c5a7..662c1568 100644
--- a/src/nanopyx/core/transform/_le_esrrf3d.pyx
+++ b/src/nanopyx/core/transform/_le_esrrf3d.pyx
@@ -26,9 +26,7 @@ class eSRRF3D(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "eSRRF_3D"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True, verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         self._gradients_s_interpolated = None
         self._gradients_r_interpolated = None
         self._gradients_c_interpolated = None
@@ -43,10 +41,10 @@ class eSRRF3D(LiquidEngine):
         if image.dtype != np.float32:
             image = image.astype(np.float32)
         if len(image.shape) == 4:
-            return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type)
+            return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, radius_z=radius_z, ratio_px=ratio_px, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type)
         elif len(image.shape) == 3:
             image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
-            return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type)
+            return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, radius_z=radius_z, ratio_px=ratio_px, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type)
 
     def benchmark(self, image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
         if image.dtype != np.float32:
diff --git a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
index 7a348921..81d3a0d2 100644
--- a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
@@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_bicubic.h":
@@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_bicubic"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -63,8 +60,9 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -238,11 +236,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_bicubic"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -285,7 +281,10 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -512,11 +511,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_bicubic"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -553,7 +550,10 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
index 99ca8d07..72c0b0f7 100644
--- a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
@@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_catmull_rom.h":
@@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_catmull_rom"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -63,8 +60,9 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -238,11 +236,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_catmull_rom"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -285,7 +281,10 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -512,11 +511,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_catmull_rom"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -553,7 +550,10 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
index bf1cd551..4c696f48 100644
--- a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
@@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_lanczos.h":
@@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_lanczos"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -63,8 +60,9 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -238,11 +236,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_lanczos"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -285,7 +281,10 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -512,11 +511,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_lanczos"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -553,7 +550,10 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
index 0df765ae..ecf3510b 100644
--- a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
@@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_nearest_neighbor.h":
@@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_nearest_neighbor"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -63,8 +60,9 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -238,11 +236,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_nearest_neighbor"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -285,7 +281,10 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -512,11 +511,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_nearest_neighbor"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -553,7 +550,10 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/nanopyx/core/transform/_le_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_nlm_denoising.pyx
index d13a94de..d7a97382 100644
--- a/src/nanopyx/core/transform/_le_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_nlm_denoising.pyx
@@ -12,7 +12,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 import os
 os.environ['PYOPENCL_NO_CACHE']='1'
@@ -36,9 +36,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -399,7 +396,11 @@ class NLMDenoising(LiquidEngine):
 
     
 
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        
+        if device is None:
+            device = _fastest_device
+        
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
index 506ce316..c9537358 100644
--- a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
@@ -10,7 +10,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_integral_image.h":
@@ -29,9 +29,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising_patch"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -423,7 +420,9 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32))
     
         
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
diff --git a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
index 206d8824..94908e91 100644
--- a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
@@ -12,7 +12,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_patch_distance.h":
@@ -28,9 +28,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising_pixel"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -373,7 +370,9 @@ class NLMDenoising(LiquidEngine):
 
     
         
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        if device is None:
+            device = _fastest_device
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
index 69f4f656..464ae495 100644
--- a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
+++ b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
@@ -6,7 +6,7 @@ cimport numpy as np
 from cython.parallel import parallel, prange
 
 from libc.math cimport sqrt, pow
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ...__liquid_engine__ import LiquidEngine
 from .__interpolation_tools__ import check_image
 
@@ -22,8 +22,6 @@ class RadialGradientConvergence(LiquidEngine):
         self._designation = "RadialGradientConvergence"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
 
@@ -181,6 +179,9 @@ class RadialGradientConvergence(LiquidEngine):
     
     def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1):
 
+        if device is None:
+            device = _fastest_device
+
         # gradient gxgymag*mag*size
         # image_interp = mag*size
         # output = image_interp
diff --git a/src/nanopyx/core/transform/_le_radiality.pyx b/src/nanopyx/core/transform/_le_radiality.pyx
index 76a5c313..663a7e30 100644
--- a/src/nanopyx/core/transform/_le_radiality.pyx
+++ b/src/nanopyx/core/transform/_le_radiality.pyx
@@ -7,7 +7,7 @@ from cython.parallel import parallel, prange
 
 from libc.math cimport sqrt, pi, fabs, cos, sin
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from .__interpolation_tools__ import check_image
 
 from ._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify
@@ -33,8 +33,6 @@ class Radiality(LiquidEngine):
         self._designation = "Radiality"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=False, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
     def run(self, image, image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True, run_type = None): 
@@ -47,7 +45,7 @@ class Radiality(LiquidEngine):
         image_interp = check_image(image_interp)
         return super().benchmark(image, image_interp, magnification, ringRadius, border, radialityPositivityConstraint, doIntensityWeighting)
 
-    def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
+    """def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
 
         cdef int _magnification = magnification
         cdef int _border = border
@@ -83,7 +81,7 @@ class Radiality(LiquidEngine):
                         else:
                             imRad[f,j,i] = _c_calculate_radiality_per_subpixel(i, j, &imGx[f,0,0], &imGy[f,0,0], xRingCoordinates, yRingCoordinates, _magnification, _ringRadius, nRingCoordinates, _radialityPositivityConstraint, h, w)
 
-        return np.asarray(imRad)
+        return np.asarray(imRad)"""
 
     def _run_threaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
 
@@ -233,6 +231,9 @@ class Radiality(LiquidEngine):
     
     def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1):
 
+        if device is None:
+            device = _fastest_device
+
         cl_ctx = cl.Context([device['device']])
         cl_queue = cl.CommandQueue(cl_ctx)
 
diff --git a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
index 28fd22f5..6a25034e 100644
--- a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
+++ b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
@@ -2,7 +2,7 @@
 
 import numpy as np
 cimport numpy as np
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ...__liquid_engine__ import LiquidEngine
 
 from cython.parallel import prange
@@ -17,8 +17,6 @@ class GradientRobertsCross(LiquidEngine):
         self._designation = "GradientRobertsCross"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
     def run(self, image, run_type = None):
@@ -91,7 +89,10 @@ class GradientRobertsCross(LiquidEngine):
         
         return gradient_col, gradient_row
 
-    def _run_opencl(self, float[:,:,:] image, dict device, int mem_div=1):
+    def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1):
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
index a61fca34..f221584c 100644
--- a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
+++ b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
@@ -7,7 +7,7 @@ cimport numpy as np
 from cython.parallel import prange
 
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ._le_mandelbrot_benchmark_ import mandelbrot as _py_mandelbrot
 from ._le_mandelbrot_benchmark_ import njit_mandelbrot as _njit_mandelbrot
 
@@ -23,9 +23,7 @@ class MandelbrotBenchmark(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "Mandelbrot_Benchmark"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, python_=True, njit_=True,
+            clear_benchmarks=clear_benchmarks, testing=testing,
             verbose=verbose)
 
     def run(self, int size=1000, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1, run_type=None) -> np.ndarray:
@@ -43,7 +41,10 @@ class MandelbrotBenchmark(LiquidEngine):
     def benchmark(self, int size, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1):
         return super().benchmark(size, r_start, r_end, c_start, c_end)
 
-    def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device) -> np.ndarray:
+    def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray:
+
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])

From 7ffabfafa55feb740a0d17a5bcfd60720fe8a2b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Thu, 16 May 2024 17:51:43 +0100
Subject: [PATCH 06/14] Proof of concept testing for run_type classification

---
 src/mako_templates/_le_interpolation_base.pyx |  8 ++++++
 src/nanopyx/__agent__.py                      | 11 ++++++--
 src/nanopyx/__liquid_engine__.py              | 20 ++++++++++---
 .../transform/_le_interpolation_bicubic.pyx   | 28 +++++++++++++++++++
 .../_le_interpolation_catmull_rom.pyx         | 28 +++++++++++++++++++
 .../transform/_le_interpolation_lanczos.pyx   | 28 +++++++++++++++++++
 .../_le_interpolation_nearest_neighbor.pyx    | 28 +++++++++++++++++++
 7 files changed, 144 insertions(+), 7 deletions(-)

diff --git a/src/mako_templates/_le_interpolation_base.pyx b/src/mako_templates/_le_interpolation_base.pyx
index 73806713..824d65de 100644
--- a/src/mako_templates/_le_interpolation_base.pyx
+++ b/src/mako_templates/_le_interpolation_base.pyx
@@ -63,6 +63,9 @@ class ShiftAndMagnify(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
     def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         # QUEUE AND CONTEXT
@@ -113,6 +116,11 @@ class ShiftAndMagnify(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py
index 29e123f3..b3827be5 100644
--- a/src/nanopyx/__agent__.py
+++ b/src/nanopyx/__agent__.py
@@ -53,11 +53,14 @@ def __init__(
 
         self.delayed_runtypes = {}  # Store runtypes as keys and their values as (delay_factor, delay_prob)
 
-    def _get_ordered_run_types(self, fn, args, kwargs):
+    def _get_ordered_run_types(self, fn, args, kwargs,_possible_runtypes=[]):
         """@public
         Retrieves an ordered list of run_types for the given args and kwargs
         """
 
+        if not _possible_runtypes:
+            _possible_runtypes = fn.run_types.keys()
+
         # str representation of the arguments and their corresponding 'norm'
         repr_args, repr_norm = fn._get_args_repr_score(*args, **kwargs)
         # dictionary to hold speeds
@@ -68,6 +71,8 @@ def _get_ordered_run_types(self, fn, args, kwargs):
         # fn._benchmarks is a dictionary of dictionaries. The first key is the run_type, the second key is the repr_args
         # Check every run_type for the most similar args
         for run_type in fn._run_types:
+            if run_type not in _possible_runtypes:
+                continue
             if repr_args in fn._benchmarks[run_type]:
                 run_info = fn._benchmarks[run_type][repr_args][1:]
             else:
@@ -196,13 +201,13 @@ def _adjust_times(self, fast_device_times, slow_device_times):
 
         return adjusted_times
 
-    def get_run_type(self, fn, args, kwargs):
+    def get_run_type(self, fn, args, kwargs,_possible_runtypes=[]):
         """
         Returns the best run_type for the given args and kwargs
         """
 
         # Get list of run types
-        fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs)
+        fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs,_possible_runtypes)
 
         # Penalize the average time a run_type had if that run_type was delayed in previous runs
         if len(self.delayed_runtypes.keys()) > 0:
diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py
index 42f80282..c40eabb5 100644
--- a/src/nanopyx/__liquid_engine__.py
+++ b/src/nanopyx/__liquid_engine__.py
@@ -158,10 +158,22 @@ def _run(self, *args, run_type=None, **kwargs):
         elif run_type is None:
             run_type = self.Agent.get_run_type(self, args, kwargs)
         elif run_type not in self._run_types:
-            print(f"Unexpected run type {run_type}")
-            print("Querying the Agent...")
-            run_type = self.Agent.get_run_type(self, args, kwargs)
-            print(f"Agent chose: {run_type}")
+            
+            # Collect the run types whose docstring carries the requested @tag
+            _possible_runtypes = [rt for rt in self._run_types.keys() if f"@{run_type}" in self._run_types[rt].__doc__]
+
+            if not _possible_runtypes:
+
+                print(f"Unexpected run type {run_type}")
+                print("Querying the Agent...")
+                run_type = self.Agent.get_run_type(self, args, kwargs)
+                print(f"Agent chose: {run_type}")
+            
+            else:
+
+                print(f"Choosing between all {run_type} implementations")
+                run_type = self.Agent.get_run_type(self, args, kwargs,_possible_runtypes)
+                print(f"Agent chose: {run_type}")
 
         # try to run
         try:
diff --git a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
index 81d3a0d2..8018d8a7 100644
--- a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
@@ -61,6 +61,9 @@ class ShiftAndMagnify(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
     def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         # QUEUE AND CONTEXT
@@ -110,6 +113,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -134,6 +142,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -158,6 +171,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -182,6 +200,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -206,6 +229,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
index 72c0b0f7..d225e3b4 100644
--- a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
@@ -61,6 +61,9 @@ class ShiftAndMagnify(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
     def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         # QUEUE AND CONTEXT
@@ -110,6 +113,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -134,6 +142,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -158,6 +171,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -182,6 +200,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -206,6 +229,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
index 4c696f48..d95c55de 100644
--- a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
@@ -61,6 +61,9 @@ class ShiftAndMagnify(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
     def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         # QUEUE AND CONTEXT
@@ -110,6 +113,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -134,6 +142,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -158,6 +171,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -182,6 +200,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -206,6 +229,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
index ecf3510b..76dd3f0e 100644
--- a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
@@ -61,6 +61,9 @@ class ShiftAndMagnify(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
     def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         # QUEUE AND CONTEXT
@@ -110,6 +113,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -134,6 +142,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -158,6 +171,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -182,6 +200,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -206,6 +229,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]

From d04ca3018615daaf3dc6fe37b8e47cf014d210db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Fri, 17 May 2024 11:05:34 +0100
Subject: [PATCH 07/14] Missing underscore in var name

---
 src/nanopyx/__agent__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py
index b3827be5..4939a1c0 100644
--- a/src/nanopyx/__agent__.py
+++ b/src/nanopyx/__agent__.py
@@ -59,7 +59,7 @@ def _get_ordered_run_types(self, fn, args, kwargs,_possible_runtypes=[]):
         """
 
         if not _possible_runtypes:
-            _possible_runtypes = fn.run_types.keys()
+            _possible_runtypes = fn._run_types.keys()
 
         # str representation of the arguments and their corresponding 'norm'
         repr_args, repr_norm = fn._get_args_repr_score(*args, **kwargs)

From 4d9e5208762a90c993bfa073252dfd77f08f17ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Fri, 17 May 2024 11:32:40 +0100
Subject: [PATCH 08/14] Added tags to all methods

---
 src/mako_templates/_le_interpolation_base.pyx | 25 +++++++-
 ...core.analysis._le_channel_registration.pyx | 11 +++-
 ...pyx.core.analysis._le_drift_calculator.pyx |  8 ++-
 ...nanopyx.core.transform._le_convolution.pyx | 31 +++++++++-
 .../nanopyx.core.transform._le_esrrf.pyx      | 13 +++-
 .../nanopyx.core.transform._le_esrrf3d.pyx    | 10 ++-
 ...nopyx.core.transform._le_nlm_denoising.pyx | 16 ++++-
 ...core.transform._le_patch_nlm_denoising.pyx | 15 +++++
 ...core.transform._le_pixel_nlm_denoising.pyx | 14 +++++
 ...nsform._le_radial_gradient_convergence.pyx | 15 ++++-
 .../nanopyx.core.transform._le_radiality.pyx  | 10 ++-
 ....transform._le_roberts_cross_gradients.pyx | 14 ++++-
 ...yx.core.utils._le_mandelbrot_benchmark.pyx | 21 ++++++-
 .../analysis/_le_channel_registration.pyx     | 28 ++++++++-
 .../core/analysis/_le_drift_calculator.pyx    | 11 +++-
 .../core/transform/_le_convolution.pyx        | 52 ++++++++++++++--
 src/nanopyx/core/transform/_le_esrrf.pyx      | 28 ++++++++-
 src/nanopyx/core/transform/_le_esrrf3d.pyx    | 39 +++++++++---
 .../transform/_le_interpolation_bicubic.pyx   | 62 ++++++++++++++++---
 .../_le_interpolation_catmull_rom.pyx         | 62 ++++++++++++++++---
 .../transform/_le_interpolation_lanczos.pyx   | 62 ++++++++++++++++---
 .../_le_interpolation_nearest_neighbor.pyx    | 62 ++++++++++++++++---
 .../core/transform/_le_nlm_denoising.pyx      | 31 +++++++++-
 .../transform/_le_patch_nlm_denoising.pyx     | 30 +++++++++
 .../transform/_le_pixel_nlm_denoising.pyx     | 31 ++++++++++
 .../_le_radial_gradient_convergence.pyx       | 33 ++++++++--
 src/nanopyx/core/transform/_le_radiality.pyx  | 28 +++++++--
 .../transform/_le_roberts_cross_gradients.pyx | 29 ++++++++-
 .../core/utils/_le_mandelbrot_benchmark.pyx   | 36 ++++++++++-
 29 files changed, 741 insertions(+), 86 deletions(-)

diff --git a/src/mako_templates/_le_interpolation_base.pyx b/src/mako_templates/_le_interpolation_base.pyx
index 824d65de..1503d573 100644
--- a/src/mako_templates/_le_interpolation_base.pyx
+++ b/src/mako_templates/_le_interpolation_base.pyx
@@ -118,7 +118,9 @@ class ShiftAndMagnify(LiquidEngine):
     def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
         """
         @cpu
+        % if sch!='unthreaded':
         @threaded
+        % endif
         @cython
         """
         cdef int nFrames = image.shape[0]
@@ -204,7 +206,9 @@ class ShiftScaleRotate(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
     def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -259,6 +263,13 @@ class ShiftScaleRotate(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -345,7 +356,9 @@ class PolarTransform(LiquidEngine):
         return super().benchmark(image, nrow, ncol, scale)
 
     def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -404,7 +417,13 @@ class PolarTransform(LiquidEngine):
         
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
index 7d8ef454..ec8db173 100644
--- a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
+++ b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
@@ -109,6 +109,13 @@ class ChannelRegistrationEstimator(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         _runtype = "${sch}".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -251,7 +258,9 @@ class ChannelRegistrationEstimator(LiquidEngine):
     % endfor
 
     def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
diff --git a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
index 4b83448d..1c928af6 100644
--- a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
+++ b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
@@ -37,7 +37,13 @@ class DriftEstimator(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         if not _check_even_square(image):
             image = _make_even_square(image)
 
diff --git a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
index e304597a..fa72d800 100644
--- a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
@@ -37,7 +37,13 @@ class Convolution(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -82,7 +88,9 @@ class Convolution(LiquidEngine):
     % endfor
 
     def _run_opencl(self, image, kernel, device=None):
-        
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -116,16 +124,35 @@ class Convolution(LiquidEngine):
         return image_out
 
     def _run_python(self, image, kernel):
+        """
+        @cpu
+        """
         return convolution2D_python(image, kernel).astype(np.float32)
 
     def _run_transonic(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        """
         return convolution2D_transonic(image, kernel).astype(np.float32)
 
     def _run_dask(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        """
         return convolution2D_dask(image, kernel).astype(np.float32)
 
     def _run_cuda(self, image, kernel):
+        """
+        @gpu
+        """
         return convolution2D_cuda(image, kernel).astype(np.float32)
 
     def _run_njit(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        @numba
+        """
         return convolution2D_numba(image, kernel).astype(np.float32)
diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
index ddfe4d05..09710307 100644
--- a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
@@ -37,7 +37,9 @@ class eSRRF(LiquidEngine):
         return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
     def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -152,6 +154,11 @@ class eSRRF(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "${sch}".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -167,6 +174,10 @@ class eSRRF(LiquidEngine):
     % endfor
 
     def _run_unthreaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @cython
+        """
         runtype = "Unthreaded"
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
index 2660ed1f..e10253b3 100644
--- a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
@@ -58,8 +58,14 @@ class eSRRF3D(LiquidEngine):
             return super().benchmark(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
     % for sch in schedulers:
-    def _run_${sch}(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_${sch}(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
diff --git a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
index 2f523166..e854107f 100644
--- a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
@@ -73,6 +73,9 @@ class NLMDenoising(LiquidEngine):
 
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
@@ -80,6 +83,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef float distance_cutoff = 5.0
         cdef float var = sigma * sigma
 
@@ -158,6 +165,11 @@ class NLMDenoising(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -225,7 +237,9 @@ class NLMDenoising(LiquidEngine):
     
 
     def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
-        
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         
diff --git a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
index e31a3102..40ceeacf 100644
--- a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
@@ -65,6 +65,9 @@ class NLMDenoising(LiquidEngine):
         return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma)
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
@@ -72,6 +75,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef float distance_cutoff = 5.0
         cdef float var = sigma * sigma
 
@@ -150,6 +157,11 @@ class NLMDenoising(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -229,6 +241,9 @@ class NLMDenoising(LiquidEngine):
     
         
     def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray:
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         # QUEUE AND CONTEXT
diff --git a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
index 750cd7b4..0d90e2a4 100644
--- a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
@@ -64,6 +64,9 @@ class NLMDenoising(LiquidEngine):
         return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) 
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False)
@@ -72,6 +75,13 @@ class NLMDenoising(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -141,6 +151,10 @@ class NLMDenoising(LiquidEngine):
     
         
     def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        @cython
+        """
         if device is None:
             device = _fastest_device
         cl_ctx = cl.Context([device['device']])
diff --git a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
index a8896ffc..3ad05ca6 100644
--- a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
@@ -41,7 +41,10 @@ class RadialGradientConvergence(LiquidEngine):
         return super().benchmark(gradient_col_interp, gradient_row_interp, image_interp, magnification, radius, sensitivity, doIntensityWeighting)
 
     def _run_unthreaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -71,7 +74,11 @@ class RadialGradientConvergence(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -105,7 +112,9 @@ class RadialGradientConvergence(LiquidEngine):
 
     
     def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
diff --git a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
index 89dd24c7..4b4ac4b5 100644
--- a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
@@ -87,7 +87,11 @@ class Radiality(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -130,7 +134,9 @@ class Radiality(LiquidEngine):
 
     
     def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
diff --git a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
index b91d7d53..d3e8ae1c 100644
--- a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
@@ -30,7 +30,10 @@ class GradientRobertsCross(LiquidEngine):
         return super().benchmark(image)
     
     def _run_unthreaded(self, float[:,:,:] image):
-
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
         cdef float [:,:,:] gradient_row = np.zeros_like(image)
@@ -44,6 +47,11 @@ class GradientRobertsCross(LiquidEngine):
     
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -62,7 +70,9 @@ class GradientRobertsCross(LiquidEngine):
     % endfor
 
     def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
diff --git a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
index df126033..a8f4fcf9 100644
--- a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
+++ b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
@@ -44,7 +44,9 @@ class MandelbrotBenchmark(LiquidEngine):
         return super().benchmark(size, r_start, r_end, c_start, c_end)
 
     def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray:
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -78,6 +80,10 @@ class MandelbrotBenchmark(LiquidEngine):
         return im_mandelbrot.get()
 
     def _run_unthreaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -95,6 +101,11 @@ class MandelbrotBenchmark(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -116,11 +127,19 @@ class MandelbrotBenchmark(LiquidEngine):
     % endfor
 
     def _run_python(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         _py_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end)
         return im_mandelbrot
 
     def _run_njit(self, int size=10, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @numba
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         _njit_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end)
         return im_mandelbrot
diff --git a/src/nanopyx/core/analysis/_le_channel_registration.pyx b/src/nanopyx/core/analysis/_le_channel_registration.pyx
index 8d6945e3..09c6e4c0 100644
--- a/src/nanopyx/core/analysis/_le_channel_registration.pyx
+++ b/src/nanopyx/core/analysis/_le_channel_registration.pyx
@@ -106,6 +106,10 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return super().benchmark(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity)
 
     def _run_unthreaded(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @cython
+        """
         _runtype = "unthreaded".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -240,6 +244,11 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
     def _run_threaded(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         _runtype = "threaded".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -374,6 +383,11 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
     def _run_threaded_guided(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         _runtype = "threaded_guided".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -508,6 +522,11 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
     def _run_threaded_dynamic(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         _runtype = "threaded_dynamic".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -642,6 +661,11 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
     def _run_threaded_static(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         _runtype = "threaded_static".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -777,7 +801,9 @@ class ChannelRegistrationEstimator(LiquidEngine):
 
 
     def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
diff --git a/src/nanopyx/core/analysis/_le_drift_calculator.pyx b/src/nanopyx/core/analysis/_le_drift_calculator.pyx
index d8f0154a..25165f7f 100644
--- a/src/nanopyx/core/analysis/_le_drift_calculator.pyx
+++ b/src/nanopyx/core/analysis/_le_drift_calculator.pyx
@@ -34,7 +34,10 @@ class DriftEstimator(LiquidEngine):
         return super().benchmark(image, time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option)
 
     def _run_unthreaded(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
+        """
+        @cpu
+        @cython
+        """
         if not _check_even_square(image):
             image = _make_even_square(image)
 
@@ -123,7 +126,11 @@ class DriftEstimator(LiquidEngine):
 
         return np.asarray(output).astype(np.float32)
     def _run_threaded(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if not _check_even_square(image):
             image = _make_even_square(image)
 
diff --git a/src/nanopyx/core/transform/_le_convolution.pyx b/src/nanopyx/core/transform/_le_convolution.pyx
index 1f4d5fa8..ce2fde47 100644
--- a/src/nanopyx/core/transform/_le_convolution.pyx
+++ b/src/nanopyx/core/transform/_le_convolution.pyx
@@ -34,7 +34,10 @@ class Convolution(LiquidEngine):
         return super().benchmark(image, kernel)
 
     def _run_unthreaded(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -69,7 +72,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
     def _run_threaded(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -104,7 +111,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
     def _run_threaded_guided(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -139,7 +150,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
     def _run_threaded_dynamic(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -174,7 +189,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
     def _run_threaded_static(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -210,7 +229,9 @@ class Convolution(LiquidEngine):
 
 
     def _run_opencl(self, image, kernel, device=None):
-        
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -244,16 +265,35 @@ class Convolution(LiquidEngine):
         return image_out
 
     def _run_python(self, image, kernel):
+        """
+        @cpu
+        """
         return convolution2D_python(image, kernel).astype(np.float32)
 
     def _run_transonic(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        """
         return convolution2D_transonic(image, kernel).astype(np.float32)
 
     def _run_dask(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        """
         return convolution2D_dask(image, kernel).astype(np.float32)
 
     def _run_cuda(self, image, kernel):
+        """
+        @gpu
+        """
         return convolution2D_cuda(image, kernel).astype(np.float32)
 
     def _run_njit(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        @numba
+        """
         return convolution2D_numba(image, kernel).astype(np.float32)
diff --git a/src/nanopyx/core/transform/_le_esrrf.pyx b/src/nanopyx/core/transform/_le_esrrf.pyx
index bc22ee34..df40fa2e 100644
--- a/src/nanopyx/core/transform/_le_esrrf.pyx
+++ b/src/nanopyx/core/transform/_le_esrrf.pyx
@@ -35,7 +35,9 @@ class eSRRF(LiquidEngine):
         return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
     def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -149,6 +151,11 @@ class eSRRF(LiquidEngine):
         return output_image
 
     def _run_threaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "threaded".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -162,6 +169,11 @@ class eSRRF(LiquidEngine):
 
         return radial_gradients
     def _run_threaded_guided(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "threaded_guided".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -175,6 +187,11 @@ class eSRRF(LiquidEngine):
 
         return radial_gradients
     def _run_threaded_dynamic(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "threaded_dynamic".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -188,6 +205,11 @@ class eSRRF(LiquidEngine):
 
         return radial_gradients
     def _run_threaded_static(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "threaded_static".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -202,6 +224,10 @@ class eSRRF(LiquidEngine):
         return radial_gradients
 
     def _run_unthreaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @cython
+        """
         runtype = "Unthreaded"
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
diff --git a/src/nanopyx/core/transform/_le_esrrf3d.pyx b/src/nanopyx/core/transform/_le_esrrf3d.pyx
index 662c1568..e1f27f7b 100644
--- a/src/nanopyx/core/transform/_le_esrrf3d.pyx
+++ b/src/nanopyx/core/transform/_le_esrrf3d.pyx
@@ -55,8 +55,12 @@ class eSRRF3D(LiquidEngine):
             image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
             return super().benchmark(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
-    def _run_threaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_threaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -119,8 +123,12 @@ class eSRRF3D(LiquidEngine):
                                 rgc_map[f, sM, rM, cM] = rgc_val
         
         return np.asarray(rgc_map)
-    def _run_threaded_guided(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_threaded_guided(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -183,8 +191,12 @@ class eSRRF3D(LiquidEngine):
                                 rgc_map[f, sM, rM, cM] = rgc_val
         
         return np.asarray(rgc_map)
-    def _run_threaded_dynamic(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_threaded_dynamic(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -247,8 +259,12 @@ class eSRRF3D(LiquidEngine):
                                 rgc_map[f, sM, rM, cM] = rgc_val
         
         return np.asarray(rgc_map)
-    def _run_threaded_static(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_threaded_static(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -311,8 +327,11 @@ class eSRRF3D(LiquidEngine):
                                 rgc_map[f, sM, rM, cM] = rgc_val
         
         return np.asarray(rgc_map)
-    def _run_unthreaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_unthreaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
diff --git a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
index 8018d8a7..d5f62230 100644
--- a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
@@ -115,7 +115,6 @@ class ShiftAndMagnify(LiquidEngine):
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
         """
         @cpu
-        @threaded
         @cython
         """
         cdef int nFrames = image.shape[0]
@@ -310,7 +309,9 @@ class ShiftScaleRotate(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
     def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -364,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -398,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -432,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -466,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -500,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -579,7 +604,9 @@ class PolarTransform(LiquidEngine):
         return super().benchmark(image, nrow, ncol, scale)
 
     def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -637,7 +664,10 @@ class PolarTransform(LiquidEngine):
         return output
         
     def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -671,7 +701,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -705,7 +739,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -739,7 +777,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -773,7 +815,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
index d225e3b4..f080fd06 100644
--- a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
@@ -115,7 +115,6 @@ class ShiftAndMagnify(LiquidEngine):
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
         """
         @cpu
-        @threaded
         @cython
         """
         cdef int nFrames = image.shape[0]
@@ -310,7 +309,9 @@ class ShiftScaleRotate(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
     def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -364,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -398,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -432,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -466,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -500,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -579,7 +604,9 @@ class PolarTransform(LiquidEngine):
         return super().benchmark(image, nrow, ncol, scale)
 
     def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -637,7 +664,10 @@ class PolarTransform(LiquidEngine):
         return output
         
     def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -671,7 +701,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -705,7 +739,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -739,7 +777,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -773,7 +815,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
index d95c55de..4537579d 100644
--- a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
@@ -115,7 +115,6 @@ class ShiftAndMagnify(LiquidEngine):
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
         """
         @cpu
-        @threaded
         @cython
         """
         cdef int nFrames = image.shape[0]
@@ -310,7 +309,9 @@ class ShiftScaleRotate(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
     def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -364,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -398,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -432,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -466,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -500,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -579,7 +604,9 @@ class PolarTransform(LiquidEngine):
         return super().benchmark(image, nrow, ncol, scale)
 
     def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -637,7 +664,10 @@ class PolarTransform(LiquidEngine):
         return output
         
     def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -671,7 +701,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -705,7 +739,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -739,7 +777,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -773,7 +815,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
index 76dd3f0e..d53ee3d4 100644
--- a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
@@ -115,7 +115,6 @@ class ShiftAndMagnify(LiquidEngine):
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
         """
         @cpu
-        @threaded
         @cython
         """
         cdef int nFrames = image.shape[0]
@@ -310,7 +309,9 @@ class ShiftScaleRotate(LiquidEngine):
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
     def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -364,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -398,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -432,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -466,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -500,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -579,7 +604,9 @@ class PolarTransform(LiquidEngine):
         return super().benchmark(image, nrow, ncol, scale)
 
     def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -637,7 +664,10 @@ class PolarTransform(LiquidEngine):
         return output
         
     def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -671,7 +701,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -705,7 +739,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -739,7 +777,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -773,7 +815,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_nlm_denoising.pyx
index d7a97382..b5aab716 100644
--- a/src/nanopyx/core/transform/_le_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_nlm_denoising.pyx
@@ -71,6 +71,9 @@ class NLMDenoising(LiquidEngine):
 
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
@@ -78,6 +81,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef float distance_cutoff = 5.0
         cdef float var = sigma * sigma
 
@@ -155,6 +162,11 @@ class NLMDenoising(LiquidEngine):
                                             pad_size: -pad_size]).astype(np.float32))
 
     def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -215,6 +227,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -275,6 +292,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -335,6 +357,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -397,7 +424,9 @@ class NLMDenoising(LiquidEngine):
     
 
     def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
-        
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         
diff --git a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
index c9537358..f16d28a1 100644
--- a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
@@ -63,6 +63,9 @@ class NLMDenoising(LiquidEngine):
         return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma)
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
@@ -70,6 +73,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef float distance_cutoff = 5.0
         cdef float var = sigma * sigma
 
@@ -147,6 +154,11 @@ class NLMDenoising(LiquidEngine):
                                             pad_size: -pad_size]).astype(np.float32))
 
     def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -215,6 +227,11 @@ class NLMDenoising(LiquidEngine):
                         
         return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32))
     def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -283,6 +300,11 @@ class NLMDenoising(LiquidEngine):
                         
         return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32))
     def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -351,6 +373,11 @@ class NLMDenoising(LiquidEngine):
                         
         return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32))
     def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -421,6 +448,9 @@ class NLMDenoising(LiquidEngine):
     
         
     def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray:
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         # QUEUE AND CONTEXT
diff --git a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
index 94908e91..b0358c27 100644
--- a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
@@ -62,6 +62,9 @@ class NLMDenoising(LiquidEngine):
         return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) 
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False)
@@ -69,6 +72,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -129,6 +136,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -189,6 +201,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -249,6 +266,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -309,6 +331,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -371,6 +398,9 @@ class NLMDenoising(LiquidEngine):
     
         
     def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
         cl_ctx = cl.Context([device['device']])
diff --git a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
index 464ae495..0e9d1ab1 100644
--- a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
+++ b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
@@ -39,7 +39,10 @@ class RadialGradientConvergence(LiquidEngine):
         return super().benchmark(gradient_col_interp, gradient_row_interp, image_interp, magnification, radius, sensitivity, doIntensityWeighting)
 
     def _run_unthreaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -68,7 +71,11 @@ class RadialGradientConvergence(LiquidEngine):
         return np.asarray(rgc_map,dtype=np.float32)
 
     def _run_threaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -95,7 +102,11 @@ class RadialGradientConvergence(LiquidEngine):
                             rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION,  fwhm, tSO, tSS, _sensitivity)
         return np.asarray(rgc_map,dtype=np.float32)
     def _run_threaded_guided(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -122,7 +133,11 @@ class RadialGradientConvergence(LiquidEngine):
                             rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION,  fwhm, tSO, tSS, _sensitivity)
         return np.asarray(rgc_map,dtype=np.float32)
     def _run_threaded_dynamic(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -149,7 +164,11 @@ class RadialGradientConvergence(LiquidEngine):
                             rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION,  fwhm, tSO, tSS, _sensitivity)
         return np.asarray(rgc_map,dtype=np.float32)
     def _run_threaded_static(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -178,7 +197,9 @@ class RadialGradientConvergence(LiquidEngine):
 
     
     def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
diff --git a/src/nanopyx/core/transform/_le_radiality.pyx b/src/nanopyx/core/transform/_le_radiality.pyx
index 663a7e30..bad66375 100644
--- a/src/nanopyx/core/transform/_le_radiality.pyx
+++ b/src/nanopyx/core/transform/_le_radiality.pyx
@@ -84,7 +84,11 @@ class Radiality(LiquidEngine):
         return np.asarray(imRad)"""
 
     def _run_threaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -120,7 +124,11 @@ class Radiality(LiquidEngine):
 
         return np.asarray(imRad)
     def _run_threaded_guided(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -156,7 +164,11 @@ class Radiality(LiquidEngine):
 
         return np.asarray(imRad)
     def _run_threaded_dynamic(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -192,7 +204,11 @@ class Radiality(LiquidEngine):
 
         return np.asarray(imRad)
     def _run_threaded_static(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -230,7 +246,9 @@ class Radiality(LiquidEngine):
 
     
     def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
diff --git a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
index 6a25034e..002423ed 100644
--- a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
+++ b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
@@ -28,7 +28,10 @@ class GradientRobertsCross(LiquidEngine):
         return super().benchmark(image)
     
     def _run_unthreaded(self, float[:,:,:] image):
-
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
         cdef float [:,:,:] gradient_row = np.zeros_like(image)
@@ -41,6 +44,11 @@ class GradientRobertsCross(LiquidEngine):
         return gradient_col, gradient_row
     
     def _run_threaded(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -53,6 +61,11 @@ class GradientRobertsCross(LiquidEngine):
         
         return gradient_col, gradient_row
     def _run_threaded_guided(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -65,6 +78,11 @@ class GradientRobertsCross(LiquidEngine):
         
         return gradient_col, gradient_row
     def _run_threaded_dynamic(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -77,6 +95,11 @@ class GradientRobertsCross(LiquidEngine):
         
         return gradient_col, gradient_row
     def _run_threaded_static(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -90,7 +113,9 @@ class GradientRobertsCross(LiquidEngine):
         return gradient_col, gradient_row
 
     def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1):
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
diff --git a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
index f221584c..d28896a9 100644
--- a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
+++ b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
@@ -42,7 +42,9 @@ class MandelbrotBenchmark(LiquidEngine):
         return super().benchmark(size, r_start, r_end, c_start, c_end)
 
     def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray:
-
+        """
+        @gpu
+        """
         if device is None:
             device = _fastest_device
 
@@ -76,6 +78,10 @@ class MandelbrotBenchmark(LiquidEngine):
         return im_mandelbrot.get()
 
     def _run_unthreaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -92,6 +98,11 @@ class MandelbrotBenchmark(LiquidEngine):
         return im_mandelbrot
 
     def _run_threaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -107,6 +118,11 @@ class MandelbrotBenchmark(LiquidEngine):
 
         return im_mandelbrot
     def _run_threaded_guided(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -122,6 +138,11 @@ class MandelbrotBenchmark(LiquidEngine):
 
         return im_mandelbrot
     def _run_threaded_dynamic(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -137,6 +158,11 @@ class MandelbrotBenchmark(LiquidEngine):
 
         return im_mandelbrot
     def _run_threaded_static(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -153,11 +179,19 @@ class MandelbrotBenchmark(LiquidEngine):
         return im_mandelbrot
 
     def _run_python(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         _py_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end)
         return im_mandelbrot
 
     def _run_njit(self, int size=10, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @numba
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         _njit_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end)
         return im_mandelbrot

From 6fa0666ddcd9a1c44f333ad8151e8e90373c962a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Fri, 17 May 2024 11:35:40 +0100
Subject: [PATCH 09/14] Removing obsolete prints

---
 src/nanopyx/__liquid_engine__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py
index c40eabb5..a044d037 100644
--- a/src/nanopyx/__liquid_engine__.py
+++ b/src/nanopyx/__liquid_engine__.py
@@ -103,7 +103,6 @@ def __init__(
 
         # Lowercase everything for backwards compatibility
         self._benchmarks =  {k.lower(): v for k, v in self._benchmarks.items()}
-        print(self._benchmarks.keys())
 
         # check if the benchmark dictionary has a key for every available run type
         for run_type_designation in self._run_types.keys():
@@ -126,7 +125,6 @@ def __init__(
             )
             # Lowercase everything for backwards compatibility
             self._default_benchmarks =  {k.lower(): v for k, v in self._default_benchmarks.items()}
-            print(self._default_benchmarks.keys())
         except:
             self._default_benchmarks = []
 

From fcdcf43acdac4c57257d3adece5c10288103b72a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Brito?=
 <50997716+antmsbrito@users.noreply.github.com>
Date: Fri, 17 May 2024 13:01:23 +0100
Subject: [PATCH 10/14] Added further error handling to the agent

---
 src/nanopyx/__agent__.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py
index 4939a1c0..be03b53c 100644
--- a/src/nanopyx/__agent__.py
+++ b/src/nanopyx/__agent__.py
@@ -207,7 +207,18 @@ def get_run_type(self, fn, args, kwargs,_possible_runtypes=[]):
         """
 
         # Get list of run types
-        fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs,_possible_runtypes)
+        try:
+            fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs,_possible_runtypes)
+        except TypeError:
+            print(f"There seems to be an error regarding your benchmarks. \n\
+To give full control to the agent please ensure that one of the following is true: \n\
+\t - You have at least 3 benchmarks for all runtypes using any set of args,kwargs \n\
+\t - Provide a set of default benchmarks during the Liquid Engine class creation \n\
+Otherwise explicity choose one of the following run_types:")
+            print('\t-','\n\t- '.join(fn._run_types.keys()))
+
+            print("The agent will choose a random run_type")
+            return random.choices(list(fn._run_types.keys()), k=1)[0]
 
         # Penalize the average time a run_type had if that run_type was delayed in previous runs
         if len(self.delayed_runtypes.keys()) > 0:

From 699f3e2d30e887d70cff6b01161997cc9eab4946 Mon Sep 17 00:00:00 2001
From: Bruno Saraiva <bruno.msaraiva2@gmail.com>
Date: Fri, 17 May 2024 14:05:04 +0100
Subject: [PATCH 11/14] example of implementing custom le class with switch

---
 .../LiquidEngineImplementationExample.ipynb   | 139 ++++++++++++++++++
 notebooks/myliquidengineclass.py              |  22 +++
 2 files changed, 161 insertions(+)
 create mode 100644 notebooks/LiquidEngineImplementationExample.ipynb
 create mode 100644 notebooks/myliquidengineclass.py

diff --git a/notebooks/LiquidEngineImplementationExample.ipynb b/notebooks/LiquidEngineImplementationExample.ipynb
new file mode 100644
index 00000000..ad8df266
--- /dev/null
+++ b/notebooks/LiquidEngineImplementationExample.ipynb
@@ -0,0 +1,139 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create a Liquid Engine Class implementing the two modes of Scikit-image NLM denoising as two different implementations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a random image array to be processed"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "img = np.random.random((1, 100, 100)).astype(np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Benchmark the two implementations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cupy implementation is not available. Make sure you have the right version of Cupy and CUDA installed.\n",
+      "Agent: MyLiquidEngineClass using ski_nlm_fast ran in 21.019929375033826 seconds\n",
+      "Agent: MyLiquidEngineClass using ski_nlm_nonfast ran in 0.3058308749459684 seconds\n",
+      "Fastest run type: ski_nlm_nonfast\n",
+      "Slowest run type: ski_nlm_fast\n",
+      "ski_nlm_nonfast is 68.73x faster than ski_nlm_fast\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[(0.3058308749459684, 'ski_nlm_nonfast', None),\n",
+       " (21.019929375033826, 'ski_nlm_fast', None)]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from myliquidengineclass import MyLiquidEngineClass\n",
+    "my_liquid = MyLiquidEngineClass()\n",
+    "my_liquid.benchmark(img, patch_size=5, patch_distance=11, h=0.1, sigma=0.0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Benchmark the two implementations with a different image size"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Agent: MyLiquidEngineClass using ski_nlm_fast ran in 0.013037208002060652 seconds\n",
+      "Agent: MyLiquidEngineClass using ski_nlm_nonfast ran in 0.06136862491257489 seconds\n",
+      "Fastest run type: ski_nlm_fast\n",
+      "Slowest run type: ski_nlm_nonfast\n",
+      "ski_nlm_fast is 4.71x faster than ski_nlm_nonfast\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[(0.013037208002060652, 'ski_nlm_fast', None),\n",
+       " (0.06136862491257489, 'ski_nlm_nonfast', None)]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "my_liquid = MyLiquidEngineClass()\n",
+    "my_liquid.benchmark(np.random.random((500, 500)).astype(np.float32), patch_size=5, patch_distance=1, h=0.1, sigma=0.0)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ocb_dev",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/myliquidengineclass.py b/notebooks/myliquidengineclass.py
new file mode 100644
index 00000000..91ab3b1d
--- /dev/null
+++ b/notebooks/myliquidengineclass.py
@@ -0,0 +1,22 @@
+import numpy as np
+from nanopyx.__liquid_engine__ import LiquidEngine
+from skimage.restoration import denoise_nl_means
+
+
+class MyLiquidEngineClass(LiquidEngine):
+    """Example Liquid Engine exposing scikit-image NLM denoising as two run types: fast mode and non-fast mode."""
+
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
+        self._designation = "MyLiquidEngineClass"
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
+
+    def run(self, image: np.ndarray, patch_size: int, patch_distance: int, h: float, sigma: float, run_type: str = None):
+        """Denoise `image` as float32; `run_type` names the implementation to use (None leaves the choice to the engine)."""
+        image = image.astype(np.float32, copy=False)  # the string "np.float32" is not a valid dtype; copy=False avoids a copy when already float32
+        return self._run(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, run_type=run_type)
+
+    def _run_ski_nlm_fast(self, image, patch_size, patch_distance, h, sigma):
+        return denoise_nl_means(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
+
+    def _run_ski_nlm_nonfast(self, image, patch_size, patch_distance, h, sigma):
+        return denoise_nl_means(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False)
\ No newline at end of file

From 27c233cc726d29dc48847e0d1a11795e194a2bf4 Mon Sep 17 00:00:00 2001
From: Bruno Saraiva <bruno.msaraiva2@gmail.com>
Date: Fri, 17 May 2024 14:05:13 +0100
Subject: [PATCH 12/14] update to pytest settings

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d9047f8f..0f35f3ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -139,7 +139,7 @@ reportUndefinedVariable = false
 reportMissingImports = false
 
 [tool.pytest.ini_options]
-addopts = "--cov=nanopyx --plots --doctest-modules --doctest-cython --ignore-glob=run*Tools.py --ignore=setup.py --ignore=notebooks/ --ignore=src/scripts --ignore=src/notebookchef --ignore=tests/notebooks"
+addopts = "--cov=nanopyx --plots --doctest-modules --doctest-cython --ignore-glob=run*Tools.py --ignore=setup.py --ignore=notebooks/ --ignore=src/scripts --ignore=src/notebookchef --ignore=tests/notebooks --cov-report term-missing"
 timeout = 6001
 plt_dirname = "tests_plots"
 doctest_encoding = "latin1"

From 11ada1b4fa8078a4b793e265cc271d9b6e879491 Mon Sep 17 00:00:00 2001
From: Bruno Saraiva <bruno.msaraiva2@gmail.com>
Date: Wed, 22 May 2024 12:13:11 +0100
Subject: [PATCH 13/14] changed to use external liquid_engine package

---
 pyproject.toml                   |   1 +
 src/nanopyx/__agent__.py         | 261 +-----------------
 src/nanopyx/__liquid_engine__.py | 452 +------------------------------
 3 files changed, 3 insertions(+), 711 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0f35f3ef..fb2b1eee 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
+    "liquid_engine",
     "mako>=1.3.0",
     "cython>=0.29.32",
     "numpy>=1.22,<2",
diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py
index be03b53c..641a80a1 100644
--- a/src/nanopyx/__agent__.py
+++ b/src/nanopyx/__agent__.py
@@ -1,260 +1 @@
-import platform
-import random
-
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from scipy.stats import norm
-
-from .__njit__ import njit_works
-from .__opencl__ import opencl_works, devices
-
-
-class Agent_:
-
-    """
-    Base class for the Agent of the Nanopyx Liquid Engine
-    Pond, James Pond
-    """
-
-    def __init__(
-        self,
-    ) -> None:
-        """
-        Initialize the Agent
-        The agent is supposed to work as a singleton object, initialized only once in the __init__.py of nanopyx
-        PS: (Is this good enough or is it necessary to implement the singleton design pattern?)
-
-        Agent responsabilities:
-            1. Store the current state of the machine (e.g. OS, CPU, RAM, GPU, Python version etc.);
-            2. Store the current state of ALL initialized LE objects (e.g. anything that is currently running, anything that is scheduled to run,
-                runs previously executed in the current session etc.);
-            3. Whenever a LE object wants to run, it must query the Agent on what is the best implementation for it;
-            4. Tests whether there was an unexpected delay and adjust following paths based on it;
-        """
-
-        ### MACHINE INFO ###
-        self.os_info = {"OS": platform.platform(), "Architecture": platform.machine()}
-        self.cpu_info = {"CPU": platform.processor()}
-        self.ram_info = {"RAM": "TBD"}
-        self.py_info = {
-            "Version": platform.python_version(),
-            "Implementation": platform.python_implementation(),
-            "Compiler": platform.python_compiler(),
-        }
-
-        self.numba_info = {"Numba": njit_works()}
-        self.pyopencl_info = {"PyOpenCL": opencl_works(), "Devices": devices}
-        self.cuda_info = {"CUDA": "TBD"}
-        ### MACHINE INFO ###
-
-        self._current_runs = []
-        self._scheduled_runs = []
-        self._finished_runs = []
-
-        self.delayed_runtypes = {}  # Store runtypes as keys and their values as (delay_factor, delay_prob)
-
-    def _get_ordered_run_types(self, fn, args, kwargs,_possible_runtypes=[]):
-        """@public
-        Retrieves an ordered list of run_types for the given args and kwargs
-        """
-
-        if not _possible_runtypes:
-            _possible_runtypes = fn._run_types.keys()
-
-        # str representation of the arguments and their corresponding 'norm'
-        repr_args, repr_norm = fn._get_args_repr_score(*args, **kwargs)
-        # dictionary to hold speeds
-        fast_avg_speed = {}
-        fast_std_speed = {}
-        slow_avg_speed = {}
-        slow_std_speed = {}
-        # fn._benchmarks is a dictionary of dictionaries. The first key is the run_type, the second key is the repr_args
-        # Check every run_type for the most similar args
-        for run_type in fn._run_types:
-            if run_type not in _possible_runtypes:
-                continue
-            if repr_args in fn._benchmarks[run_type]:
-                run_info = fn._benchmarks[run_type][repr_args][1:]
-            else:
-                # if the repr_args are not in the benchmarks, find the most similar repr_args
-                best_score = np.inf
-                best_repr_args = None
-                for repr_args_ in fn._benchmarks[run_type]:
-                    score = np.abs(fn._benchmarks[run_type][repr_args_][0] - repr_norm)
-                    if score < best_score:
-                        best_score = score
-                        best_repr_args = repr_args_
-                # What happens if there are no benchmarks for this runtype?
-                if best_repr_args is None:
-                    run_info = [0]
-                else:
-                    run_info = fn._benchmarks[run_type][best_repr_args][1:]
-
-            if None in run_info: # yamls null are read into None python objects
-                continue
-
-            if len(run_info) < 2:
-                # Fall back to default values
-                if "opencl" in run_type:
-                    rt = "opencl"
-                else:
-                    rt = run_type
-
-                best_score = np.inf
-                best_repr_args = None
-                for repr_args_ in fn._default_benchmarks[rt]:
-                    score = np.abs(fn._default_benchmarks[rt][repr_args_][0] - repr_norm)
-                    if score < best_score:
-                        best_score = score
-                        best_repr_args = repr_args_
-                run_info = fn._default_benchmarks[rt][best_repr_args][1:]
-
-            run_info = np.array(run_info)
-            if len(run_info) > 50:
-                run_info = run_info[-50:]
-
-            fast_values = np.partition(run_info, len(run_info) // 2)[: len(run_info) // 2]
-            slow_values = np.partition(run_info, len(run_info) // 2)[len(run_info) // 2 :]
-            fast_avg_speed[run_type] = np.average(fast_values)
-            fast_std_speed[run_type] = np.std(fast_values)
-            slow_avg_speed[run_type] = np.average(slow_values)
-            slow_std_speed[run_type] = np.std(slow_values)
-
-        return fast_avg_speed, fast_std_speed, slow_avg_speed, slow_std_speed
-
-    def _calculate_prob_of_delay(self, runtimes_history, avg, std):
-        """@public
-        Calculates the probability that the given run_type is still delayed using historical data
-        """
-
-        # Boolean array, True if delay, False if not
-        delays = runtimes_history > avg + 4 * std
-
-        model = LogisticRegression()
-        model.fit([[state] for state in delays[:-1]], delays[1:])
-
-        return model.predict_proba([[True]])[:, model.classes_.tolist().index(True)][0]
-
-    def _check_delay(self, run_type, runtime, runtimes_history, verbose=True):
-        """@public
-        Checks if the given run_type ran delayed in the previous run when compared with historical data
-        If delayed:
-            1. Calculates a probability that this delay is maintained
-            2. Stores the delay factor and the probability
-        """
-        # TODO test 
-        threaded_runtypes = ["threaded", "threaded_static", "threaded_dynamic", "threaded_guided"]
-
-        runtimes_history = np.array(runtimes_history)
-        if len(runtimes_history) > 50:
-            runtimes_history = runtimes_history[-50:]
-        fast_values = np.partition(runtimes_history, len(runtimes_history) // 2)[: len(runtimes_history) // 2]
-        slow_values = np.partition(runtimes_history, len(runtimes_history) // 2)[len(runtimes_history) // 2 :]
-
-        fast_avg_speed = np.average(fast_values)
-        fast_std_speed = np.std(fast_values)
-        slow_avg_speed = np.average(slow_values)
-        slow_std_speed = np.std(slow_values)
-
-        if run_type in self.delayed_runtypes:
-            if runtime < (slow_avg_speed - slow_std_speed) or runtime < (fast_avg_speed + fast_std_speed):
-                if "threaded" in run_type:
-                    for threaded_run_type in threaded_runtypes:
-                        self.delayed_runtypes.pop(threaded_run_type, None)
-                else:
-                    if run_type in self.delayed_runtypes:
-                        self.delayed_runtypes.pop(run_type, None)
-                return "Delay off"
-
-        if runtime > fast_avg_speed + 4 * fast_std_speed:
-            runtimes_history = np.append(runtimes_history, runtime)
-            delay_factor = runtime / fast_avg_speed
-            try:
-                delay_prob = self._calculate_prob_of_delay(runtimes_history, fast_avg_speed, fast_std_speed)
-            except ValueError:
-                delay_prob = 0.01
-            if verbose:
-                print(
-                    f"Run type {run_type} was delayed in the previous run. Delay factor: {delay_factor}, Delay probability: {delay_prob}"
-                )
-
-            if "threaded" in run_type:
-                for threaded_run_type in threaded_runtypes:
-                    self.delayed_runtypes[threaded_run_type] = (delay_factor, delay_prob)
-            else:
-                self.delayed_runtypes[run_type] = (delay_factor, delay_prob)
-
-    def _adjust_times(self, fast_device_times, slow_device_times):
-        """@public
-        Adjusts the historic avg time of a run_type if it was delayed in previous runs
-        """
-        adjusted_times = fast_device_times.copy()
-        for runtype in self.delayed_runtypes.keys():
-            if runtype in fast_device_times.keys():
-                delay_factor, delay_prob = self.delayed_runtypes[runtype]
-                # Weighted avg by the probability the run_type is still delayed
-                # expected_time * P(~delay) + delayed_time * P(delay)
-                adjusted_times[runtype] = (
-                    fast_device_times[runtype] * (1 - delay_prob)
-                    + fast_device_times[runtype] * delay_factor * delay_prob
-                )
-
-        return adjusted_times
-
-    def get_run_type(self, fn, args, kwargs,_possible_runtypes=[]):
-        """
-        Returns the best run_type for the given args and kwargs
-        """
-
-        # Get list of run types
-        try:
-            fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs,_possible_runtypes)
-        except TypeError:
-            print(f"There seems to be an error regarding your benchmarks. \n\
-To give full control to the agent please ensure that one of the following is true: \n\
-\t - You have at least 3 benchmarks for all runtypes using any set of args,kwargs \n\
-\t - Provide a set of default benchmarks during the Liquid Engine class creation \n\
-Otherwise explicity choose one of the following run_types:")
-            print('\t-','\n\t- '.join(fn._run_types.keys()))
-
-            print("The agent will choose a random run_type")
-            return random.choices(list(fn._run_types.keys()), k=1)[0]
-
-        # Penalize the average time a run_type had if that run_type was delayed in previous runs
-        if len(self.delayed_runtypes.keys()) > 0:
-            adjusted_avg = self._adjust_times(fast_avg, slow_avg)
-
-            if sorted(fast_avg, key=fast_avg.get)[0] == sorted(adjusted_avg, key=adjusted_avg.get)[0]:
-                return sorted(fast_avg, key=fast_avg.get)[0]
-
-            weights = [(1 / adjusted_avg[k]) ** 2 for k in adjusted_avg]
-            weights = weights / np.sum(weights)
-
-            # failsafe
-            if sum(weights) == 0:
-                weights = [1 for k in adjusted_avg]
-
-            return random.choices(list(adjusted_avg.keys()), weights=weights, k=1)[0]
-        else:
-            return sorted(fast_avg, key=fast_avg.get)[0]
-
-    def _inform(self, fn, verbose=True):
-        """@public
-        Informs the Agent that a LE object finished running
-        """
-
-        repr_args = fn._last_args
-        run_type = fn._last_runtype
-
-        historical_data = fn._benchmarks[run_type][repr_args][1:]
-
-        assert historical_data[-1] == fn._last_time, "Historical data is not consistent with the last runtime"
-
-        if verbose:
-            print(f"Agent: {fn._designation} using {run_type} ran in {fn._last_time} seconds")
-
-        if len(historical_data) > 19:
-            self._check_delay(run_type, historical_data[-1], historical_data[:-1], verbose=verbose)
-
-
-Agent = Agent_()
+from liquid_engine import Agent
\ No newline at end of file
diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py
index a044d037..6638e067 100644
--- a/src/nanopyx/__liquid_engine__.py
+++ b/src/nanopyx/__liquid_engine__.py
@@ -1,451 +1 @@
-import os
-import timeit
-import yaml
-import datetime
-import inspect
-import warnings
-from functools import partial, reduce
-from itertools import combinations
-from pathlib import Path
-
-from importlib_resources import files
-
-import numpy as np
-
-# This will in the future come from the Agent
-from .__njit__ import njit_works
-from .__dask__ import dask_works
-from .__transonic__ import transonic_works
-from .__cuda__ import cuda_works
-from .__opencl__ import opencl_works, devices, cl
-
-__home_folder__ = os.path.expanduser("~")
-__benchmark_folder__ = os.path.join(__home_folder__, ".nanopyx")
-if not os.path.exists(__benchmark_folder__):
-    os.makedirs(__benchmark_folder__)
-
-from .__agent__ import Agent  # noqa: E402
-
-from .core.analysis.pearson_correlation import pearson_correlation
-
-
-class LiquidEngine:
-    """@public
-    Base class for parts of the Nanopyx Liquid Engine
-    Vroom Vroom
-    """
-
-    def __init__(
-        self,
-        testing: bool = False,
-        clear_benchmarks: bool = False,
-        verbose: bool = True,
-    ) -> None:
-        """@public
-        Initialize the Liquid Engine
-        The Liquid Engine base class is inherited by children classes that implement specific methods
-
-        Engine responsabilities:
-        1. Store implemented run types;
-        2. Handle previous benchmarks and I/O;
-        2. When queried, benchmark all available run types;
-        3. Run a specific method using a selected run type;
-
-        Benchmark files have the following format:
-        The benchmark file is read as dict of dicts.
-            BENCHMARK DICT FOR A SPECIFIC METHOD
-                |- RUN_TYPE #1
-                |      |- ARGS_REPR #1
-                |      |      |- [score, t2run#1, t2run#2, t2run#3, ...] last are newer. nan means fail
-                |      |- ARGS_REPR #2
-                |      |      |- [score, t2run#1, t2run#2, t2run#3, ...] last are newer. nan means fail
-                |      (...)
-                |- RUN_TYPE #2
-                (...)
-        """
-
-        # Start by checking available run types
-        self._run_types = {}
-        for rt in inspect.getmembers(self,inspect.ismethod):
-            if rt[0].startswith('_run_'):
-                runtypename = '_'.join(rt[0].split('_')[2:]).lower()
-                # TODO Recheck this logic TODO
-                if 'numba' in runtypename and not njit_works():
-                    continue
-                elif 'dask' in runtypename and not dask_works():
-                    continue
-                elif 'transonic' in runtypename and not transonic_works():
-                    continue
-                elif 'cuda' in runtypename and not cuda_works():
-                    continue
-                elif 'opencl' in runtypename and not opencl_works():
-                    continue
-                else:
-                    self._run_types[runtypename] = rt[1]
-
-        self.testing = testing
-        self.mem_div = 1
-
-        # benchmarks file path
-        # e.g.: ~/.nanopyx/liquid/_le_interpolation_nearest_neighbor.cpython-310-darwin/ShiftAndMagnify.yml
-        base_path = os.path.join(
-            __benchmark_folder__, "liquid", os.path.split(os.path.splitext(inspect.getfile(self.__class__))[0])[1]
-        )
-        os.makedirs(base_path, exist_ok=True)
-        self._benchmark_filepath = os.path.join(base_path, self.__class__.__name__ + ".yml")
-
-        # Load benchmark file if it exists, otherwise create an empty config
-        if not clear_benchmarks and os.path.exists(self._benchmark_filepath):
-            with open(self._benchmark_filepath) as f:
-                self._benchmarks = yaml.load(f, Loader=yaml.FullLoader)
-        else:
-            self._benchmarks = {}
-
-        # Lowercase everything for backwards compatibility
-        self._benchmarks =  {k.lower(): v for k, v in self._benchmarks.items()}
-
-        # check if the benchmark dictionary has a key for every available run type
-        for run_type_designation in self._run_types.keys():
-            if run_type_designation not in self._benchmarks:
-                self._benchmarks[run_type_designation] = {}
-
-        # helper attribute for benchmarking function
-        self._last_args = None
-        self._last_runtype = None
-        self._last_time = None
-
-        self.Agent = Agent
-
-        # load defaults
-        try:
-            self._default_benchmarks = yaml.safe_load(
-                files(f'liquid_benchmarks.{inspect.getmodule(self.__class__).__name__.split(".")[-1]}')
-                .joinpath(self.__class__.__name__ + ".yml")
-                .read_text()
-            )
-            # Lowercase everything for backwards compatibility
-            self._default_benchmarks =  {k.lower(): v for k, v in self._default_benchmarks.items()}
-        except:
-            self._default_benchmarks = []
-
-        self.verbose = verbose
-
-    def _run(self, *args, run_type=None, **kwargs):
-        """@public
-        Runs the function with the given args and kwargs
-
-        The code above does the following:
-        1. Check the specified run_type
-            - if str checks if the run type exists otherwise raise a NotImplementedError
-        2. It will run the _run_{run_type} function
-        3. It will return the result and the time taken to run
-
-        :param args: args for the function
-        :param run_type: the run type to use
-        :param kwargs: kwargs for the function
-        :return: the result and time taken
-        """
-
-        if run_type is not None:
-            run_type = run_type.lower()
-
-        if run_type is None and self.verbose:
-            print("Querying the Agent...")
-            run_type = self.Agent.get_run_type(self, args, kwargs)
-            print(f"Agent chose: {run_type}")
-        elif run_type is None:
-            run_type = self.Agent.get_run_type(self, args, kwargs)
-        elif run_type not in self._run_types:
-            
-            # Check if the tags in the run_types
-            _possible_runtypes = [rt for rt in self._run_types.keys() if f"@{run_type}" in self._run_types[rt].__doc__]
-
-            if not _possible_runtypes:
-
-                print(f"Unexpected run type {run_type}")
-                print("Querying the Agent...")
-                run_type = self.Agent.get_run_type(self, args, kwargs)
-                print(f"Agent chose: {run_type}")
-            
-            else:
-
-                print(f"Choosing between all {run_type} implementations")
-                run_type = self.Agent.get_run_type(self, args, kwargs,_possible_runtypes)
-                print(f"Agent chose: {run_type}")
-
-        # try to run
-        try:
-            if self.mem_div > 999:
-                raise ValueError(
-                    f"Maxmimum memory division factor achieved, can not try any longer with {run_type}. Use a smaller input or a different run_type"
-                )
-            t_start = timeit.default_timer()
-            result = self._run_types[run_type](*args, **kwargs)
-            t2run = timeit.default_timer() - t_start
-            arg_repr, arg_score = self._get_args_repr_score(*args, **kwargs)
-            self._store_results(arg_repr, arg_score, run_type, t2run)
-
-            self._last_time = t2run
-            self._last_args = arg_repr
-            self._last_runtype = run_type
-
-            self.Agent._inform(self, verbose=self.verbose)
-
-        except (cl.MemoryError, cl.LogicError) as e:
-            print("Found: ", e)
-            print("Reducing maximum buffer size and trying again...")
-            self.mem_div += 1
-            kwargs["mem_div"] = self.mem_div
-            result = self._run(*args, run_type=run_type, **kwargs)
-        except cl.Error as e:
-            if e.__str__() == "Buffer size is larger than device maximum memory allocation size":
-                print("Found: ", e)
-                print("Reducing maximum buffer size and trying again...")
-                self.mem_div += 1
-                kwargs["mem_div"] = self.mem_div
-                result = self._run(*args, run_type=run_type, **kwargs)
-            else:
-                print(f"Unexpected error while trying to run {run_type}")
-                print(e)
-                print("Please try again with another run type")
-                result = None
-        except Exception as e:
-            print(f"Unexpected error while trying to run {run_type}")
-            print(e)
-            print("Please try again with another run type")
-            result = None
-
-        self.mem_div = 1
-        return result
-
-    def benchmark(self, *args, **kwargs):
-        """
-        1. Run each available run type and record the run time and return value
-        2. Sort the run times from fastest to slowest
-        3. Compare each run type against each other, sorted by speed
-
-        :param args: args for the run method
-        :param kwargs: kwargs for the run method
-        :return:  a list of tuples containing the run time, run type name and optionally the return values
-        :rtype: [[run_time, run_type_name, return_value], ...]
-        """
-
-        # Create some lists to store runtimes and return values of run types
-        run_times = {}
-        returns = {}
-
-        # Run each run type and record the run time and return value
-        for run_type in self._run_types:
-            r = self._run(*args, run_type=run_type, **kwargs)
-
-            run_times[run_type] = self._last_time
-
-            if self.testing:  # Store return values if testing
-                returns[run_type] = r
-            else:
-                returns[run_type] = None
-
-        # Sort run_times by value
-        speed_sort = []
-        for run_type in sorted(run_times, key=run_times.get, reverse=False):
-            speed_sort.append(
-                (
-                    run_times[run_type],
-                    run_type,
-                    returns[run_type],
-                )
-            )
-
-        print(f"Fastest run type: {speed_sort[0][1]}")
-        print(f"Slowest run type: {speed_sort[-1][1]}")
-
-        # Compare each run type against each other, sorted by speed
-        different_runtypes = []
-        for pair in combinations(speed_sort, 2):
-            print(f"{pair[0][1]} is {pair[1][0]/pair[0][0]:.2f}x faster than {pair[1][1]}")
-            if self.testing:
-                if self._compare_runs(pair[0][2], pair[1][2]):
-                    print(f"{pair[0][1]} and {pair[1][1]} have similar outputs!")
-                else:
-                    warnings.warn(f"WARNING: outputs of {pair[0][1]} and {pair[1][1]} don't match!")
-                    different_runtypes.append(set([pair[0][1], pair[1][1]]))
-        if len(different_runtypes) <= len(self._run_types) - 1:
-            try:
-                common_runtype = reduce(lambda a, b: a & b, different_runtypes)
-            except TypeError:
-                common_runtype = {}
-            if common_runtype:
-                warnings.warn(f"WARNING: disabling {list(common_runtype)[0]} for this set of arguments!")
-                arg_repr, arg_score = self._get_args_repr_score(*args, **kwargs)
-                self._store_results(arg_repr, arg_score, list(common_runtype)[0], None)  # None saves to null in yamls
-
-        return speed_sort
-
-    def _compare_runs(self, output_1, output_2):
-        """@public"""
-        if output_1.ndim > 2:
-            pcc = 0
-            for i in range(output_1.shape[0]):
-                pcc += pearson_correlation(output_1[i, :, :], output_2[i, :, :])
-            pcc /= output_1.shape[0]
-        else:
-            pcc = pearson_correlation(output_1, output_2)
-
-        if pcc > 0.8:
-            return True
-        else:
-            return False
-
-    def _get_cl_code(self, file_name, cl_dp):
-        """
-        Retrieves the OpenCL code from the corresponding .cl file
-        """
-        cl_file = os.path.splitext(file_name)[0] + ".cl"
-
-        if not os.path.exists(cl_file):
-            cl_file = Path(os.path.abspath(inspect.getfile(self.__class__))).parent / file_name
-
-        assert os.path.exists(cl_file), "Could not find OpenCL file: " + str(cl_file)
-
-        kernel_str = open(cl_file).read()
-
-        if not cl_dp:
-            kernel_str = kernel_str.replace("double", "float")
-
-        return kernel_str
-
-    def _store_results(self, arg_repr, arg_score, run_type, t2run):
-        """@public
-        Stores the results of a run
-        """
-
-        # Check if the run type has been run, and if not create empty info
-        run_type_benchs = self._benchmarks[run_type]
-        if arg_repr not in run_type_benchs:
-            run_type_benchs[arg_repr] = [arg_score]
-
-        # Get the run info
-        c = run_type_benchs[arg_repr]
-
-        assert c[0] == arg_score, "arg_score mismatch"
-
-        c.append(t2run)
-
-        self._dump_run_times()
-
-    def _dump_run_times(
-        self,
-    ):
-        """@public"""
-        # TODO We might need to wrap this into a multiprocessing.Queue if we find it blocking
-        with open(self._benchmark_filepath, "w") as f:
-            yaml.dump(self._benchmarks, f)
-
-    def _get_args_repr_score(self, *args, **kwargs):
-        """@public
-        Get a string representation of the args and kwargs and corresponding 'score' / 'norm'
-        The idea is that similar args have closer 'score'. Fuzzy logic
-
-        The code does the following:
-        1. It converts any args that are floats or ints to "number()" strings, and any args that are tensors to "shape()" strings
-        2. It converts any kwargs that are floats or ints to "number()" strings, and any kwargs that are tensors to "shape()" strings
-        3. The 'score' is given by the product of all the floats or ints and all the shape sizes.
-
-        :return: the string representation of the args and kwargs
-        :rtype: str
-        """
-        _norm = 1
-        _args = []
-        for arg in args:
-            if type(arg) in (float, int):
-                _args.append(f"number({arg})")
-                if arg == 0:
-                    arg = 1
-                _norm *= arg
-            elif hasattr(arg, "shape"):
-                _args.append(f"shape{arg.shape}")
-                _norm *= arg.size
-            else:
-                _args.append(arg)
-
-        _kwargs = {}
-        for k, v in kwargs.items():
-            if type(v) in (float, int):
-                _kwargs[k] = f"number({v})"
-                if v == 0:
-                    v = 1
-                _norm *= v
-            if hasattr(v, "shape"):
-                _kwargs[k] = f"shape{arg.shape}"
-                _norm *= v.size
-            else:
-                _kwargs[k] = v
-
-        return repr((_args, _kwargs)), _norm
-
-    def get_highest_divisor(self, size_, max_):
-        """
-        Returns the highest divisor of size_ that is still lower than max_
-        """
-        value = 1
-        for i in range(1, int(np.sqrt(size_) + 1)):
-            if size_ % i == 0:
-                if i * i != size_:
-                    div2 = size_ / i
-
-                    if i < max_:
-                        value = max(value, i)
-                    if div2 < max_:
-                        value = max(value, div2)
-        return int(value)
-
-    def get_work_group(self, device, shape):
-        """
-        Calculates work group size for a given device and shape of global work space
-        """
-
-        max_wg_dims = device.max_work_item_sizes[0:3]
-        max_glo_dims = device.max_work_group_size
-
-        three = self.get_highest_divisor(shape[2], max_wg_dims[2])
-        max_two = max_glo_dims / three
-        two = self.get_highest_divisor(shape[1], max_two)
-        one = 1
-        return (one, two, three)
-
-    def _check_max_slices(self, input, number_of_max_slices):
-        """@public
-        Checks if number of maximum slices is greater than 0
-        """
-        if number_of_max_slices < 1:
-            raise ValueError("This device doesn't have enough memory to run this function with this input")
-        elif input.shape[0] < number_of_max_slices:
-            return input.shape[0]
-        else:
-            return number_of_max_slices
-
-    def _check_max_buffer_size(self, size, device, n_slices):
-        """@public
-        Checks if buffer size is larger than device maximum memory allocation size and n_slices is 1 and raises appropriate errors that are handled in the _run function.
-        """
-        if size > device.max_mem_alloc_size and n_slices == 1:
-            raise ValueError(
-                "This device cannot handle this input size with these parameters, try using a smaller input or other parameters"
-            )
-
-        if size > device.max_mem_alloc_size:
-            raise cl.Error("Buffer size is larger than device maximum memory allocation size")
-
-        return size
-
-    #####################################################
-    #                   RUN METHODS                     #
-    # THESE SHOULD ALWAYS BE OVERRIDEN BY CHILD CLASSES #
-    #####################################################
-
-    def run(self, *args, **kwargs):
-        """
-        Runs the function with the given args and kwargs
-        Should be overridden by the any class that inherits from this class
-        """
-        return self._run(*args, **kwargs)
+from liquid_engine import LiquidEngine
\ No newline at end of file

From 0fb8da297d0806d29789999afc5466b6bf136e9f Mon Sep 17 00:00:00 2001
From: Bruno Saraiva <bruno.msaraiva2@gmail.com>
Date: Tue, 4 Jun 2024 10:43:22 +0100
Subject: [PATCH 14/14] added maximum accepted version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index fb2b1eee..4da05d08 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ build-backend = "setuptools.build_meta"
 name = "nanopyx"
 description = "Nanoscopy Python library (NanoPyx, the successor to NanoJ) - focused on light microscopy and super-resolution imaging"
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.9,<3.12"
 license = { file = "LICENSE.txt" }
 keywords = [
     "NanoJ",