diff --git a/notebooks/LiquidEngineImplementationExample.ipynb b/notebooks/LiquidEngineImplementationExample.ipynb
new file mode 100644
index 00000000..ad8df266
--- /dev/null
+++ b/notebooks/LiquidEngineImplementationExample.ipynb
@@ -0,0 +1,139 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create a Liquid Engine Class implementing the two modes of Scikit-image NLM denoising as two different implementations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a random image array to be processed"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "img = np.random.random((1, 100, 100)).astype(np.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Benchmark the two implementations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cupy implementation is not available. Make sure you have the right version of Cupy and CUDA installed.\n",
+      "Agent: MyLiquidEngineClass using ski_nlm_fast ran in 21.019929375033826 seconds\n",
+      "Agent: MyLiquidEngineClass using ski_nlm_nonfast ran in 0.3058308749459684 seconds\n",
+      "Fastest run type: ski_nlm_nonfast\n",
+      "Slowest run type: ski_nlm_fast\n",
+      "ski_nlm_nonfast is 68.73x faster than ski_nlm_fast\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[(0.3058308749459684, 'ski_nlm_nonfast', None),\n",
+       " (21.019929375033826, 'ski_nlm_fast', None)]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from myliquidengineclass import MyLiquidEngineClass\n",
+    "my_liquid = MyLiquidEngineClass()\n",
+    "my_liquid.benchmark(img, patch_size=5, patch_distance=11, h=0.1, sigma=0.0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Benchmark the two implementations with a different image size and patch distance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Agent: MyLiquidEngineClass using ski_nlm_fast ran in 0.013037208002060652 seconds\n",
+      "Agent: MyLiquidEngineClass using ski_nlm_nonfast ran in 0.06136862491257489 seconds\n",
+      "Fastest run type: ski_nlm_fast\n",
+      "Slowest run type: ski_nlm_nonfast\n",
+      "ski_nlm_fast is 4.71x faster than ski_nlm_nonfast\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[(0.013037208002060652, 'ski_nlm_fast', None),\n",
+       " (0.06136862491257489, 'ski_nlm_nonfast', None)]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "my_liquid = MyLiquidEngineClass()\n",
+    "my_liquid.benchmark(np.random.random((500, 500)).astype(np.float32), patch_size=5, patch_distance=1, h=0.1, sigma=0.0)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ocb_dev",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/myliquidengineclass.py b/notebooks/myliquidengineclass.py
new file mode 100644
index 00000000..91ab3b1d
--- /dev/null
+++ b/notebooks/myliquidengineclass.py
@@ -0,0 +1,22 @@
+import numpy as np
+from nanopyx.__liquid_engine__ import LiquidEngine
+from skimage.restoration import denoise_nl_means
+
+
+class MyLiquidEngineClass(LiquidEngine):
+
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
+        self._designation = "MyLiquidEngineClass"
+        super().__init__(
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
+
+    def run(self, image: np.ndarray, patch_size: int, patch_distance: int, h: float, sigma: float, run_type:bool=None):
+        if image.dtype != "np.float32":
+            image = image.astype("np.float32")
+        return self._run(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma)
+
+    def _run_ski_nlm_fast(self, image, patch_size, patch_distance, h, sigma):
+        return denoise_nl_means(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
+
+    def _run_ski_nlm_nonfast(self, image, patch_size, patch_distance, h, sigma):
+        return denoise_nl_means(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index d9047f8f..4da05d08 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ build-backend = "setuptools.build_meta"
 name = "nanopyx"
 description = "Nanoscopy Python library (NanoPyx, the successor to NanoJ) - focused on light microscopy and super-resolution imaging"
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.9,<3.12"
 license = { file = "LICENSE.txt" }
 keywords = [
     "NanoJ",
@@ -36,6 +36,7 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
+    "liquid_engine",
     "mako>=1.3.0",
     "cython>=0.29.32",
     "numpy>=1.22,<2",
@@ -139,7 +140,7 @@ reportUndefinedVariable = false
 reportMissingImports = false
 
 [tool.pytest.ini_options]
-addopts = "--cov=nanopyx --plots --doctest-modules --doctest-cython --ignore-glob=run*Tools.py --ignore=setup.py --ignore=notebooks/ --ignore=src/scripts --ignore=src/notebookchef --ignore=tests/notebooks"
+addopts = "--cov=nanopyx --plots --doctest-modules --doctest-cython --ignore-glob=run*Tools.py --ignore=setup.py --ignore=notebooks/ --ignore=src/scripts --ignore=src/notebookchef --ignore=tests/notebooks --cov-report term-missing"
 timeout = 6001
 plt_dirname = "tests_plots"
 doctest_encoding = "latin1"
diff --git a/src/mako_templates/_le_interpolation_base.pyx b/src/mako_templates/_le_interpolation_base.pyx
index 62bc88bf..1503d573 100644
--- a/src/mako_templates/_le_interpolation_base.pyx
+++ b/src/mako_templates/_le_interpolation_base.pyx
@@ -9,7 +9,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_${self.attr.inter_name}.h":
@@ -23,10 +23,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_${self.attr.inter_name}"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -65,8 +62,12 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -115,6 +116,13 @@ class ShiftAndMagnify(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -152,11 +160,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_${self.attr.inter_name}"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -199,7 +205,12 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -252,6 +263,13 @@ class ShiftScaleRotate(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -298,11 +316,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_${self.attr.inter_name}"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -339,7 +355,12 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -396,7 +417,13 @@ class PolarTransform(LiquidEngine):
         
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
index 4adae44d..ec8db173 100644
--- a/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
+++ b/src/mako_templates/nanopyx.core.analysis._le_channel_registration.pyx
@@ -9,7 +9,7 @@ from cython.parallel import parallel, prange
 from libc.math cimport sqrt,pow
 
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from .ccm cimport _calculate_slice_ccm
 
 from .estimate_shift import GetMaxOptimizer
@@ -99,8 +99,7 @@ class ChannelRegistrationEstimator(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ChannelRegistrationEstimator"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            unthreaded_=True, threaded_=True, threaded_static_=True, threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=None):
         return self._run(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=run_type)
@@ -110,6 +109,13 @@ class ChannelRegistrationEstimator(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         _runtype = "${sch}".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -251,8 +257,14 @@ class ChannelRegistrationEstimator(LiquidEngine):
 
     % endfor
 
-    def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device):
-        _runtype = "OpenCL_" + device["device"].name
+    def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
+
+        _runtype = "opencl"
         crsm = ShiftAndMagnify(verbose=False)
 
         cdef float[:, :] img_ref = np.asarray(img_stack[ref_index], dtype=np.float32)
diff --git a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
index de121ac0..1c928af6 100644
--- a/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
+++ b/src/mako_templates/nanopyx.core.analysis._le_drift_calculator.pyx
@@ -1,5 +1,5 @@
 <%!
-schedulers = ['unthreaded','threaded','threaded_guided','threaded_dynamic','threaded_static']
+schedulers = ['unthreaded','threaded']
 %># cython: infer_types=True, wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3, profile=False, autogen_pxd=False
 import time
 import scipy
@@ -27,10 +27,7 @@ class DriftEstimator(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "DriftEstimator"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing,
-            opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=False, 
-            threaded_dynamic_=False, threaded_guided_=False,
-            njit_=False, python_=False, transonic_=False, cuda_=False, dask_=False, verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, time_averaging: int = 2, max_drift: int = 5, ref_option: int = 0, run_type=None):
         return self._run(np.asarray(image).astype(np.float32), time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option, run_type=run_type)
@@ -40,7 +37,13 @@ class DriftEstimator(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         if not _check_even_square(image):
             image = _make_even_square(image)
 
@@ -129,19 +132,9 @@ class DriftEstimator(LiquidEngine):
             % elif sch=='threaded':
             for s in prange(n_slices):
             % else:
-            for s in prange(n_slices,schedule="${sch.split('_')[1]}"): 
+            for s in prange(n_slices): 
             %endif
                 output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
 
         return np.asarray(output).astype(np.float32)
     %endfor
-
-
-# % if sch=='unthreaded':
-#     for i in range(n_blocks):
-#     % elif sch=='threaded':
-#     for i in prange(n_blocks):
-#     % else:
-#     for i in prange(n_blocks,schedule="${sch.split('_')[1]}"):
-#     %endif
-#         average[i] = np.mean(image[i*time_averaging:(i+1)*time_averaging, :, :], axis=0)
\ No newline at end of file
diff --git a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
index 2cbc1ddf..fa72d800 100644
--- a/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_convolution.pyx
@@ -15,7 +15,7 @@ from libc.math cimport cos, sin
 from .__interpolation_tools__ import check_image, value2array
 from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 class Convolution(LiquidEngine):
@@ -26,11 +26,7 @@ class Convolution(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "Conv2D"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True,
-            njit_=True, python_=True, transonic_=True, cuda_=True, dask_=True,
-            verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, kernel, run_type=None):
         image = check_array(image)
@@ -41,7 +37,13 @@ class Convolution(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -85,8 +87,13 @@ class Convolution(LiquidEngine):
 
     % endfor
 
-    def _run_opencl(self, image, kernel, device):
-        
+    def _run_opencl(self, image, kernel, device=None):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
+
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -117,16 +124,35 @@ class Convolution(LiquidEngine):
         return image_out
 
     def _run_python(self, image, kernel):
+        """
+        @cpu
+        """
         return convolution2D_python(image, kernel).astype(np.float32)
 
     def _run_transonic(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        """
         return convolution2D_transonic(image, kernel).astype(np.float32)
 
     def _run_dask(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        """
         return convolution2D_dask(image, kernel).astype(np.float32)
 
     def _run_cuda(self, image, kernel):
+        """
+        @gpu
+        """
         return convolution2D_cuda(image, kernel).astype(np.float32)
 
     def _run_njit(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        @numba
+        """
         return convolution2D_numba(image, kernel).astype(np.float32)
diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
index 9e40749e..09710307 100644
--- a/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_esrrf.pyx
@@ -12,7 +12,7 @@ from libc.math cimport cos, sin
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 from ._le_interpolation_catmull_rom import ShiftAndMagnify
 from ._le_roberts_cross_gradients import GradientRobertsCross
@@ -26,10 +26,7 @@ class eSRRF(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "eSRRF_ST"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, magnification: int = 5, radius: float = 1.5, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type=None):
         image = check_image(image)
@@ -40,6 +37,12 @@ class eSRRF(LiquidEngine):
         return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
     def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
+
         # TODO doIntensityWeighting is irrelevant on gpu2
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -151,6 +154,11 @@ class eSRRF(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "${sch}".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -166,6 +174,10 @@ class eSRRF(LiquidEngine):
     % endfor
 
     def _run_unthreaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @cython
+        """
         runtype = "Unthreaded"
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
diff --git a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
index 2f244a0b..e10253b3 100644
--- a/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_esrrf3d.pyx
@@ -28,9 +28,7 @@ class eSRRF3D(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "eSRRF_3D"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True, verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         self._gradients_s_interpolated = None
         self._gradients_r_interpolated = None
         self._gradients_c_interpolated = None
@@ -60,8 +58,14 @@ class eSRRF3D(LiquidEngine):
             return super().benchmark(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
     % for sch in schedulers:
-    def _run_${sch}(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_${sch}(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
diff --git a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
index d49153be..e854107f 100644
--- a/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_nlm_denoising.pyx
@@ -14,7 +14,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 import os
 os.environ['PYOPENCL_NO_CACHE']='1'
@@ -38,9 +38,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -76,6 +73,9 @@ class NLMDenoising(LiquidEngine):
 
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
@@ -83,6 +83,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef float distance_cutoff = 5.0
         cdef float var = sigma * sigma
 
@@ -161,6 +165,11 @@ class NLMDenoising(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -227,7 +236,13 @@ class NLMDenoising(LiquidEngine):
         %endfor
     
 
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
+        
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
index d828e973..40ceeacf 100644
--- a/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_patch_nlm_denoising.pyx
@@ -12,7 +12,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_integral_image.h":
@@ -31,9 +31,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising_patch"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -68,6 +65,9 @@ class NLMDenoising(LiquidEngine):
         return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma)
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
@@ -75,6 +75,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef float distance_cutoff = 5.0
         cdef float var = sigma * sigma
 
@@ -153,6 +157,11 @@ class NLMDenoising(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -231,7 +240,12 @@ class NLMDenoising(LiquidEngine):
     %endfor
     
         
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
diff --git a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
index 32ba7c96..0d90e2a4 100644
--- a/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_pixel_nlm_denoising.pyx
@@ -14,7 +14,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_patch_distance.h":
@@ -30,9 +30,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising_pixel"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -67,6 +64,9 @@ class NLMDenoising(LiquidEngine):
         return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) 
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False)
@@ -75,6 +75,13 @@ class NLMDenoising(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        % if sch!='unthreaded':
+        @threaded
+        % endif
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -143,7 +150,13 @@ class NLMDenoising(LiquidEngine):
         %endfor
     
         
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        @cython
+        """
+        if device is None:
+            device = _fastest_device
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
index 651e6e50..3ad05ca6 100644
--- a/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_radial_gradient_convergence.pyx
@@ -8,7 +8,7 @@ cimport numpy as np
 from cython.parallel import parallel, prange
 
 from libc.math cimport sqrt, pow
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ...__liquid_engine__ import LiquidEngine
 from .__interpolation_tools__ import check_image
 
@@ -24,8 +24,6 @@ class RadialGradientConvergence(LiquidEngine):
         self._designation = "RadialGradientConvergence"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
 
@@ -43,7 +41,10 @@ class RadialGradientConvergence(LiquidEngine):
         return super().benchmark(gradient_col_interp, gradient_row_interp, image_interp, magnification, radius, sensitivity, doIntensityWeighting)
 
     def _run_unthreaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -73,7 +74,11 @@ class RadialGradientConvergence(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -107,6 +112,11 @@ class RadialGradientConvergence(LiquidEngine):
 
     
     def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # gradient gxgymag*mag*size
         # image_interp = mag*size
diff --git a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
index 14aa2fb5..4b4ac4b5 100644
--- a/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_radiality.pyx
@@ -9,7 +9,7 @@ from cython.parallel import parallel, prange
 
 from libc.math cimport sqrt, pi, fabs, cos, sin
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from .__interpolation_tools__ import check_image
 
 from ._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify
@@ -35,8 +35,6 @@ class Radiality(LiquidEngine):
         self._designation = "Radiality"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=False, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
     def run(self, image, image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True, run_type = None): 
@@ -49,7 +47,7 @@ class Radiality(LiquidEngine):
         image_interp = check_image(image_interp)
         return super().benchmark(image, image_interp, magnification, ringRadius, border, radialityPositivityConstraint, doIntensityWeighting)
 
-    def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
+    """def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
 
         cdef int _magnification = magnification
         cdef int _border = border
@@ -85,11 +83,15 @@ class Radiality(LiquidEngine):
                         else:
                             imRad[f,j,i] = _c_calculate_radiality_per_subpixel(i, j, &imGx[f,0,0], &imGy[f,0,0], xRingCoordinates, yRingCoordinates, _magnification, _ringRadius, nRingCoordinates, _radialityPositivityConstraint, h, w)
 
-        return np.asarray(imRad)
+        return np.asarray(imRad)"""
 
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -132,6 +134,11 @@ class Radiality(LiquidEngine):
 
     
     def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         cl_ctx = cl.Context([device['device']])
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
index 48967a41..d3e8ae1c 100644
--- a/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
+++ b/src/mako_templates/nanopyx.core.transform._le_roberts_cross_gradients.pyx
@@ -4,7 +4,7 @@ schedulers = ['threaded','threaded_guided','threaded_dynamic','threaded_static']
 
 import numpy as np
 cimport numpy as np
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ...__liquid_engine__ import LiquidEngine
 
 from cython.parallel import prange
@@ -19,8 +19,6 @@ class GradientRobertsCross(LiquidEngine):
         self._designation = "GradientRobertsCross"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
     def run(self, image, run_type = None):
@@ -32,7 +30,10 @@ class GradientRobertsCross(LiquidEngine):
         return super().benchmark(image)
     
     def _run_unthreaded(self, float[:,:,:] image):
-
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
         cdef float [:,:,:] gradient_row = np.zeros_like(image)
@@ -46,6 +47,11 @@ class GradientRobertsCross(LiquidEngine):
     
     % for sch in schedulers:
     def _run_${sch}(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -63,7 +69,12 @@ class GradientRobertsCross(LiquidEngine):
         return gradient_col, gradient_row
     % endfor
 
-    def _run_opencl(self, float[:,:,:] image, dict device, int mem_div=1):
+    def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
index 0718f968..a8f4fcf9 100644
--- a/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
+++ b/src/mako_templates/nanopyx.core.utils._le_mandelbrot_benchmark.pyx
@@ -9,7 +9,7 @@ cimport numpy as np
 from cython.parallel import prange
 
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ._le_mandelbrot_benchmark_ import mandelbrot as _py_mandelbrot
 from ._le_mandelbrot_benchmark_ import njit_mandelbrot as _njit_mandelbrot
 
@@ -25,9 +25,7 @@ class MandelbrotBenchmark(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "Mandelbrot_Benchmark"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, python_=True, njit_=True,
+            clear_benchmarks=clear_benchmarks, testing=testing,
             verbose=verbose)
 
     def run(self, int size=1000, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1, run_type=None) -> np.ndarray:
@@ -45,7 +43,12 @@ class MandelbrotBenchmark(LiquidEngine):
     def benchmark(self, int size, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1):
         return super().benchmark(size, r_start, r_end, c_start, c_end)
 
-    def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device) -> np.ndarray:
+    def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -77,6 +80,10 @@ class MandelbrotBenchmark(LiquidEngine):
         return im_mandelbrot.get()
 
     def _run_unthreaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -94,6 +101,11 @@ class MandelbrotBenchmark(LiquidEngine):
 
     % for sch in schedulers:
     def _run_${sch}(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -115,11 +127,19 @@ class MandelbrotBenchmark(LiquidEngine):
     % endfor
 
     def _run_python(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         _py_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end)
         return im_mandelbrot
 
     def _run_njit(self, int size=10, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @numba
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         _njit_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end)
         return im_mandelbrot
diff --git a/src/nanopyx/__agent__.py b/src/nanopyx/__agent__.py
index 946e893a..641a80a1 100644
--- a/src/nanopyx/__agent__.py
+++ b/src/nanopyx/__agent__.py
@@ -1,244 +1 @@
-import platform
-import random
-
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from scipy.stats import norm
-
-from .__njit__ import njit_works
-from .__opencl__ import opencl_works, devices
-
-
-class Agent_:
-
-    """
-    Base class for the Agent of the Nanopyx Liquid Engine
-    Pond, James Pond
-    """
-
-    def __init__(
-        self,
-    ) -> None:
-        """
-        Initialize the Agent
-        The agent is supposed to work as a singleton object, initialized only once in the __init__.py of nanopyx
-        PS: (Is this good enough or is it necessary to implement the singleton design pattern?)
-
-        Agent responsabilities:
-            1. Store the current state of the machine (e.g. OS, CPU, RAM, GPU, Python version etc.);
-            2. Store the current state of ALL initialized LE objects (e.g. anything that is currently running, anything that is scheduled to run,
-                runs previously executed in the current session etc.);
-            3. Whenever a LE object wants to run, it must query the Agent on what is the best implementation for it;
-            4. Tests whether there was an unexpected delay and adjust following paths based on it;
-        """
-
-        ### MACHINE INFO ###
-        self.os_info = {"OS": platform.platform(), "Architecture": platform.machine()}
-        self.cpu_info = {"CPU": platform.processor()}
-        self.ram_info = {"RAM": "TBD"}
-        self.py_info = {
-            "Version": platform.python_version(),
-            "Implementation": platform.python_implementation(),
-            "Compiler": platform.python_compiler(),
-        }
-
-        self.numba_info = {"Numba": njit_works()}
-        self.pyopencl_info = {"PyOpenCL": opencl_works(), "Devices": devices}
-        self.cuda_info = {"CUDA": "TBD"}
-        ### MACHINE INFO ###
-
-        self._current_runs = []
-        self._scheduled_runs = []
-        self._finished_runs = []
-
-        self.delayed_runtypes = {}  # Store runtypes as keys and their values as (delay_factor, delay_prob)
-
-    def _get_ordered_run_types(self, fn, args, kwargs):
-        """@public
-        Retrieves an ordered list of run_types for the given args and kwargs
-        """
-
-        # str representation of the arguments and their corresponding 'norm'
-        repr_args, repr_norm = fn._get_args_repr_score(*args, **kwargs)
-        # dictionary to hold speeds
-        fast_avg_speed = {}
-        fast_std_speed = {}
-        slow_avg_speed = {}
-        slow_std_speed = {}
-        # fn._benchmarks is a dictionary of dictionaries. The first key is the run_type, the second key is the repr_args
-        # Check every run_type for the most similar args
-        for run_type in fn._run_types:
-            if repr_args in fn._benchmarks[run_type]:
-                run_info = fn._benchmarks[run_type][repr_args][1:]
-            else:
-                # if the repr_args are not in the benchmarks, find the most similar repr_args
-                best_score = np.inf
-                best_repr_args = None
-                for repr_args_ in fn._benchmarks[run_type]:
-                    score = np.abs(fn._benchmarks[run_type][repr_args_][0] - repr_norm)
-                    if score < best_score:
-                        best_score = score
-                        best_repr_args = repr_args_
-                # What happens if there are no benchmarks for this runtype?
-                if best_repr_args is None:
-                    run_info = [0]
-                else:
-                    run_info = fn._benchmarks[run_type][best_repr_args][1:]
-
-            if None in run_info: # yamls null are read into None python objects
-                continue
-
-            if len(run_info) < 2:
-                # Fall back to default values
-                if "OpenCL" in run_type:
-                    rt = "OpenCL"
-                else:
-                    rt = run_type
-
-                best_score = np.inf
-                best_repr_args = None
-                for repr_args_ in fn._default_benchmarks[rt]:
-                    score = np.abs(fn._default_benchmarks[rt][repr_args_][0] - repr_norm)
-                    if score < best_score:
-                        best_score = score
-                        best_repr_args = repr_args_
-                run_info = fn._default_benchmarks[rt][best_repr_args][1:]
-
-            run_info = np.array(run_info)
-            if len(run_info) > 50:
-                run_info = run_info[-50:]
-
-            fast_values = np.partition(run_info, len(run_info) // 2)[: len(run_info) // 2]
-            slow_values = np.partition(run_info, len(run_info) // 2)[len(run_info) // 2 :]
-            fast_avg_speed[run_type] = np.average(fast_values)
-            fast_std_speed[run_type] = np.std(fast_values)
-            slow_avg_speed[run_type] = np.average(slow_values)
-            slow_std_speed[run_type] = np.std(slow_values)
-
-        return fast_avg_speed, fast_std_speed, slow_avg_speed, slow_std_speed
-
-    def _calculate_prob_of_delay(self, runtimes_history, avg, std):
-        """@public
-        Calculates the probability that the given run_type is still delayed using historical data
-        """
-
-        # Boolean array, True if delay, False if not
-        delays = runtimes_history > avg + 4 * std
-
-        model = LogisticRegression()
-        model.fit([[state] for state in delays[:-1]], delays[1:])
-
-        return model.predict_proba([[True]])[:, model.classes_.tolist().index(True)][0]
-
-    def _check_delay(self, run_type, runtime, runtimes_history, verbose=True):
-        """@public
-        Checks if the given run_type ran delayed in the previous run when compared with historical data
-        If delayed:
-            1. Calculates a probability that this delay is maintained
-            2. Stores the delay factor and the probability
-        """
-
-        threaded_runtypes = ["Threaded", "Threaded_static", "Threaded_dynamic", "Threaded_guided"]
-
-        runtimes_history = np.array(runtimes_history)
-        if len(runtimes_history) > 50:
-            runtimes_history = runtimes_history[-50:]
-        fast_values = np.partition(runtimes_history, len(runtimes_history) // 2)[: len(runtimes_history) // 2]
-        slow_values = np.partition(runtimes_history, len(runtimes_history) // 2)[len(runtimes_history) // 2 :]
-
-        fast_avg_speed = np.average(fast_values)
-        fast_std_speed = np.std(fast_values)
-        slow_avg_speed = np.average(slow_values)
-        slow_std_speed = np.std(slow_values)
-
-        if run_type in self.delayed_runtypes:
-            if runtime < (slow_avg_speed - slow_std_speed) or runtime < (fast_avg_speed + fast_std_speed):
-                if "Threaded" in run_type:
-                    for threaded_run_type in threaded_runtypes:
-                        self.delayed_runtypes.pop(threaded_run_type, None)
-                else:
-                    if run_type in self.delayed_runtypes:
-                        self.delayed_runtypes.pop(run_type, None)
-                return "Delay off"
-
-        if runtime > fast_avg_speed + 4 * fast_std_speed:
-            runtimes_history = np.append(runtimes_history, runtime)
-            delay_factor = runtime / fast_avg_speed
-            try:
-                delay_prob = self._calculate_prob_of_delay(runtimes_history, fast_avg_speed, fast_std_speed)
-            except ValueError:
-                delay_prob = 0.01
-            if verbose:
-                print(
-                    f"Run type {run_type} was delayed in the previous run. Delay factor: {delay_factor}, Delay probability: {delay_prob}"
-                )
-
-            if "Threaded" in run_type:
-                for threaded_run_type in threaded_runtypes:
-                    self.delayed_runtypes[threaded_run_type] = (delay_factor, delay_prob)
-            else:
-                self.delayed_runtypes[run_type] = (delay_factor, delay_prob)
-
-    def _adjust_times(self, fast_device_times, slow_device_times):
-        """@public
-        Adjusts the historic avg time of a run_type if it was delayed in previous runs
-        """
-        adjusted_times = fast_device_times.copy()
-        for runtype in self.delayed_runtypes.keys():
-            if runtype in fast_device_times.keys():
-                delay_factor, delay_prob = self.delayed_runtypes[runtype]
-                # Weighted avg by the probability the run_type is still delayed
-                # expected_time * P(~delay) + delayed_time * P(delay)
-                adjusted_times[runtype] = (
-                    fast_device_times[runtype] * (1 - delay_prob)
-                    + fast_device_times[runtype] * delay_factor * delay_prob
-                )
-
-        return adjusted_times
-
-    def get_run_type(self, fn, args, kwargs):
-        """
-        Returns the best run_type for the given args and kwargs
-        """
-
-        # Get list of run types
-        fast_avg, fast_std, slow_avg, slow_std = self._get_ordered_run_types(fn, args, kwargs)
-
-        # Penalize the average time a run_type had if that run_type was delayed in previous runs
-        if len(self.delayed_runtypes.keys()) > 0:
-            adjusted_avg = self._adjust_times(fast_avg, slow_avg)
-
-            if sorted(fast_avg, key=fast_avg.get)[0] == sorted(adjusted_avg, key=adjusted_avg.get)[0]:
-                return sorted(fast_avg, key=fast_avg.get)[0]
-
-            weights = [(1 / adjusted_avg[k]) ** 2 for k in adjusted_avg]
-            weights = weights / np.sum(weights)
-
-            # failsafe
-            if sum(weights) == 0:
-                weights = [1 for k in adjusted_avg]
-
-            return random.choices(list(adjusted_avg.keys()), weights=weights, k=1)[0]
-        else:
-            return sorted(fast_avg, key=fast_avg.get)[0]
-
-    def _inform(self, fn, verbose=True):
-        """@public
-        Informs the Agent that a LE object finished running
-        """
-
-        repr_args = fn._last_args
-        run_type = fn._last_runtype
-
-        historical_data = fn._benchmarks[run_type][repr_args][1:]
-
-        assert historical_data[-1] == fn._last_time, "Historical data is not consistent with the last runtime"
-
-        if verbose:
-            print(f"Agent: {fn._designation} using {run_type} ran in {fn._last_time} seconds")
-
-        if len(historical_data) > 19:
-            self._check_delay(run_type, historical_data[-1], historical_data[:-1], verbose=verbose)
-
-
-Agent = Agent_()
+from liquid_engine import Agent
diff --git a/src/nanopyx/__liquid_engine__.py b/src/nanopyx/__liquid_engine__.py
index 56b7b692..6638e067 100644
--- a/src/nanopyx/__liquid_engine__.py
+++ b/src/nanopyx/__liquid_engine__.py
@@ -1,531 +1 @@
-import os
-import timeit
-import yaml
-import datetime
-import inspect
-import warnings
-from functools import partial, reduce
-from itertools import combinations
-from pathlib import Path
-
-from importlib_resources import files
-
-import numpy as np
-
-# This will in the future come from the Agent
-from .__njit__ import njit_works
-from .__dask__ import dask_works
-from .__transonic__ import transonic_works
-from .__cuda__ import cuda_works
-from .__opencl__ import opencl_works, devices, cl
-
-__home_folder__ = os.path.expanduser("~")
-__benchmark_folder__ = os.path.join(__home_folder__, ".nanopyx")
-if not os.path.exists(__benchmark_folder__):
-    os.makedirs(__benchmark_folder__)
-
-from .__agent__ import Agent  # noqa: E402
-
-from .core.analysis.pearson_correlation import pearson_correlation
-
-
-class LiquidEngine:
-    """@public
-    Base class for parts of the Nanopyx Liquid Engine
-    Vroom Vroom
-    """
-
-    def __init__(
-        self,
-        testing: bool = False,
-        opencl_: bool = False,
-        unthreaded_: bool = False,
-        threaded_: bool = False,
-        threaded_static_: bool = False,
-        threaded_dynamic_: bool = False,
-        threaded_guided_: bool = False,
-        python_: bool = False,
-        njit_: bool = False,
-        dask_: bool = False,
-        transonic_: bool = False,
-        cuda_: bool = False,
-        clear_benchmarks: bool = False,
-        verbose: bool = True,
-    ) -> None:
-        """@public
-        Initialize the Liquid Engine
-        The Liquid Engine base class is inherited by children classes that implement specific methods
-
-        Engine responsabilities:
-        1. Store implemented run types;
-        2. Handle previous benchmarks and I/O;
-        2. When queried, benchmark all available run types;
-        3. Run a specific method using a selected run type;
-
-        Benchmark files have the following format:
-        The benchmark file is read as dict of dicts.
-            BENCHMARK DICT FOR A SPECIFIC METHOD
-                |- RUN_TYPE #1
-                |      |- ARGS_REPR #1
-                |      |      |- [score, t2run#1, t2run#2, t2run#3, ...] last are newer. nan means fail
-                |      |- ARGS_REPR #2
-                |      |      |- [score, t2run#1, t2run#2, t2run#3, ...] last are newer. nan means fail
-                |      (...)
-                |- RUN_TYPE #2
-                (...)
-        """
-
-        # Start by checking available run types
-        self._run_types = {}
-        if opencl_ and opencl_works():
-            for d in devices:
-                self._run_types[f"OpenCL_{d['device'].name}"] = partial(self._run_opencl, device=d)
-        if threaded_:
-            self._run_types["Threaded"] = self._run_threaded
-        if unthreaded_:
-            self._run_types["Unthreaded"] = self._run_unthreaded
-        if threaded_static_:
-            self._run_types["Threaded_static"] = self._run_threaded_static
-        if threaded_dynamic_:
-            self._run_types["Threaded_dynamic"] = self._run_threaded_dynamic
-        if threaded_guided_:
-            self._run_types["Threaded_guided"] = self._run_threaded_guided
-        if python_:
-            self._run_types["Python"] = self._run_python
-        if njit_ and njit_works():
-            self._run_types["Numba"] = self._run_njit
-            # Try to trigger early compilation
-            try:
-                self._run_njit()
-            except TypeError:
-                print("Consider adding default arguments to the njit implementation to trigger early compilation")
-        if dask_ and dask_works():
-            self._run_types["Dask"] = self._run_dask
-        if transonic_ and transonic_works():
-            self._run_types["Transonic"] = self._run_transonic
-        if cuda_ and cuda_works():
-            self._run_types["Cuda"] = self._run_cuda
-
-        self.testing = testing
-        self.mem_div = 1
-
-        # benchmarks file path
-        # e.g.: ~/.nanopyx/liquid/_le_interpolation_nearest_neighbor.cpython-310-darwin/ShiftAndMagnify.yml
-        base_path = os.path.join(
-            __benchmark_folder__, "liquid", os.path.split(os.path.splitext(inspect.getfile(self.__class__))[0])[1]
-        )
-        os.makedirs(base_path, exist_ok=True)
-        self._benchmark_filepath = os.path.join(base_path, self.__class__.__name__ + ".yml")
-
-        # Load config file if it exists, otherwise create an empty config
-        if not clear_benchmarks and os.path.exists(self._benchmark_filepath):
-            with open(self._benchmark_filepath) as f:
-                self._benchmarks = yaml.load(f, Loader=yaml.FullLoader)
-        else:
-            self._benchmarks = {}
-
-        # check if the cfg dictionary has a key for every available run type
-        for run_type_designation in self._run_types.keys():
-            if run_type_designation not in self._benchmarks:
-                self._benchmarks[run_type_designation] = {}
-
-        # helper attribute for benchmarking function
-        self._last_args = None
-        self._last_runtype = None
-        self._last_time = None
-
-        self.Agent = Agent
-
-        # load defaults
-        try:
-            self._default_benchmarks = yaml.safe_load(
-                files(f'liquid_benchmarks.{inspect.getmodule(self.__class__).__name__.split(".")[-1]}')
-                .joinpath(self.__class__.__name__ + ".yml")
-                .read_text()
-            )
-        except:
-            self._default_benchmarks = []
-
-        self.verbose = verbose
-
-    def _run(self, *args, run_type=None, **kwargs):
-        """@public
-        Runs the function with the given args and kwargs
-
-        The code above does the following:
-        1. Check the specified run_type
-            - if str checks if the run type exists otherwise raise a NotImplementedError
-        2. It will run the _run_{run_type} function
-        3. It will return the result and the time taken to run
-
-        :param args: args for the function
-        :param run_type: the run type to use
-        :param kwargs: kwargs for the function
-        :return: the result and time taken
-        """
-
-        if run_type is None and self.verbose:
-            print("Querying the Agent...")
-            run_type = self.Agent.get_run_type(self, args, kwargs)
-            print(f"Agent chose: {run_type}")
-        elif run_type is None:
-            run_type = self.Agent.get_run_type(self, args, kwargs)
-        elif run_type not in self._run_types:
-            print(f"Unexpected run type {run_type}")
-            print("Querying the Agent...")
-            run_type = self.Agent.get_run_type(self, args, kwargs)
-            print(f"Agent chose: {run_type}")
-
-        # try to run
-        try:
-            if self.mem_div > 999:
-                raise ValueError(
-                    f"Maxmimum memory division factor achieved, can not try any longer with {run_type}. Use a smaller input or a different run_type"
-                )
-            t_start = timeit.default_timer()
-            result = self._run_types[run_type](*args, **kwargs)
-            t2run = timeit.default_timer() - t_start
-            arg_repr, arg_score = self._get_args_repr_score(*args, **kwargs)
-            self._store_results(arg_repr, arg_score, run_type, t2run)
-
-            self._last_time = t2run
-            self._last_args = arg_repr
-            self._last_runtype = run_type
-
-            self.Agent._inform(self, verbose=self.verbose)
-
-        except (cl.MemoryError, cl.LogicError) as e:
-            print("Found: ", e)
-            print("Reducing maximum buffer size and trying again...")
-            self.mem_div += 1
-            kwargs["mem_div"] = self.mem_div
-            result = self._run(*args, run_type=run_type, **kwargs)
-        except cl.Error as e:
-            if e.__str__() == "Buffer size is larger than device maximum memory allocation size":
-                print("Found: ", e)
-                print("Reducing maximum buffer size and trying again...")
-                self.mem_div += 1
-                kwargs["mem_div"] = self.mem_div
-                result = self._run(*args, run_type=run_type, **kwargs)
-            else:
-                print(f"Unexpected error while trying to run {run_type}")
-                print(e)
-                print("Please try again with another run type")
-                result = None
-        except Exception as e:
-            print(f"Unexpected error while trying to run {run_type}")
-            print(e)
-            print("Please try again with another run type")
-            result = None
-
-        self.mem_div = 1
-        return result
-
-    def benchmark(self, *args, **kwargs):
-        """
-        1. Run each available run type and record the run time and return value
-        2. Sort the run times from fastest to slowest
-        3. Compare each run type against each other, sorted by speed
-
-        :param args: args for the run method
-        :param kwargs: kwargs for the run method
-        :return:  a list of tuples containing the run time, run type name and optionally the return values
-        :rtype: [[run_time, run_type_name, return_value], ...]
-        """
-
-        # Create some lists to store runtimes and return values of run types
-        run_times = {}
-        returns = {}
-
-        # Run each run type and record the run time and return value
-        for run_type in self._run_types:
-            r = self._run(*args, run_type=run_type, **kwargs)
-
-            run_times[run_type] = self._last_time
-
-            if self.testing:  # Store return values if testing
-                returns[run_type] = r
-            else:
-                returns[run_type] = None
-
-        # Sort run_times by value
-        speed_sort = []
-        for run_type in sorted(run_times, key=run_times.get, reverse=False):
-            speed_sort.append(
-                (
-                    run_times[run_type],
-                    run_type,
-                    returns[run_type],
-                )
-            )
-
-        print(f"Fastest run type: {speed_sort[0][1]}")
-        print(f"Slowest run type: {speed_sort[-1][1]}")
-
-        # Compare each run type against each other, sorted by speed
-        different_runtypes = []
-        for pair in combinations(speed_sort, 2):
-            print(f"{pair[0][1]} is {pair[1][0]/pair[0][0]:.2f}x faster than {pair[1][1]}")
-            if self.testing:
-                if self._compare_runs(pair[0][2], pair[1][2]):
-                    print(f"{pair[0][1]} and {pair[1][1]} have similar outputs!")
-                else:
-                    warnings.warn(f"WARNING: outputs of {pair[0][1]} and {pair[1][1]} don't match!")
-                    different_runtypes.append(set([pair[0][1], pair[1][1]]))
-        if len(different_runtypes) <= len(self._run_types) - 1:
-            try:
-                common_runtype = reduce(lambda a, b: a & b, different_runtypes)
-            except TypeError:
-                common_runtype = {}
-            if common_runtype:
-                warnings.warn(f"WARNING: disabling {list(common_runtype)[0]} for this set of arguments!")
-                arg_repr, arg_score = self._get_args_repr_score(*args, **kwargs)
-                self._store_results(arg_repr, arg_score, list(common_runtype)[0], None)  # None saves to null in yamls
-
-        return speed_sort
-
-    def _compare_runs(self, output_1, output_2):
-        """@public"""
-        if output_1.ndim > 2:
-            pcc = 0
-            for i in range(output_1.shape[0]):
-                pcc += pearson_correlation(output_1[i, :, :], output_2[i, :, :])
-            pcc /= output_1.shape[0]
-        else:
-            pcc = pearson_correlation(output_1, output_2)
-
-        if pcc > 0.8:
-            return True
-        else:
-            return False
-
-    def _get_cl_code(self, file_name, cl_dp):
-        """
-        Retrieves the OpenCL code from the corresponding .cl file
-        """
-        cl_file = os.path.splitext(file_name)[0] + ".cl"
-
-        if not os.path.exists(cl_file):
-            cl_file = Path(os.path.abspath(inspect.getfile(self.__class__))).parent / file_name
-
-        assert os.path.exists(cl_file), "Could not find OpenCL file: " + str(cl_file)
-
-        kernel_str = open(cl_file).read()
-
-        if not cl_dp:
-            kernel_str = kernel_str.replace("double", "float")
-
-        return kernel_str
-
-    def _store_results(self, arg_repr, arg_score, run_type, t2run):
-        """@public
-        Stores the results of a run
-        """
-
-        # Check if the run type has been run, and if not create empty info
-        run_type_benchs = self._benchmarks[run_type]
-        if arg_repr not in run_type_benchs:
-            run_type_benchs[arg_repr] = [arg_score]
-
-        # Get the run info
-        c = run_type_benchs[arg_repr]
-
-        assert c[0] == arg_score, "arg_score mismatch"
-
-        c.append(t2run)
-
-        self._dump_run_times()
-
-    def _dump_run_times(
-        self,
-    ):
-        """@public"""
-        # TODO We might need to wrap this into a multiprocessing.Queue if we find it blocking
-        with open(self._benchmark_filepath, "w") as f:
-            yaml.dump(self._benchmarks, f)
-
-    def _get_args_repr_score(self, *args, **kwargs):
-        """@public
-        Get a string representation of the args and kwargs and corresponding 'score' / 'norm'
-        The idea is that similar args have closer 'score'. Fuzzy logic
-
-        The code does the following:
-        1. It converts any args that are floats or ints to "number()" strings, and any args that are tensors to "shape()" strings
-        2. It converts any kwargs that are floats or ints to "number()" strings, and any kwargs that are tensors to "shape()" strings
-        3. The 'score' is given by the product of all the floats or ints and all the shape sizes.
-
-        :return: the string representation of the args and kwargs
-        :rtype: str
-        """
-        _norm = 1
-        _args = []
-        for arg in args:
-            if type(arg) in (float, int):
-                _args.append(f"number({arg})")
-                if arg == 0:
-                    arg = 1
-                _norm *= arg
-            elif hasattr(arg, "shape"):
-                _args.append(f"shape{arg.shape}")
-                _norm *= arg.size
-            else:
-                _args.append(arg)
-
-        _kwargs = {}
-        for k, v in kwargs.items():
-            if type(v) in (float, int):
-                _kwargs[k] = f"number({v})"
-                if v == 0:
-                    v = 1
-                _norm *= v
-            if hasattr(v, "shape"):
-                _kwargs[k] = f"shape{arg.shape}"
-                _norm *= v.size
-            else:
-                _kwargs[k] = v
-
-        return repr((_args, _kwargs)), _norm
-
-    def get_highest_divisor(self, size_, max_):
-        """
-        Returns the highest divisor of size_ that is still lower than max_
-        """
-        value = 1
-        for i in range(1, int(np.sqrt(size_) + 1)):
-            if size_ % i == 0:
-                if i * i != size_:
-                    div2 = size_ / i
-
-                    if i < max_:
-                        value = max(value, i)
-                    if div2 < max_:
-                        value = max(value, div2)
-        return int(value)
-
-    def get_work_group(self, device, shape):
-        """
-        Calculates work group size for a given device and shape of global work space
-        """
-
-        max_wg_dims = device.max_work_item_sizes[0:3]
-        max_glo_dims = device.max_work_group_size
-
-        three = self.get_highest_divisor(shape[2], max_wg_dims[2])
-        max_two = max_glo_dims / three
-        two = self.get_highest_divisor(shape[1], max_two)
-        one = 1
-        return (one, two, three)
-
-    def _check_max_slices(self, input, number_of_max_slices):
-        """@public
-        Checks if number of maximum slices is greater than 0
-        """
-        if number_of_max_slices < 1:
-            raise ValueError("This device doesn't have enough memory to run this function with this input")
-        elif input.shape[0] < number_of_max_slices:
-            return input.shape[0]
-        else:
-            return number_of_max_slices
-
-    def _check_max_buffer_size(self, size, device, n_slices):
-        """@public
-        Checks if buffer size is larger than device maximum memory allocation size and n_slices is 1 and raises appropriate errors that are handled in the _run function.
-        """
-        if size > device.max_mem_alloc_size and n_slices == 1:
-            raise ValueError(
-                "This device cannot handle this input size with these parameters, try using a smaller input or other parameters"
-            )
-
-        if size > device.max_mem_alloc_size:
-            raise cl.Error("Buffer size is larger than device maximum memory allocation size")
-
-        return size
-
-    #####################################################
-    #                   RUN METHODS                     #
-    # THESE SHOULD ALWAYS BE OVERRIDEN BY CHILD CLASSES #
-    #####################################################
-
-    def run(self, *args, **kwargs):
-        """
-        Runs the function with the given args and kwargs
-        Should be overridden by the any class that inherits from this class
-        """
-        return self._run(*args, **kwargs)
-
-    def _run_opencl(*args, **kwargs):
-        """@public
-        Runs the OpenCL version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_unthreaded(*args, **kwargs):
-        """@public
-        Runs the cython unthreaded version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_threaded(*args, **kwargs):
-        """@public
-        Runs the cython threaded version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_threaded_static(*args, **kwargs):
-        """@public
-        Runs the cython threaded static version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_threaded_dynamic(*args, **kwargs):
-        """@public
-        Runs the cython threaded dynamic version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_threaded_guided(*args, **kwargs):
-        """@public
-        Runs the cython threaded guided version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_python(*args, **kwargs):
-        """@public
-        Runs the python version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_njit(*args, **kwargs):
-        """@public
-        Runs the njit version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_dask(*args, **kwargs):
-        """@public
-        Runs the dask version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_transonic(*args, **kwargs):
-        """@public
-        Runs the transonic version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
-
-    def _run_cuda(*args, **kwargs):
-        """@public
-        Runs the cuda version of the function
-        Should be overridden by the any class that inherits from this class
-        """
-        pass
+from liquid_engine import LiquidEngine
\ No newline at end of file
diff --git a/src/nanopyx/__opencl__.py b/src/nanopyx/__opencl__.py
index 8eeacfbe..b8830ded 100644
--- a/src/nanopyx/__opencl__.py
+++ b/src/nanopyx/__opencl__.py
@@ -8,8 +8,11 @@
     import pyopencl.array as cl_array
 
     devices = []
+    _fastest_device = None
+    max_perf = 0
+
     for platform in cl.get_platforms():
-        if "Microsoft" in platform.vendor:  # TODO this takes out integrated graphics
+        if "Microsoft" in platform.vendor:  # TODO this takes out emulated GPUs
             continue
         for dev in platform.get_devices():
             # check if the device is a GPU
@@ -19,7 +22,11 @@
                 cl_dp = False
             else:
                 cl_dp = False
-
+
+            perf = dev.max_compute_units * dev.max_clock_frequency  # rough throughput proxy used to rank devices
+            if _fastest_device is None or perf > max_perf:  # first device is always a fallback, even if perf == 0
+                max_perf = perf
+                _fastest_device = {"device": dev, "DP": cl_dp}
             devices.append({"device": dev, "DP": cl_dp})
 
 
@@ -28,6 +35,7 @@
     cl = None
     cl_array = None
     devices = None
+    _fastest_device = None
 
 
 def print_opencl_info():
diff --git a/src/nanopyx/core/analysis/_le_channel_registration.pyx b/src/nanopyx/core/analysis/_le_channel_registration.pyx
index 0ecff710..09c6e4c0 100644
--- a/src/nanopyx/core/analysis/_le_channel_registration.pyx
+++ b/src/nanopyx/core/analysis/_le_channel_registration.pyx
@@ -7,7 +7,7 @@ from cython.parallel import parallel, prange
 from libc.math cimport sqrt,pow
 
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from .ccm cimport _calculate_slice_ccm
 
 from .estimate_shift import GetMaxOptimizer
@@ -97,8 +97,7 @@ class ChannelRegistrationEstimator(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ChannelRegistrationEstimator"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            unthreaded_=True, threaded_=True, threaded_static_=True, threaded_dynamic_=True, threaded_guided_=True, opencl_=True, verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=None):
         return self._run(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity, run_type=run_type)
@@ -107,6 +106,10 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return super().benchmark(img_stack, img_ref, max_shift, blocks_per_axis, min_similarity)
 
     def _run_unthreaded(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @cython
+        """
         _runtype = "unthreaded".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -241,6 +244,11 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
     def _run_threaded(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         _runtype = "threaded".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -375,6 +383,11 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
     def _run_threaded_guided(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         _runtype = "threaded_guided".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -509,6 +522,11 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
     def _run_threaded_dynamic(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         _runtype = "threaded_dynamic".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -643,6 +661,11 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
     def _run_threaded_static(self, float[:,:, :] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         _runtype = "threaded_static".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
 
@@ -777,8 +800,14 @@ class ChannelRegistrationEstimator(LiquidEngine):
         return np.array(translation_masks)
 
 
-    def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device):
-        _runtype = "OpenCL_" + device["device"].name
+    def _run_opencl(self, float[:,:,:] img_stack, int ref_index, int max_shift, int blocks_per_axis, float min_similarity, device=None):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
+
+        _runtype = "opencl"
         crsm = ShiftAndMagnify(verbose=False)
 
         cdef float[:, :] img_ref = np.asarray(img_stack[ref_index], dtype=np.float32)
diff --git a/src/nanopyx/core/analysis/_le_drift_calculator.pyx b/src/nanopyx/core/analysis/_le_drift_calculator.pyx
index 8c0658e0..25165f7f 100644
--- a/src/nanopyx/core/analysis/_le_drift_calculator.pyx
+++ b/src/nanopyx/core/analysis/_le_drift_calculator.pyx
@@ -25,10 +25,7 @@ class DriftEstimator(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "DriftEstimator"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing,
-            opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=False, 
-            threaded_dynamic_=False, threaded_guided_=False,
-            njit_=False, python_=False, transonic_=False, cuda_=False, dask_=False, verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, time_averaging: int = 2, max_drift: int = 5, ref_option: int = 0, run_type=None):
         return self._run(np.asarray(image).astype(np.float32), time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option, run_type=run_type)
@@ -37,7 +34,10 @@ class DriftEstimator(LiquidEngine):
         return super().benchmark(image, time_averaging=time_averaging, max_drift=max_drift, ref_option=ref_option)
 
     def _run_unthreaded(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
+        """
+        @cpu
+        @cython
+        """
         if not _check_even_square(image):
             image = _make_even_square(image)
 
@@ -126,7 +126,11 @@ class DriftEstimator(LiquidEngine):
 
         return np.asarray(output).astype(np.float32)
     def _run_threaded(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if not _check_even_square(image):
             image = _make_even_square(image)
 
@@ -214,280 +218,3 @@ class DriftEstimator(LiquidEngine):
                 output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
 
         return np.asarray(output).astype(np.float32)
-    def _run_threaded_guided(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
-        if not _check_even_square(image):
-            image = _make_even_square(image)
-
-        # get image dimensions, should already be an even square
-        cdef int n_slices = image.shape[0]
-        cdef int n_rows = image.shape[1]
-        cdef int n_cols = image.shape[2]
-
-        # ensures time averaging has an acceptable value
-        if time_averaging < 1:
-            time_averaging = 1
-        elif time_averaging > (n_slices//2):
-            time_averaging = n_slices//2
-
-        cdef int n_blocks = n_slices // time_averaging
-        
-        averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32)
-
-        cdef int idx
-        if time_averaging == 1:
-            averaged = image
-        else:
-            for idx in range(n_blocks):
-                averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0)
-
-        cdef float[:, :, :] ccm
-        cdef int row_start
-        cdef int col_start
-        if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols:
-            row_start = int(n_rows / 2 - max_drift)
-            col_start = int(n_cols / 2 - max_drift)
-            ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)]
-        else:
-            ccm = _calculate_ccm(averaged, ref_option)
-
-        cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32)
-        
-        cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32)
-
-        cdef float bias_row = 0.0
-        cdef float bias_col = 0.0
-        cdef float shift_x, shift_y
-
-        cdef int i
-        for i in range(n_blocks):
-
-            optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32))
-            shift_y, shift_x = optimizer.get_max()
-
-            drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3)
-            drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3)
-
-            if i == 0:
-                bias_row = drift_table[i, 0]
-                bias_col = drift_table[i, 1]
-            drift_table[i, 0] = drift_table[i, 0] - bias_row
-            drift_table[i, 1] = drift_table[i, 1] - bias_col
-
-            if ref_option == 1 and i > 0:
-                drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0]
-                drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1]
-
-        cdef float[:] drift_x, drift_y
-        if time_averaging > 1:
-            lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int)
-            x_interpolator = interp1d(
-                lin, np.array(drift_table[:, 1]), kind="cubic"
-            ) 
-            y_interpolator = interp1d(
-                lin, np.array(drift_table[:, 0]), kind="cubic"
-            )
-
-            drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 1] = drift_x
-            drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 2] = drift_y
-
-        else:
-            output[:, 1] = drift_table[:, 1] # switch order of rows and cols
-            output[:, 2] = drift_table[:, 0] # switch order of rows and cols
-
-        cdef int s
-        with nogil:
-            for s in prange(n_slices,schedule="guided"): 
-                output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
-
-        return np.asarray(output).astype(np.float32)
-    def _run_threaded_dynamic(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
-        if not _check_even_square(image):
-            image = _make_even_square(image)
-
-        # get image dimensions, should already be an even square
-        cdef int n_slices = image.shape[0]
-        cdef int n_rows = image.shape[1]
-        cdef int n_cols = image.shape[2]
-
-        # ensures time averaging has an acceptable value
-        if time_averaging < 1:
-            time_averaging = 1
-        elif time_averaging > (n_slices//2):
-            time_averaging = n_slices//2
-
-        cdef int n_blocks = n_slices // time_averaging
-        
-        averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32)
-
-        cdef int idx
-        if time_averaging == 1:
-            averaged = image
-        else:
-            for idx in range(n_blocks):
-                averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0)
-
-        cdef float[:, :, :] ccm
-        cdef int row_start
-        cdef int col_start
-        if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols:
-            row_start = int(n_rows / 2 - max_drift)
-            col_start = int(n_cols / 2 - max_drift)
-            ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)]
-        else:
-            ccm = _calculate_ccm(averaged, ref_option)
-
-        cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32)
-        
-        cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32)
-
-        cdef float bias_row = 0.0
-        cdef float bias_col = 0.0
-        cdef float shift_x, shift_y
-
-        cdef int i
-        for i in range(n_blocks):
-
-            optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32))
-            shift_y, shift_x = optimizer.get_max()
-
-            drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3)
-            drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3)
-
-            if i == 0:
-                bias_row = drift_table[i, 0]
-                bias_col = drift_table[i, 1]
-            drift_table[i, 0] = drift_table[i, 0] - bias_row
-            drift_table[i, 1] = drift_table[i, 1] - bias_col
-
-            if ref_option == 1 and i > 0:
-                drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0]
-                drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1]
-
-        cdef float[:] drift_x, drift_y
-        if time_averaging > 1:
-            lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int)
-            x_interpolator = interp1d(
-                lin, np.array(drift_table[:, 1]), kind="cubic"
-            ) 
-            y_interpolator = interp1d(
-                lin, np.array(drift_table[:, 0]), kind="cubic"
-            )
-
-            drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 1] = drift_x
-            drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 2] = drift_y
-
-        else:
-            output[:, 1] = drift_table[:, 1] # switch order of rows and cols
-            output[:, 2] = drift_table[:, 0] # switch order of rows and cols
-
-        cdef int s
-        with nogil:
-            for s in prange(n_slices,schedule="dynamic"): 
-                output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
-
-        return np.asarray(output).astype(np.float32)
-    def _run_threaded_static(self, float[:, :, :] image,  int time_averaging=2, int max_drift=5, int ref_option=0):
-
-        if not _check_even_square(image):
-            image = _make_even_square(image)
-
-        # get image dimensions, should already be an even square
-        cdef int n_slices = image.shape[0]
-        cdef int n_rows = image.shape[1]
-        cdef int n_cols = image.shape[2]
-
-        # ensures time averaging has an acceptable value
-        if time_averaging < 1:
-            time_averaging = 1
-        elif time_averaging > (n_slices//2):
-            time_averaging = n_slices//2
-
-        cdef int n_blocks = n_slices // time_averaging
-        
-        averaged = np.empty((n_blocks, n_rows, n_cols), dtype=np.float32)
-
-        cdef int idx
-        if time_averaging == 1:
-            averaged = image
-        else:
-            for idx in range(n_blocks):
-                averaged[idx, :, :] = np.mean(image[idx*time_averaging:(idx+1)*time_averaging, :, :], axis=0)
-
-        cdef float[:, :, :] ccm
-        cdef int row_start
-        cdef int col_start
-        if max_drift > 0 and max_drift * 2 + 1 < n_rows and max_drift * 2 + 1 < n_cols:
-            row_start = int(n_rows / 2 - max_drift)
-            col_start = int(n_cols / 2 - max_drift)
-            ccm = _calculate_ccm(averaged, ref_option)[:, row_start : row_start + (max_drift * 2), col_start : col_start + (max_drift * 2)]
-        else:
-            ccm = _calculate_ccm(averaged, ref_option)
-
-        cdef float[:, :] drift_table = np.zeros((n_blocks, 2), dtype=np.float32)
-        
-        cdef float[:, :] output = np.zeros((image.shape[0], 3), dtype=np.float32)
-
-        cdef float bias_row = 0.0
-        cdef float bias_col = 0.0
-        cdef float shift_x, shift_y
-
-        cdef int i
-        for i in range(n_blocks):
-
-            optimizer = GetMaxOptimizer(np.ascontiguousarray(ccm[i], dtype=np.float32))
-            shift_y, shift_x = optimizer.get_max()
-
-            drift_table[i, 0] = round((ccm.shape[1]/2) - shift_y - 0.5, 3)
-            drift_table[i, 1] = round((ccm.shape[2]/2) - shift_x - 0.5, 3)
-
-            if i == 0:
-                bias_row = drift_table[i, 0]
-                bias_col = drift_table[i, 1]
-            drift_table[i, 0] = drift_table[i, 0] - bias_row
-            drift_table[i, 1] = drift_table[i, 1] - bias_col
-
-            if ref_option == 1 and i > 0:
-                drift_table[i, 0] = drift_table[i, 0] + drift_table[i-1, 0]
-                drift_table[i, 1] = drift_table[i, 1] + drift_table[i-1, 1]
-
-        cdef float[:] drift_x, drift_y
-        if time_averaging > 1:
-            lin = np.linspace(1, image.shape[0], num=drift_table.shape[0], endpoint=True, dtype=int)
-            x_interpolator = interp1d(
-                lin, np.array(drift_table[:, 1]), kind="cubic"
-            ) 
-            y_interpolator = interp1d(
-                lin, np.array(drift_table[:, 0]), kind="cubic"
-            )
-
-            drift_x = np.asarray(x_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 1] = drift_x
-            drift_y = np.asarray(y_interpolator(range(1, image.shape[0]+1)), dtype=np.float32).reshape(n_slices)
-            output[:, 2] = drift_y
-
-        else:
-            output[:, 1] = drift_table[:, 1] # switch order of rows and cols
-            output[:, 2] = drift_table[:, 0] # switch order of rows and cols
-
-        cdef int s
-        with nogil:
-            for s in prange(n_slices,schedule="static"): 
-                output[s, 0] = sqrt((output[s, 1]*output[s, 1]) + (output[s, 2] * output[s, 2]))
-
-        return np.asarray(output).astype(np.float32)
-
-
-# % if sch=='unthreaded':
-#     for i in range(n_blocks):
-#     % elif sch=='threaded':
-#     for i in prange(n_blocks):
-#     % else:
-#     for i in prange(n_blocks,schedule="static"):
-#     %endif
-#         average[i] = np.mean(image[i*time_averaging:(i+1)*time_averaging, :, :], axis=0)
\ No newline at end of file
diff --git a/src/nanopyx/core/transform/_le_convolution.pyx b/src/nanopyx/core/transform/_le_convolution.pyx
index 5b3b541b..ce2fde47 100644
--- a/src/nanopyx/core/transform/_le_convolution.pyx
+++ b/src/nanopyx/core/transform/_le_convolution.pyx
@@ -13,7 +13,7 @@ from libc.math cimport cos, sin
 from .__interpolation_tools__ import check_image, value2array
 from .convolution import check_array, convolution2D_cuda, convolution2D_dask, convolution2D_numba, convolution2D_python, convolution2D_transonic
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 class Convolution(LiquidEngine):
@@ -24,11 +24,7 @@ class Convolution(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "Conv2D"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True,
-            njit_=True, python_=True, transonic_=True, cuda_=True, dask_=True,
-            verbose=verbose)
+            clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, kernel, run_type=None):
         image = check_array(image)
@@ -38,7 +34,10 @@ class Convolution(LiquidEngine):
         return super().benchmark(image, kernel)
 
     def _run_unthreaded(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -73,7 +72,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
     def _run_threaded(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -108,7 +111,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
     def _run_threaded_guided(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -143,7 +150,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
     def _run_threaded_dynamic(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -178,7 +189,11 @@ class Convolution(LiquidEngine):
         return conv_out
 
     def _run_threaded_static(self, float[:,:] image, float[:,:] kernel):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nRows = image.shape[0]
         cdef int nCols = image.shape[1]
 
@@ -213,8 +228,13 @@ class Convolution(LiquidEngine):
         return conv_out
 
 
-    def _run_opencl(self, image, kernel, device):
-        
+    def _run_opencl(self, image, kernel, device=None):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
+
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -245,16 +265,35 @@ class Convolution(LiquidEngine):
         return image_out
 
     def _run_python(self, image, kernel):
+        """
+        @cpu
+        """
         return convolution2D_python(image, kernel).astype(np.float32)
 
     def _run_transonic(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        """
         return convolution2D_transonic(image, kernel).astype(np.float32)
 
     def _run_dask(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        """
         return convolution2D_dask(image, kernel).astype(np.float32)
 
     def _run_cuda(self, image, kernel):
+        """
+        @gpu
+        """
         return convolution2D_cuda(image, kernel).astype(np.float32)
 
     def _run_njit(self, image, kernel):
+        """
+        @cpu
+        @threaded
+        @numba
+        """
         return convolution2D_numba(image, kernel).astype(np.float32)
diff --git a/src/nanopyx/core/transform/_le_esrrf.pyx b/src/nanopyx/core/transform/_le_esrrf.pyx
index 4c25f36b..df40fa2e 100644
--- a/src/nanopyx/core/transform/_le_esrrf.pyx
+++ b/src/nanopyx/core/transform/_le_esrrf.pyx
@@ -10,7 +10,7 @@ from libc.math cimport cos, sin
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 from ._le_interpolation_catmull_rom import ShiftAndMagnify
 from ._le_roberts_cross_gradients import GradientRobertsCross
@@ -24,10 +24,7 @@ class eSRRF(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "eSRRF_ST"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, magnification: int = 5, radius: float = 1.5, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type=None):
         image = check_image(image)
@@ -38,6 +35,12 @@ class eSRRF(LiquidEngine):
         return super().benchmark(image, magnification=magnification, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
     def _run_opencl(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
+
         # TODO doIntensityWeighting is irrelevant on gpu2
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -148,6 +151,11 @@ class eSRRF(LiquidEngine):
         return output_image
 
     def _run_threaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "threaded".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -161,6 +169,11 @@ class eSRRF(LiquidEngine):
 
         return radial_gradients
     def _run_threaded_guided(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "threaded_guided".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -174,6 +187,11 @@ class eSRRF(LiquidEngine):
 
         return radial_gradients
     def _run_threaded_dynamic(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "threaded_dynamic".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -187,6 +205,11 @@ class eSRRF(LiquidEngine):
 
         return radial_gradients
     def _run_threaded_static(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         runtype = "threaded_static".capitalize()
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
@@ -201,6 +224,10 @@ class eSRRF(LiquidEngine):
         return radial_gradients
 
     def _run_unthreaded(self, image, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
+        """
+        @cpu
+        @cython
+        """
         runtype = "Unthreaded"
         crsm = ShiftAndMagnify(verbose=False)
         rbc = GradientRobertsCross(verbose=False)
diff --git a/src/nanopyx/core/transform/_le_esrrf3d.pyx b/src/nanopyx/core/transform/_le_esrrf3d.pyx
index b3c2c5a7..e1f27f7b 100644
--- a/src/nanopyx/core/transform/_le_esrrf3d.pyx
+++ b/src/nanopyx/core/transform/_le_esrrf3d.pyx
@@ -26,9 +26,7 @@ class eSRRF3D(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "eSRRF_3D"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=False, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True, verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         self._gradients_s_interpolated = None
         self._gradients_r_interpolated = None
         self._gradients_c_interpolated = None
@@ -43,10 +41,10 @@ class eSRRF3D(LiquidEngine):
         if image.dtype != np.float32:
             image = image.astype(np.float32)
         if len(image.shape) == 4:
-            return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type)
+            return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, radius_z=radius_z, ratio_px=ratio_px, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type)
         elif len(image.shape) == 3:
             image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
-            return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type)
+            return self._run(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, radius_z=radius_z, ratio_px=ratio_px, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting, run_type=run_type)
 
     def benchmark(self, image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
         if image.dtype != np.float32:
@@ -57,8 +55,12 @@ class eSRRF3D(LiquidEngine):
             image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
             return super().benchmark(image, magnification_xy=magnification_xy, magnification_z=magnification_z, radius=radius, sensitivity=sensitivity, doIntensityWeighting=doIntensityWeighting)
 
-    def _run_threaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_threaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -121,8 +123,12 @@ class eSRRF3D(LiquidEngine):
                                 rgc_map[f, sM, rM, cM] = rgc_val
         
         return np.asarray(rgc_map)
-    def _run_threaded_guided(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_threaded_guided(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -185,8 +191,12 @@ class eSRRF3D(LiquidEngine):
                                 rgc_map[f, sM, rM, cM] = rgc_val
         
         return np.asarray(rgc_map)
-    def _run_threaded_dynamic(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_threaded_dynamic(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -249,8 +259,12 @@ class eSRRF3D(LiquidEngine):
                                 rgc_map[f, sM, rM, cM] = rgc_val
         
         return np.asarray(rgc_map)
-    def _run_threaded_static(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_threaded_static(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -313,8 +327,11 @@ class eSRRF3D(LiquidEngine):
                                 rgc_map[f, sM, rM, cM] = rgc_val
         
         return np.asarray(rgc_map)
-    def _run_unthreaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True, run_type="Threaded"):
-
+    def _run_unthreaded(self, float[:,:,:,:] image, magnification_xy: int = 5, magnification_z: int = 5, radius: float = 1.5, radius_z: float = 1.5, ratio_px: float = 4.0, sensitivity: float = 1, doIntensityWeighting: bool = True):
+        """
+        @cpu
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
diff --git a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
index 7a348921..d5f62230 100644
--- a/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_bicubic.pyx
@@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_bicubic.h":
@@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_bicubic"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -63,8 +60,12 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -112,6 +113,10 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -136,6 +141,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -160,6 +170,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -184,6 +199,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -208,6 +228,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -238,11 +263,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_bicubic"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -285,7 +308,12 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -337,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -371,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -405,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -439,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -473,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -512,11 +564,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_bicubic"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -553,7 +603,12 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -609,7 +664,10 @@ class PolarTransform(LiquidEngine):
         return output
         
     def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -643,7 +701,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -677,7 +739,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -711,7 +777,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -745,7 +815,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
index 99ca8d07..f080fd06 100644
--- a/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_catmull_rom.pyx
@@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_catmull_rom.h":
@@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_catmull_rom"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -63,8 +60,12 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -112,6 +113,10 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -136,6 +141,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -160,6 +170,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -184,6 +199,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -208,6 +228,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -238,11 +263,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_catmull_rom"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -285,7 +308,12 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -337,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -371,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -405,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -439,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -473,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -512,11 +564,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_catmull_rom"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -553,7 +603,12 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -609,7 +664,10 @@ class PolarTransform(LiquidEngine):
         return output
         
     def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -643,7 +701,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -677,7 +739,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -711,7 +777,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -745,7 +815,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
index bf1cd551..4537579d 100644
--- a/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_lanczos.pyx
@@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_lanczos.h":
@@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_lanczos"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -63,8 +60,12 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -112,6 +113,10 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -136,6 +141,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -160,6 +170,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -184,6 +199,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -208,6 +228,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -238,11 +263,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_lanczos"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -285,7 +308,12 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -337,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -371,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -405,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -439,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -473,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -512,11 +564,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_lanczos"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -553,7 +603,12 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -609,7 +664,10 @@ class PolarTransform(LiquidEngine):
         return output
         
     def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -643,7 +701,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -677,7 +739,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -711,7 +777,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -745,7 +815,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
index 0df765ae..d53ee3d4 100644
--- a/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
+++ b/src/nanopyx/core/transform/_le_interpolation_nearest_neighbor.pyx
@@ -7,7 +7,7 @@ from libc.math cimport cos, sin, pi, hypot, exp, log
 
 from .__interpolation_tools__ import check_image, value2array
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_interpolation_nearest_neighbor.h":
@@ -21,10 +21,7 @@ class ShiftAndMagnify(LiquidEngine):
 
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftMagnify_nearest_neighbor"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True,
-                        verbose=verbose)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, shift_row, shift_col, float magnification_row, float magnification_col, run_type=None) -> np.ndarray:
         """
@@ -63,8 +60,12 @@ class ShiftAndMagnify(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, magnification_row, magnification_col)
 
-    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device, int mem_div=1) -> np.ndarray:
-
+    def _run_opencl(self, image, shift_row, shift_col, float magnification_row, float magnification_col, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
@@ -112,6 +113,10 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -136,6 +141,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -160,6 +170,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -184,6 +199,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -208,6 +228,11 @@ class ShiftAndMagnify(LiquidEngine):
         return image_out
 
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float magnification_row, float magnification_col) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -238,11 +263,9 @@ class ShiftScaleRotate(LiquidEngine):
     Shift, Scale and Rotate (affine transform) using the NanoPyx Liquid Engine
     """
 
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "ShiftScaleRotate_nearest_neighbor"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
         
     def run(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, run_type=None) -> np.ndarray:
         """
@@ -285,7 +308,12 @@ class ShiftScaleRotate(LiquidEngine):
         image = check_image(image)
         return super().benchmark(image, shift_row, shift_col, scale_row, scale_col, angle)
 
-    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, shift_row, shift_col, float scale_row, float scale_col, float angle, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -337,6 +365,10 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
 
     def _run_unthreaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -371,6 +403,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -405,6 +442,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_guided(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -439,6 +481,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_dynamic(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -473,6 +520,11 @@ class ShiftScaleRotate(LiquidEngine):
         return image_out
         
     def _run_threaded_static(self, float[:,:,:] image, float shift_row, float shift_col, float scale_row, float scale_col, float angle) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -512,11 +564,9 @@ class PolarTransform(LiquidEngine):
     Polar Transformations using the NanoPyx Liquid Engine
     """
     
-    def __init__(self, clear_benchmarks=False, testing=False):
+    def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "PolarTransform_nearest_neighbor"
-        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, 
-                        opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-                        threaded_dynamic_=True, threaded_guided_=True)
+        super().__init__(clear_benchmarks=clear_benchmarks, testing=testing, verbose=verbose)
 
     def run(self, image, tuple out_shape, str scale, run_type=None) -> np.ndarray:
         """
@@ -553,7 +603,12 @@ class PolarTransform(LiquidEngine):
             scale = 'linear'
         return super().benchmark(image, nrow, ncol, scale)
 
-    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device, int mem_div=1):
+    def _run_opencl(self, image, int nrow, int ncol, str scale, dict device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -609,7 +664,10 @@ class PolarTransform(LiquidEngine):
         return output
         
     def _run_unthreaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -643,7 +701,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -677,7 +739,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_guided(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -711,7 +777,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_dynamic(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
@@ -745,7 +815,11 @@ class PolarTransform(LiquidEngine):
 
         return image_out
     def _run_threaded_static(self, float[:,:,:] image, int nrow, int ncol, str scale):
-        
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef int rows = image.shape[1]
         cdef int cols = image.shape[2]
diff --git a/src/nanopyx/core/transform/_le_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_nlm_denoising.pyx
index d13a94de..b5aab716 100644
--- a/src/nanopyx/core/transform/_le_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_nlm_denoising.pyx
@@ -12,7 +12,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 import os
 os.environ['PYOPENCL_NO_CACHE']='1'
@@ -36,9 +36,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -74,6 +71,9 @@ class NLMDenoising(LiquidEngine):
 
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
@@ -81,6 +81,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef float distance_cutoff = 5.0
         cdef float var = sigma * sigma
 
@@ -158,6 +162,11 @@ class NLMDenoising(LiquidEngine):
                                             pad_size: -pad_size]).astype(np.float32))
 
     def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -218,6 +227,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -278,6 +292,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -338,6 +357,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -399,7 +423,13 @@ class NLMDenoising(LiquidEngine):
 
     
 
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
+        
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
index 506ce316..f16d28a1 100644
--- a/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_patch_nlm_denoising.pyx
@@ -10,7 +10,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_integral_image.h":
@@ -29,9 +29,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising_patch"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -66,6 +63,9 @@ class NLMDenoising(LiquidEngine):
         return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma)
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=True)
@@ -73,6 +73,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         cdef float distance_cutoff = 5.0
         cdef float var = sigma * sigma
 
@@ -150,6 +154,11 @@ class NLMDenoising(LiquidEngine):
                                             pad_size: -pad_size]).astype(np.float32))
 
     def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -218,6 +227,11 @@ class NLMDenoising(LiquidEngine):
                         
         return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32))
     def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -286,6 +300,11 @@ class NLMDenoising(LiquidEngine):
                         
         return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32))
     def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -354,6 +373,11 @@ class NLMDenoising(LiquidEngine):
                         
         return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32))
     def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef float distance_cutoff = 5.0
 
@@ -423,7 +447,12 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(output_result[:, pad_size: -pad_size,pad_size: -pad_size]).astype(np.float32))
     
         
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
diff --git a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
index 206d8824..b0358c27 100644
--- a/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
+++ b/src/nanopyx/core/transform/_le_pixel_nlm_denoising.pyx
@@ -12,7 +12,7 @@ from cython.parallel import parallel, prange
 
 from .__interpolation_tools__ import check_image
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 
 
 cdef extern from "_c_patch_distance.h":
@@ -28,9 +28,6 @@ class NLMDenoising(LiquidEngine):
         self._designation = "NLMDenoising_pixel"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True,
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
-            python_=True,
             verbose=verbose)
 
     def run(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0, run_type=None) -> np.ndarray:
@@ -65,6 +62,9 @@ class NLMDenoising(LiquidEngine):
         return super().benchmark(image, patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma) 
 
     def _run_python(self, np.ndarray image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        """
         out = np.zeros_like(image)
         for i in range(image.shape[0]):
             out[i] = denoise_nl_means(image[i], patch_size=patch_size, patch_distance=patch_distance, h=h, sigma=sigma, fast_mode=False)
@@ -72,6 +72,10 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(out)
 
     def _run_unthreaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -132,6 +136,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -192,6 +201,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_guided(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -252,6 +266,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_dynamic(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -312,6 +331,11 @@ class NLMDenoising(LiquidEngine):
         return np.squeeze(np.asarray(result))
 
     def _run_threaded_static(self, float[:, :, :] image, int patch_size=7, int patch_distance=11, float h=0.1, float sigma=0.0) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         if patch_size % 2 == 0:
             patch_size = patch_size + 1  # odd value for symmetric patch
 
@@ -373,7 +397,13 @@ class NLMDenoising(LiquidEngine):
 
     
         
-    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device, int mem_div=1) -> np.ndarray:
+    def _run_opencl(self, image, int patch_size, int patch_distance, float h, float sigma, dict device=None, int mem_div=1) -> np.ndarray:
+        """
+        @gpu
+        @cython
+        """
+        if device is None:
+            device = _fastest_device
         cl_ctx = cl.Context([device['device']])
         dc = device['device']
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
index 69f4f656..0e9d1ab1 100644
--- a/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
+++ b/src/nanopyx/core/transform/_le_radial_gradient_convergence.pyx
@@ -6,7 +6,7 @@ cimport numpy as np
 from cython.parallel import parallel, prange
 
 from libc.math cimport sqrt, pow
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ...__liquid_engine__ import LiquidEngine
 from .__interpolation_tools__ import check_image
 
@@ -22,8 +22,6 @@ class RadialGradientConvergence(LiquidEngine):
         self._designation = "RadialGradientConvergence"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
 
@@ -41,7 +39,10 @@ class RadialGradientConvergence(LiquidEngine):
         return super().benchmark(gradient_col_interp, gradient_row_interp, image_interp, magnification, radius, sensitivity, doIntensityWeighting)
 
     def _run_unthreaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -70,7 +71,11 @@ class RadialGradientConvergence(LiquidEngine):
         return np.asarray(rgc_map,dtype=np.float32)
 
     def _run_threaded(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -97,7 +102,11 @@ class RadialGradientConvergence(LiquidEngine):
                             rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION,  fwhm, tSO, tSS, _sensitivity)
         return np.asarray(rgc_map,dtype=np.float32)
     def _run_threaded_guided(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -124,7 +133,11 @@ class RadialGradientConvergence(LiquidEngine):
                             rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION,  fwhm, tSO, tSS, _sensitivity)
         return np.asarray(rgc_map,dtype=np.float32)
     def _run_threaded_dynamic(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -151,7 +164,11 @@ class RadialGradientConvergence(LiquidEngine):
                             rgc_map[f, rM, cM] = _c_calculate_rgc(cM, rM, &gradient_col_interp[f,0,0], &gradient_row_interp[f,0,0], colsM, rowsM, _magnification, Gx_Gy_MAGNIFICATION,  fwhm, tSO, tSS, _sensitivity)
         return np.asarray(rgc_map,dtype=np.float32)
     def _run_threaded_static(self, float[:,:,:] gradient_col_interp, float[:,:,:] gradient_row_interp, float[:,:,:] image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef float sigma = radius / 2.355
         cdef float fwhm = radius
         cdef float tSS = 2 * sigma * sigma
@@ -180,6 +197,11 @@ class RadialGradientConvergence(LiquidEngine):
 
     
     def _run_opencl(self, gradient_col_interp, gradient_row_interp, image_interp, magnification=5, radius=1.5, sensitivity=1, doIntensityWeighting=True, device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # gradient gxgymag*mag*size
         # image_interp = mag*size
diff --git a/src/nanopyx/core/transform/_le_radiality.pyx b/src/nanopyx/core/transform/_le_radiality.pyx
index 76a5c313..bad66375 100644
--- a/src/nanopyx/core/transform/_le_radiality.pyx
+++ b/src/nanopyx/core/transform/_le_radiality.pyx
@@ -7,7 +7,7 @@ from cython.parallel import parallel, prange
 
 from libc.math cimport sqrt, pi, fabs, cos, sin
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from .__interpolation_tools__ import check_image
 
 from ._le_interpolation_catmull_rom import ShiftAndMagnify as CRShiftAndMagnify
@@ -33,8 +33,6 @@ class Radiality(LiquidEngine):
         self._designation = "Radiality"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=False, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
     def run(self, image, image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True, run_type = None): 
@@ -47,7 +45,7 @@ class Radiality(LiquidEngine):
         image_interp = check_image(image_interp)
         return super().benchmark(image, image_interp, magnification, ringRadius, border, radialityPositivityConstraint, doIntensityWeighting)
 
-    def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
+    """def _run_unthreaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
 
         cdef int _magnification = magnification
         cdef int _border = border
@@ -83,10 +81,14 @@ class Radiality(LiquidEngine):
                         else:
                             imRad[f,j,i] = _c_calculate_radiality_per_subpixel(i, j, &imGx[f,0,0], &imGy[f,0,0], xRingCoordinates, yRingCoordinates, _magnification, _ringRadius, nRingCoordinates, _radialityPositivityConstraint, h, w)
 
-        return np.asarray(imRad)
+        return np.asarray(imRad)"""
 
     def _run_threaded(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -122,7 +124,11 @@ class Radiality(LiquidEngine):
 
         return np.asarray(imRad)
     def _run_threaded_guided(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -158,7 +164,11 @@ class Radiality(LiquidEngine):
 
         return np.asarray(imRad)
     def _run_threaded_dynamic(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -194,7 +204,11 @@ class Radiality(LiquidEngine):
 
         return np.asarray(imRad)
     def _run_threaded_static(self, float[:,:,:] image, float[:,:,:] image_interp, magnification: int = 5, ringRadius: float = 0.5, border: int = 0, radialityPositivityConstraint: bool = True, doIntensityWeighting: bool = True):
-
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         cdef int _magnification = magnification
         cdef int _border = border
         cdef float _ringRadius = ringRadius * magnification
@@ -232,6 +246,11 @@ class Radiality(LiquidEngine):
 
     
     def _run_opencl(self, image, image_interp, magnification=5, ringRadius=0.5, border=0, radialityPositivityConstraint=True, doIntensityWeighting=True, device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         cl_ctx = cl.Context([device['device']])
         cl_queue = cl.CommandQueue(cl_ctx)
diff --git a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
index 28fd22f5..002423ed 100644
--- a/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
+++ b/src/nanopyx/core/transform/_le_roberts_cross_gradients.pyx
@@ -2,7 +2,7 @@
 
 import numpy as np
 cimport numpy as np
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ...__liquid_engine__ import LiquidEngine
 
 from cython.parallel import prange
@@ -17,8 +17,6 @@ class GradientRobertsCross(LiquidEngine):
         self._designation = "GradientRobertsCross"
         super().__init__(
             clear_benchmarks=clear_benchmarks, testing=testing,
-            unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, opencl_=True,
             verbose=verbose)
 
     def run(self, image, run_type = None):
@@ -30,7 +28,10 @@ class GradientRobertsCross(LiquidEngine):
         return super().benchmark(image)
     
     def _run_unthreaded(self, float[:,:,:] image):
-
+        """
+        @cpu
+        @cython
+        """
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
         cdef float [:,:,:] gradient_row = np.zeros_like(image)
@@ -43,6 +44,11 @@ class GradientRobertsCross(LiquidEngine):
         return gradient_col, gradient_row
     
     def _run_threaded(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -55,6 +61,11 @@ class GradientRobertsCross(LiquidEngine):
         
         return gradient_col, gradient_row
     def _run_threaded_guided(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -67,6 +78,11 @@ class GradientRobertsCross(LiquidEngine):
         
         return gradient_col, gradient_row
     def _run_threaded_dynamic(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -79,6 +95,11 @@ class GradientRobertsCross(LiquidEngine):
         
         return gradient_col, gradient_row
     def _run_threaded_static(self, float[:,:,:] image):
+        """
+        @cpu
+        @threaded
+        @cython
+        """
 
         cdef int nFrames = image.shape[0]
         cdef float [:,:,:] gradient_col = np.zeros_like(image) 
@@ -91,7 +112,12 @@ class GradientRobertsCross(LiquidEngine):
         
         return gradient_col, gradient_row
 
-    def _run_opencl(self, float[:,:,:] image, dict device, int mem_div=1):
+    def _run_opencl(self, float[:,:,:] image, dict device=None, int mem_div=1):
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
diff --git a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
index a61fca34..d28896a9 100644
--- a/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
+++ b/src/nanopyx/core/utils/_le_mandelbrot_benchmark.pyx
@@ -7,7 +7,7 @@ cimport numpy as np
 from cython.parallel import prange
 
 from ...__liquid_engine__ import LiquidEngine
-from ...__opencl__ import cl, cl_array
+from ...__opencl__ import cl, cl_array, _fastest_device
 from ._le_mandelbrot_benchmark_ import mandelbrot as _py_mandelbrot
 from ._le_mandelbrot_benchmark_ import njit_mandelbrot as _njit_mandelbrot
 
@@ -23,9 +23,7 @@ class MandelbrotBenchmark(LiquidEngine):
     def __init__(self, clear_benchmarks=False, testing=False, verbose=True):
         self._designation = "Mandelbrot_Benchmark"
         super().__init__(
-            clear_benchmarks=clear_benchmarks, testing=testing, 
-            opencl_=True, unthreaded_=True, threaded_=True, threaded_static_=True, 
-            threaded_dynamic_=True, threaded_guided_=True, python_=True, njit_=True,
+            clear_benchmarks=clear_benchmarks, testing=testing,
             verbose=verbose)
 
     def run(self, int size=1000, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1, run_type=None) -> np.ndarray:
@@ -43,7 +41,12 @@ class MandelbrotBenchmark(LiquidEngine):
     def benchmark(self, int size, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1):
         return super().benchmark(size, r_start, r_end, c_start, c_end)
 
-    def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device) -> np.ndarray:
+    def _run_opencl(self, int size, float r_start, float r_end, float c_start, float c_end, dict device=None) -> np.ndarray:
+        """
+        @gpu
+        """
+        if device is None:
+            device = _fastest_device
 
         # QUEUE AND CONTEXT
         cl_ctx = cl.Context([device['device']])
@@ -75,6 +78,10 @@ class MandelbrotBenchmark(LiquidEngine):
         return im_mandelbrot.get()
 
     def _run_unthreaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -91,6 +98,11 @@ class MandelbrotBenchmark(LiquidEngine):
         return im_mandelbrot
 
     def _run_threaded(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -106,6 +118,11 @@ class MandelbrotBenchmark(LiquidEngine):
 
         return im_mandelbrot
     def _run_threaded_guided(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -121,6 +138,11 @@ class MandelbrotBenchmark(LiquidEngine):
 
         return im_mandelbrot
     def _run_threaded_dynamic(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -136,6 +158,11 @@ class MandelbrotBenchmark(LiquidEngine):
 
         return im_mandelbrot
     def _run_threaded_static(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @cython
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         cdef int[:,:] _im_mandelbrot = im_mandelbrot
 
@@ -152,11 +179,19 @@ class MandelbrotBenchmark(LiquidEngine):
         return im_mandelbrot
 
     def _run_python(self, int size, float r_start, float r_end, float c_start, float c_end) -> np.ndarray:
+        """
+        @cpu
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         _py_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end)
         return im_mandelbrot
 
     def _run_njit(self, int size=10, float r_start=-1.5, float r_end=0.5, float c_start=-1, float c_end=1) -> np.ndarray:
+        """
+        @cpu
+        @threaded
+        @numba
+        """
         im_mandelbrot = np.empty((size, size), dtype=np.int32)
         _njit_mandelbrot(im_mandelbrot, r_start, r_end, c_start, c_end)
         return im_mandelbrot