diff --git a/examples/hyperactive_intro.ipynb b/examples/hyperactive_intro.ipynb
index 5a047b32..149ce654 100644
--- a/examples/hyperactive_intro.ipynb
+++ b/examples/hyperactive_intro.ipynb
@@ -51,17 +51,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "8c428229",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "def sphere(opt):\n",
-    "    x = opt[\"x\"]\n",
-    "    y = opt[\"y\"]\n",
-    "\n",
-    "    return -x**2 - y**2"
-   ]
+   "source": "\"\"\"Hyperactive optimization library introduction notebook.\n\nThis notebook demonstrates unified interfaces for optimizers and experiments\nusing the Hyperactive optimization library.\n\"\"\"\n\n\ndef sphere(opt):\n    \"\"\"Evaluate sphere function for optimization.\n\n    Parameters\n    ----------\n    opt : dict\n        Dictionary with 'x' and 'y' keys containing numeric values.\n\n    Returns\n    -------\n    float\n        Negative sum of squares (for maximization).\n    \"\"\"\n    x = opt[\"x\"]\n    y = opt[\"y\"]\n\n    return -(x**2) - y**2"
   },
   {
    "cell_type": "markdown",
@@ -139,7 +133,7 @@
    "source": [
     "from hyperactive.experiment.bench import Parabola\n",
     "\n",
-    "?Parabola"
+    "Parabola?"
    ]
   },
   {
@@ -319,27 +313,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "57110e86",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "from hyperactive.experiment.integrations import SklearnCvExperiment\n",
-    "from sklearn.datasets import load_iris\n",
-    "from sklearn.svm import SVC\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "from sklearn.model_selection import KFold\n",
-    "\n",
-    "X, y = load_iris(return_X_y=True)\n",
-    "\n",
-    "sklearn_exp = SklearnCvExperiment(\n",
-    "    estimator=SVC(),\n",
-    "    scoring=accuracy_score,\n",
-    "    cv=KFold(n_splits=3, shuffle=True),\n",
-    "    X=X,\n",
-    "    y=y,\n",
-    ")"
-   ]
+   "source": "from sklearn.datasets import load_iris\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import KFold\nfrom sklearn.svm import SVC\n\nfrom hyperactive.experiment.integrations import SklearnCvExperiment\n\nX, y = load_iris(return_X_y=True)\n\nsklearn_exp = SklearnCvExperiment(\n    estimator=SVC(),\n    scoring=accuracy_score,\n    cv=KFold(n_splits=3, shuffle=True),\n    X=X,\n    y=y,\n)"
   },
   {
    "cell_type": "markdown",
@@ -500,57 +478,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "ab78b796",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "def sphere(opt):\n",
-    "    x = opt[\"x\"]\n",
-    "    y = opt[\"y\"]\n",
-    "\n",
-    "    return -x**2 - y**2"
-   ]
+   "source": "def sphere(opt):\n    \"\"\"Evaluate sphere function for optimization.\n\n    Parameters\n    ----------\n    opt : dict\n        Dictionary with 'x' and 'y' keys containing numeric values.\n\n    Returns\n    -------\n    float\n        Negative sum of squares (for maximization).\n    \"\"\"\n    x = opt[\"x\"]\n    y = opt[\"y\"]\n\n    return -(x**2) - y**2"
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "id": "7104e5ec",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "                                                                                                                       \r"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'x': np.float64(0.10101010101010033), 'y': np.float64(0.10101010101010033)}"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "from hyperactive.opt import HillClimbing\n",
-    "\n",
-    "hillclimbing_config = {\n",
-    "    \"search_space\": {\n",
-    "        \"x\": np.linspace(-10, 10, 100),\n",
-    "        \"y\": np.linspace(-10, 10, 100),\n",
-    "    },\n",
-    "    \"n_iter\": 1000,\n",
-    "}\n",
-    "hill_climbing = HillClimbing(**hillclimbing_config, experiment=sphere)\n",
-    "\n",
-    "hill_climbing.solve()"
-   ]
+   "outputs": [],
+   "source": "import numpy as np\n\nfrom hyperactive.opt import HillClimbing\n\nhillclimbing_config = {\n    \"search_space\": {\n        \"x\": np.linspace(-10, 10, 100),\n        \"y\": np.linspace(-10, 10, 100),\n    },\n    \"n_iter\": 1000,\n}\nhill_climbing = HillClimbing(**hillclimbing_config, experiment=sphere)\n\nhill_climbing.solve()"
   },
   {
    "cell_type": "markdown",
@@ -562,56 +502,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "id": "5e2328c9",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "from hyperactive.experiment.integrations import SklearnCvExperiment\n",
-    "from sklearn.datasets import load_iris\n",
-    "from sklearn.svm import SVC\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "from sklearn.model_selection import KFold\n",
-    "\n",
-    "X, y = load_iris(return_X_y=True)\n",
-    "\n",
-    "sklearn_exp = SklearnCvExperiment(\n",
-    "    estimator=SVC(),\n",
-    "    scoring=accuracy_score,\n",
-    "    cv=KFold(n_splits=3, shuffle=True),\n",
-    "    X=X,\n",
-    "    y=y,\n",
-    ")"
-   ]
+   "source": "from sklearn.datasets import load_iris\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import KFold\nfrom sklearn.svm import SVC\n\nfrom hyperactive.experiment.integrations import SklearnCvExperiment\n\nX, y = load_iris(return_X_y=True)\n\nsklearn_exp = SklearnCvExperiment(\n    estimator=SVC(),\n    scoring=accuracy_score,\n    cv=KFold(n_splits=3, shuffle=True),\n    X=X,\n    y=y,\n)"
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "id": "e9a07a73",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'C': 0.01, 'gamma': 1}"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from hyperactive.opt import GridSearchSk as GridSearch\n",
-    "\n",
-    "param_grid = {\n",
-    "    \"C\": [0.01, 0.1, 1, 10],\n",
-    "    \"gamma\": [0.0001, 0.01, 0.1, 1, 10],\n",
-    "}\n",
-    "grid_search = GridSearch(param_grid=param_grid, experiment=sklearn_exp)\n",
-    "\n",
-    "grid_search.solve()"
-   ]
+   "outputs": [],
+   "source": "from hyperactive.opt import GridSearchSk as GridSearch\n\nparam_grid = {\n    \"C\": [0.01, 0.1, 1, 10],\n    \"gamma\": [0.0001, 0.01, 0.1, 1, 10],\n}\ngrid_search = GridSearch(param_grid=param_grid, experiment=sklearn_exp)\n\ngrid_search.solve()"
   },
   {
    "cell_type": "markdown",
@@ -623,67 +526,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "id": "f9a4d922",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "from hyperactive.experiment.integrations import SklearnCvExperiment\n",
-    "from sklearn.datasets import load_iris\n",
-    "from sklearn.svm import SVC\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "from sklearn.model_selection import KFold\n",
-    "\n",
-    "X, y = load_iris(return_X_y=True)\n",
-    "\n",
-    "sklearn_exp = SklearnCvExperiment(\n",
-    "    estimator=SVC(),\n",
-    "    scoring=accuracy_score,\n",
-    "    cv=KFold(n_splits=3, shuffle=True),\n",
-    "    X=X,\n",
-    "    y=y,\n",
-    ")"
-   ]
+   "source": "from sklearn.datasets import load_iris\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import KFold\nfrom sklearn.svm import SVC\n\nfrom hyperactive.experiment.integrations import SklearnCvExperiment\n\nX, y = load_iris(return_X_y=True)\n\nsklearn_exp = SklearnCvExperiment(\n    estimator=SVC(),\n    scoring=accuracy_score,\n    cv=KFold(n_splits=3, shuffle=True),\n    X=X,\n    y=y,\n)"
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "id": "9a13b4f3",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "                                                                                                           \r"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'C': np.float64(10.0), 'gamma': np.float64(0.1)}"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "from hyperactive.opt import HillClimbing\n",
-    "\n",
-    "hillclimbing_config = {\n",
-    "    \"search_space\": {\n",
-    "    \"C\": np.array([0.01, 0.1, 1, 10]),\n",
-    "    \"gamma\": np.array([0.0001, 0.01, 0.1, 1, 10]),\n",
-    "    },\n",
-    "    \"n_iter\": 100,\n",
-    "}\n",
-    "hill_climbing = HillClimbing(**hillclimbing_config, experiment=sklearn_exp)\n",
-    "\n",
-    "hill_climbing.solve()"
-   ]
+   "outputs": [],
+   "source": "import numpy as np\n\nfrom hyperactive.opt import HillClimbing\n\nhillclimbing_config = {\n    \"search_space\": {\n        \"C\": np.array([0.01, 0.1, 1, 10]),\n        \"gamma\": np.array([0.0001, 0.01, 0.1, 1, 10]),\n    },\n    \"n_iter\": 100,\n}\nhill_climbing = HillClimbing(**hillclimbing_config, experiment=sklearn_exp)\n\nhill_climbing.solve()"
   },
   {
    "cell_type": "markdown",
@@ -716,31 +571,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "id": "4bdf2d49",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "# 1. defining the tuned estimator\n",
-    "from sklearn.svm import SVC\n",
-    "from hyperactive.integrations.sklearn import OptCV\n",
-    "from hyperactive.opt import GridSearchSk as GridSearch\n",
-    "\n",
-    "param_grid = {\"kernel\": [\"linear\", \"rbf\"], \"C\": [1, 10]}\n",
-    "tuned_svc = OptCV(SVC(), optimizer=GridSearch(param_grid))\n",
-    "\n",
-    "# 2. fitting the tuned estimator = tuning the hyperparameters\n",
-    "from sklearn.datasets import load_iris\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "X, y = load_iris(return_X_y=True)\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
-    "\n",
-    "tuned_svc.fit(X_train, y_train)\n",
-    "\n",
-    "# 3. making predictions with the tuned estimator\n",
-    "y_pred = tuned_svc.predict(X_test)"
-   ]
+   "source": "# 1. defining the tuned estimator\nfrom sklearn.datasets import load_iris\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.svm import SVC\n\nfrom hyperactive.integrations.sklearn import OptCV\nfrom hyperactive.opt import GridSearchSk as GridSearch\n\nparam_grid = {\"kernel\": [\"linear\", \"rbf\"], \"C\": [1, 10]}\ntuned_svc = OptCV(SVC(), optimizer=GridSearch(param_grid))\n\n# 2. fitting the tuned estimator = tuning the hyperparameters\nX, y = load_iris(return_X_y=True)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n\ntuned_svc.fit(X_train, y_train)\n\n# 3. making predictions with the tuned estimator\ny_pred = tuned_svc.predict(X_test)"
   },
   {
    "cell_type": "markdown",
@@ -1198,48 +1033,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "id": "f606284b",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "                                                                                                              \r"
-     ]
-    }
-   ],
-   "source": [
-    "# 1. defining the tuned estimator\n",
-    "from sklearn.svm import SVC\n",
-    "from hyperactive.integrations.sklearn import OptCV\n",
-    "from hyperactive.opt import HillClimbing\n",
-    "\n",
-    "# picking the optimizer is the only part that changes!\n",
-    "hill_climbing_config = {\n",
-    "    \"search_space\": {\n",
-    "    \"C\": np.array([0.01, 0.1, 1, 10]),\n",
-    "    \"gamma\": np.array([0.0001, 0.01, 0.1, 1, 10]),\n",
-    "    },\n",
-    "    \"n_iter\": 100,\n",
-    "}\n",
-    "hill_climbing = HillClimbing(**hill_climbing_config)\n",
-    "\n",
-    "tuned_svc = OptCV(SVC(), optimizer=hill_climbing)\n",
-    "\n",
-    "# 2. fitting the tuned estimator = tuning the hyperparameters\n",
-    "from sklearn.datasets import load_iris\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "X, y = load_iris(return_X_y=True)\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
-    "\n",
-    "tuned_svc.fit(X_train, y_train)\n",
-    "\n",
-    "# 3. making predictions with the tuned estimator\n",
-    "y_pred = tuned_svc.predict(X_test)"
-   ]
+   "outputs": [],
+   "source": "# 1. defining the tuned estimator\nfrom sklearn.datasets import load_iris\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.svm import SVC\n\nfrom hyperactive.integrations.sklearn import OptCV\nfrom hyperactive.opt import HillClimbing\n\n# picking the optimizer is the only part that changes!\nhill_climbing_config = {\n    \"search_space\": {\n        \"C\": np.array([0.01, 0.1, 1, 10]),\n        \"gamma\": np.array([0.0001, 0.01, 0.1, 1, 10]),\n    },\n    \"n_iter\": 100,\n}\nhill_climbing = HillClimbing(**hill_climbing_config)\n\ntuned_svc = OptCV(SVC(), optimizer=hill_climbing)\n\n# 2. fitting the tuned estimator = tuning the hyperparameters\nX, y = load_iris(return_X_y=True)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n\ntuned_svc.fit(X_train, y_train)\n\n# 3. making predictions with the tuned estimator\ny_pred = tuned_svc.predict(X_test)"
   },
   {
    "cell_type": "markdown",
diff --git a/examples/test_examples.py b/examples/test_examples.py
index 12e147df..ad60695c 100644
--- a/examples/test_examples.py
+++ b/examples/test_examples.py
@@ -10,10 +10,8 @@
 don't break the examples.
 """
 
-import os
 import sys
 import subprocess
-import tempfile
 from pathlib import Path
 import pytest
 
diff --git a/pyproject.toml b/pyproject.toml
index 573f718a..8e8a7f78 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ where = ["src"]
 
 [project]
 name = "hyperactive"
-version = "4.8.1"
+version = "5.0.0"
 description = "An optimization and data collection toolbox for convenient and fast prototyping of computationally expensive models."
 readme = "README.md"
 requires-python = ">=3.9"
diff --git a/src/hyperactive/base/_experiment.py b/src/hyperactive/base/_experiment.py
index f95ae560..22023bed 100644
--- a/src/hyperactive/base/_experiment.py
+++ b/src/hyperactive/base/_experiment.py
@@ -1,4 +1,5 @@
 """Base class for experiment."""
+
 # copyright: hyperactive developers, MIT License (see LICENSE file)
 
 import numpy as np
@@ -22,7 +23,7 @@ def __init__(self):
         super().__init__()
 
     def __call__(self, params):
-        """Score parameters. Same as score call, returns only only a first element."""
+        """Score parameters. Same as score call, returns only the first element."""
         score, _ = self.score(params)
         return score
 
@@ -125,6 +126,15 @@ def score(self, params):
             sign = 1
         elif hib == "lower":
             sign = -1
+        elif hib == "mixed":
+            raise NotImplementedError(
+                "Score is undefined for mixed objectives. Override `score` or "
+                "set a concrete objective where higher or lower is better."
+            )
+        else:
+            raise ValueError(
+                f"Unknown value for tag 'property:higher_or_lower_is_better': {hib}"
+            )
 
         eval_res = self.evaluate(params)
         value = eval_res[0]
diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py b/src/hyperactive/experiment/integrations/_skl_metrics.py
index 1bd130b1..eafb8dde 100644
--- a/src/hyperactive/experiment/integrations/_skl_metrics.py
+++ b/src/hyperactive/experiment/integrations/_skl_metrics.py
@@ -1,6 +1,38 @@
 """Integration utilities for sklearn metrics with Hyperactive."""
 
-__all__ = ["_coerce_to_scorer", "_guess_sign_of_sklmetric"]
+__all__ = [
+    "_coerce_to_scorer",
+    "_coerce_to_scorer_and_sign",
+    "_guess_sign_of_sklmetric",
+]
+
+
+def _default_metric_for(est):
+    """Get a default metric function for a given estimator type.
+
+    Parameters
+    ----------
+    est : sklearn estimator object or str
+        Estimator instance, or a type string "classifier" / "regressor".
+
+    Returns
+    -------
+    metric : callable
+        ``r2_score`` for regressors; ``accuracy_score`` otherwise (fallback).
+    """
+    from sklearn.base import is_classifier, is_regressor
+    from sklearn.metrics import accuracy_score, r2_score
+
+    if isinstance(est, str):
+        if est == "classifier":
+            return accuracy_score
+        if est == "regressor":
+            return r2_score
+    if is_classifier(est):
+        return accuracy_score
+    if is_regressor(est):
+        return r2_score
+    return accuracy_score  # safe fallback
 
 
 def _coerce_to_scorer(scoring, estimator):
@@ -21,35 +53,61 @@ def _coerce_to_scorer(scoring, estimator):
         A sklearn scorer callable.
         Follows the unified sklearn scorer interface
     """
-    from sklearn.metrics import check_scoring
+    from inspect import signature
 
-    # check if scoring is a scorer by checking for "estimator" in signature
+    from sklearn.metrics import check_scoring, make_scorer
+
+    # Resolve to a sklearn scorer/callable first
     if scoring is None:
+        # use default metric for type strings; otherwise rely on sklearn default
         if isinstance(estimator, str):
-            if estimator == "classifier":
-                from sklearn.metrics import accuracy_score
+            scoring = _default_metric_for(estimator)
+            scorer = make_scorer(scoring)
+        else:
+            scorer = check_scoring(estimator)
+    elif callable(scoring):
+        # user-provided callable
+        if "estimator" in signature(scoring).parameters:
+            scorer = scoring  # passthrough scorer signature
+        else:
+            scorer = make_scorer(scoring)
+    else:
+        # string (scorer name)
+        scorer = check_scoring(estimator, scoring=scoring)
 
-                scoring = accuracy_score
-            elif estimator == "regressor":
-                from sklearn.metrics import r2_score
+    return scorer
 
-                scoring = r2_score
-        else:
-            return check_scoring(estimator)
 
-    # check using inspect.signature for "estimator" in signature
-    if callable(scoring):
-        from inspect import signature
+def _coerce_to_scorer_and_sign(scoring, estimator):
+    """Coerce scoring argument into a sklearn scorer and determine sign.
 
-        if "estimator" in signature(scoring).parameters:
-            return scoring
-        else:
-            from sklearn.metrics import make_scorer
+    Parameters
+    ----------
+    scoring : str, callable, or None
+        The scoring strategy to use.
+    estimator : estimator object or str
+        The estimator to use for default scoring if scoring is None.
 
-            return make_scorer(scoring)
-    else:
-        # scoring is a string (scorer name)
-        return check_scoring(estimator, scoring=scoring)
+        If str, indicates estimator type, should be one of {"classifier", "regressor"}.
+
+    Returns
+    -------
+    scorer : callable
+        A sklearn scorer callable.
+        Follows the unified sklearn scorer interface
+    sign : int
+        1 if higher scores are better, -1 if lower scores are better.
+    """
+    scorer = _coerce_to_scorer(scoring, estimator)
+
+    # Scorers built by make_scorer expose the raw metric as `_score_func`;
+    # other scorers fall back to the default metric for the estimator type.
+    score_func = getattr(scorer, "_score_func", None)
+    if score_func is None:
+        score_func = _default_metric_for(estimator)
+    # Attach the metric so downstream integrations can read `_metric_func`.
+    scorer._metric_func = score_func
+    return scorer, _guess_sign_of_sklmetric(score_func)
 
 
 def _guess_sign_of_sklmetric(scorer):
@@ -113,16 +171,18 @@ def _guess_sign_of_sklmetric(scorer):
 
     if hasattr(scorer, "greater_is_better"):
         return 1 if scorer.greater_is_better else -1
-    elif scorer_name in HIGHER_IS_BETTER:
+    if scorer_name is None:
+        # no name available; conservatively assume lower is better
+        return -1
+    if scorer_name in HIGHER_IS_BETTER:
         return 1 if HIGHER_IS_BETTER[scorer_name] else -1
-    elif scorer_name.endswith("_score"):
+    if scorer_name.endswith("_score"):
         # If the scorer name ends with "_score", we assume higher is better
         return 1
-    elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"):
-        # If the scorer name ends with "_loss", we assume lower is better
+    if scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"):
+        # If the scorer name ends with "_loss"/"_deviance", assume lower is better
         return -1
-    elif scorer_name.endswith("_error"):
-        return -1
-    else:
-        # If we cannot determine the sign, we assume lower is better
+    if scorer_name.endswith("_error"):
         return -1
+    # If we cannot determine the sign, assume lower is better
+    return -1
diff --git a/src/hyperactive/experiment/integrations/sklearn_cv.py b/src/hyperactive/experiment/integrations/sklearn_cv.py
index 051edfde..65b6e7a1 100644
--- a/src/hyperactive/experiment/integrations/sklearn_cv.py
+++ b/src/hyperactive/experiment/integrations/sklearn_cv.py
@@ -7,10 +7,7 @@
 from sklearn.utils.validation import _num_samples
 
 from hyperactive.base import BaseExperiment
-from hyperactive.experiment.integrations._skl_metrics import (
-    _coerce_to_scorer,
-    _guess_sign_of_sklmetric,
-)
+from hyperactive.experiment.integrations._skl_metrics import _coerce_to_scorer_and_sign
 
 
 class SklearnCvExperiment(BaseExperiment):
@@ -100,15 +97,11 @@ def __init__(self, estimator, X, y, scoring=None, cv=None):
         else:
             self._cv = cv
 
-        self._scoring = _coerce_to_scorer(scoring, self.estimator)
+        self._scoring, _sign = _coerce_to_scorer_and_sign(scoring, self.estimator)
         self.scorer_ = self._scoring
 
-        # Set the sign of the scoring function
-        if hasattr(self._scoring, "_score"):
-            score_func = self._scoring._score_func
-            _sign = _guess_sign_of_sklmetric(score_func)
-            _sign_str = "higher" if _sign == 1 else "lower"
-            self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})
+        _sign_str = "higher" if _sign == 1 else "lower"
+        self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})
 
     def _paramnames(self):
         """Return the parameter names of the search.
diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py
index ab4622b8..d2ca27dc 100644
--- a/src/hyperactive/experiment/integrations/sktime_classification.py
+++ b/src/hyperactive/experiment/integrations/sktime_classification.py
@@ -1,13 +1,11 @@
 """Experiment adapter for sktime backtesting experiments."""
+
 # copyright: hyperactive developers, MIT License (see LICENSE file)
 
 import numpy as np
 
 from hyperactive.base import BaseExperiment
-from hyperactive.experiment.integrations._skl_metrics import (
-    _coerce_to_scorer,
-    _guess_sign_of_sklmetric,
-)
+from hyperactive.experiment.integrations._skl_metrics import _coerce_to_scorer_and_sign
 
 
 class SktimeClassificationExperiment(BaseExperiment):
@@ -173,14 +171,11 @@ def __init__(
 
         super().__init__()
 
-        self._scoring = _coerce_to_scorer(scoring, "classifier")
+        self._scoring, _sign = _coerce_to_scorer_and_sign(scoring, "classifier")
 
-        # Set the sign of the scoring function
-        if hasattr(self._scoring, "_score"):
-            score_func = self._scoring._score_func
-            _sign = _guess_sign_of_sklmetric(score_func)
-            _sign_str = "higher" if _sign == 1 else "lower"
-            self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})
+        # Tag the optimization direction with the sign from the coerce helper
+        _sign_str = "higher" if _sign == 1 else "lower"
+        self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})
 
         # default handling for cv
         if isinstance(cv, int):
@@ -223,19 +218,25 @@ def _evaluate(self, params):
 
         estimator = self.estimator.clone().set_params(**params)
 
+        # determine metric function for sktime.evaluate via centralized coerce helper
+        metric_func = getattr(self._scoring, "_metric_func", None)
+        if metric_func is None:
+            # fallback for scorers that carry no `_metric_func` attribute
+            from sklearn.metrics import accuracy_score as metric_func  # type: ignore
+
         results = evaluate(
             estimator,
             cv=self._cv,
             X=self.X,
             y=self.y,
-            scoring=self._scoring._score_func,
+            scoring=metric_func,
             error_score=self.error_score,
             backend=self.backend,
             backend_params=self.backend_params,
         )
 
-        metric = self._scoring._score_func
-        result_name = f"test_{metric.__name__}"
+        metric = metric_func
+        result_name = f"test_{getattr(metric, '__name__', 'score')}"
 
         res_float = results[result_name].mean()
 
@@ -294,7 +295,18 @@ def get_test_params(cls, parameter_set="default"):
             "scoring": brier_score_loss,
         }
 
-        return [params0, params1]
+        def passthrough_scorer(estimator, X, y):
+            return estimator.score(X, y)
+
+        params2 = {
+            "estimator": DummyClassifier(strategy="prior"),
+            "X": X,
+            "y": y,
+            "cv": KFold(n_splits=2),
+            "scoring": passthrough_scorer,
+        }
+
+        return [params0, params1, params2]
 
     @classmethod
     def _get_score_params(self):
diff --git a/src/hyperactive/integrations/sklearn/checks.py b/src/hyperactive/integrations/sklearn/checks.py
index c752ee71..93f3bf1c 100644
--- a/src/hyperactive/integrations/sklearn/checks.py
+++ b/src/hyperactive/integrations/sklearn/checks.py
@@ -1,5 +1,7 @@
 """Validation checks for scikit-learn integration."""
 
+from functools import wraps
+
 
 class Checks:
     """Checks class."""
@@ -7,11 +9,11 @@ class Checks:
     _fit_successful = False
 
     def verify_fit(function):
-        """Verify Fit function."""
+        """Mark fit successful and preserve signature."""
 
-        def wrapper(self, X, y):
-            """Wrap function call."""
-            out = function(self, X, y)
+        @wraps(function)
+        def wrapper(self, *args, **kwargs):
+            out = function(self, *args, **kwargs)
             self._fit_successful = True
             return out
 
diff --git a/src/hyperactive/opt/_common.py b/src/hyperactive/opt/_common.py
index 37c01e3a..9e7c1215 100644
--- a/src/hyperactive/opt/_common.py
+++ b/src/hyperactive/opt/_common.py
@@ -4,13 +4,17 @@
 
 
 def _score_params(params, meta):
-    """Score parameters, used in parallelization."""
+    """Score parameters, used in parallelization.
+
+    Uses experiment.score (via __call__), which is standardized to
+    "higher-is-better" across experiments.
+    """
     meta = meta.copy()
     experiment = meta["experiment"]
     error_score = meta["error_score"]
 
     try:
-        return experiment(**params)
+        return float(experiment(params))  # __call__ takes the params dict
     except Exception:  # noqa: B904
         # Catch all exceptions and assign error_score
         return error_score
diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py
index 8c0cd51c..835bb4ae 100644
--- a/src/hyperactive/opt/gridsearch/_sk.py
+++ b/src/hyperactive/opt/gridsearch/_sk.py
@@ -153,6 +153,7 @@ def _solve(self, experiment, param_grid, error_score, backend, backend_params):
             "error_score": error_score,
         }
 
+        # scores are sign-adjusted via experiment.score (higher-is-better)
         scores = parallelize(
             fun=_score_params,
             iter=candidate_params,
@@ -161,11 +162,15 @@ def _solve(self, experiment, param_grid, error_score, backend, backend_params):
             backend_params=backend_params,
         )
 
-        best_index = np.argmin(scores)
+        # select best by maximizing standardized score
+        best_index = int(np.argmax(scores))
+
         best_params = candidate_params[best_index]
 
+        # store public attributes
         self.best_index_ = best_index
-        self.best_score_ = scores[best_index]
+        self.best_score_ = float(scores[best_index])
+        self.best_params_ = best_params
 
         return best_params
 
diff --git a/src/hyperactive/opt/random_search.py b/src/hyperactive/opt/random_search.py
index c6c23e10..2ae97bbc 100644
--- a/src/hyperactive/opt/random_search.py
+++ b/src/hyperactive/opt/random_search.py
@@ -190,10 +190,12 @@ def _solve(
             backend_params=backend_params,
         )
 
-        best_index = int(np.argmin(scores))  # lower-is-better convention
+        # select best by maximizing standardized score
+        best_index = int(np.argmax(scores))
+
         best_params = candidate_params[best_index]
 
-        # public attributes for external consumers
+        # public attributes for external consumers (signed score convention)
         self.best_index_ = best_index
         self.best_score_ = float(scores[best_index])
         self.best_params_ = best_params
diff --git a/src/hyperactive/tests/test_all_objects.py b/src/hyperactive/tests/test_all_objects.py
index ec399c64..559af35d 100644
--- a/src/hyperactive/tests/test_all_objects.py
+++ b/src/hyperactive/tests/test_all_objects.py
@@ -348,3 +348,117 @@ def test_gfo_integration(self, object_instance):
         assert isinstance(best_params, dict), "Best parameters should be a dictionary"
         assert "C" in best_params, "Best parameters should contain 'C'"
         assert "gamma" in best_params, "Best parameters should contain 'gamma'"
+
+    def test_selection_direction_backend(self, object_instance):
+        """Backends return argmax over standardized scores on controlled setup.
+
+        This verifies the maximization direction using a tiny, deterministic
+        experiment and a deliberately poor warm start. It is scoped per-backend
+        to avoid brittle stochastic behavior; unhandled backend types are skipped.
+        """
+        # Import backend bases to check optimizer type
+        from hyperactive.opt._adapters._base_optuna_adapter import _BaseOptunaAdapter
+        from hyperactive.opt._adapters._gfo import _BaseGFOadapter
+        from hyperactive.opt.gridsearch._sk import GridSearchSk
+        from hyperactive.opt.random_search import RandomSearchSk
+
+        # attach space under the first search-space parameter name the estimator has
+        def _cfg_with_space(est, exp, space):
+            cfg = {"experiment": exp}
+            param_keys = set(est.get_params().keys())
+            for key in (
+                "param_space",
+                "search_space",
+                "param_grid",
+                "param_distributions",
+            ):
+                if key in param_keys:
+                    cfg[key] = space
+                    break
+            return cfg
+
+        # set up a simple deterministic experiment with clear best vs worst
+        from hyperactive.experiment.bench import Ackley
+
+        exp = Ackley(d=2)
+        # ackley is lower-better on evaluate; score flips sign to higher-better
+        poor = {"x0": 4.0, "x1": 4.0}
+        good = {"x0": 0.0, "x1": 0.0}
+
+        # Helper: assert that returned params equal the known good point
+        def _assert_good(best_params):
+            assert isinstance(best_params, dict)
+            assert best_params == good, (
+                f"Optimizer should select argmax of standardized score. "
+                f"Expected {good}, got {best_params}."
+            )
+
+        space = {"x0": [0.0, 4.0], "x1": [0.0, 4.0]}
+        base_cfg = _cfg_with_space(object_instance, exp, space)
+
+        # Optuna adapters: use warm_start via initialize and categorical space
+        if isinstance(object_instance, _BaseOptunaAdapter):
+            inst = object_instance.clone().set_params(
+                **{
+                    **base_cfg,
+                    "n_trials": 2,
+                    "initialize": {"warm_start": [poor, good]},
+                    "random_state": 0,
+                }
+            )
+            best_params = inst.solve()
+            _assert_good(best_params)
+            return None
+
+        # GFO adapters: pass discrete space and warm_start; keep iterations tiny
+        if isinstance(object_instance, _BaseGFOadapter):
+            inst = object_instance.clone().set_params(
+                **{
+                    **base_cfg,
+                    "n_iter": 2,
+                    "initialize": {
+                        "warm_start": [poor, good],
+                        "grid": 0,
+                        "random": 0,
+                        "vertices": 0,
+                    },
+                    # fixed seed for reproducibility; verbose off to keep output quiet
+                    "random_state": 0,
+                    "verbose": False,
+                }
+            )
+            best_params = inst.solve()
+            # In case the backend evaluates beyond warm starts, fall back to score check
+            if best_params != good:
+                sc_good, _ = exp.score(good)
+                sc_poor, _ = exp.score(poor)
+                sc_best, _ = exp.score(best_params)
+                assert sc_best >= max(sc_good, sc_poor)  # no worse than warm starts
+            else:
+                _assert_good(best_params)
+            return None
+
+        # Sklearn GridSearch optimizer: test with discrete parameter grid
+        if isinstance(object_instance, GridSearchSk):
+            inst = object_instance.clone().set_params(**base_cfg)
+            best_params = inst.solve()
+            # GridSearchSk evaluates all grid combinations and selects best
+            _assert_good(best_params)
+            return None
+
+        # Sklearn RandomSearch optimizer: test with discrete parameter distributions
+        if isinstance(object_instance, RandomSearchSk):
+            inst = object_instance.clone().set_params(
+                **{
+                    **base_cfg,
+                    "n_iter": 4,  # = size of 2x2 space; full coverage assumed, confirm
+                    "random_state": 0,  # Ensure deterministic sampling
+                }
+            )
+            best_params = inst.solve()
+            # RandomSearchSk samples and selects best from evaluated points
+            _assert_good(best_params)
+            return None
+
+        # For other backends, no-op here; targeted direction tests live elsewhere
+        return None
diff --git a/src/hyperactive/utils/estimator_checks.py b/src/hyperactive/utils/estimator_checks.py
index 1bc9f793..009869d5 100644
--- a/src/hyperactive/utils/estimator_checks.py
+++ b/src/hyperactive/utils/estimator_checks.py
@@ -96,14 +96,12 @@ def check_estimator(
     {'test_clone[HillClimbing-1]': 'PASSED'}
     """
     msg = (
-        "check_estimator is a testing utility for developers, and "
-        "requires pytest to be present "
-        "in the python environment, but pytest was not found. "
-        "pytest is a developer dependency and not included in the base "
-        "sktime installation. Please run: `pip install pytest` to "
-        "install the pytest package. "
-        "To install sktime with all developer dependencies, run:"
-        " `pip install hyperactive[dev]`"
+        "check_estimator is a testing utility for developers and requires "
+        "pytest to be present in the Python environment, but pytest was not found. "
+        "pytest is a developer dependency and not included in the base Hyperactive "
+        "installation. Please run: `pip install pytest` to install the pytest package. "
+        "To install Hyperactive with all developer dependencies, run: "
+        "`pip install hyperactive[dev]`."
     )
     _check_soft_dependencies("pytest", msg=msg)