From cc3b4c52d602e5293de4b5987cb16a3a09b4ceab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 2025 19:55:16 +0200 Subject: [PATCH 01/20] exp --- .../experiment/integrations/_skl_cv.py | 31 ++ .../experiment/integrations/sklearn_cv.py | 14 +- .../experiment/integrations/skpro_probareg.py | 308 ++++++++++++++++++ 3 files changed, 342 insertions(+), 11 deletions(-) create mode 100644 src/hyperactive/experiment/integrations/_skl_cv.py create mode 100644 src/hyperactive/experiment/integrations/skpro_probareg.py diff --git a/src/hyperactive/experiment/integrations/_skl_cv.py b/src/hyperactive/experiment/integrations/_skl_cv.py new file mode 100644 index 00000000..35f8da7d --- /dev/null +++ b/src/hyperactive/experiment/integrations/_skl_cv.py @@ -0,0 +1,31 @@ +"""Integration utilities for sklearn splitters with Hyperactive.""" + +__all__ = ["_coerce_cv"] + + +def _coerce_cv(cv): + """Coerce cv argument into a sklearn-compatible cv splitter. + + Parameters + ---------- + cv : int, cross-validation generator, or iterable + The cross-validation strategy to use. + + Returns + ------- + cv_splitter : cross-validation generator or iterable + A sklearn-compatible cross-validation splitter. 
+ """ + from sklearn.model_selection import KFold + + # default handling for cv + if isinstance(cv, int): + from sklearn.model_selection import KFold + + return KFold(n_splits=cv, shuffle=True) + elif cv is None: + from sklearn.model_selection import KFold + + return KFold(n_splits=3, shuffle=True) + else: + return cv diff --git a/src/hyperactive/experiment/integrations/sklearn_cv.py b/src/hyperactive/experiment/integrations/sklearn_cv.py index 65b6e7a1..2ecc6c6d 100644 --- a/src/hyperactive/experiment/integrations/sklearn_cv.py +++ b/src/hyperactive/experiment/integrations/sklearn_cv.py @@ -7,6 +7,7 @@ from sklearn.utils.validation import _num_samples from hyperactive.base import BaseExperiment +from hyperactive.experiment.integrations._skl_cv import _coerce_cv from hyperactive.experiment.integrations._skl_metrics import _coerce_to_scorer_and_sign @@ -31,7 +32,7 @@ class SklearnCvExperiment(BaseExperiment): estimator : sklearn estimator The estimator to be used for the experiment. X : array-like, shape (n_samples, n_features) - The input data for the model. + The input data for the model. y : array-like, shape (n_samples,) or (n_samples, n_outputs) The target values for the model. 
cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True) @@ -86,16 +87,7 @@ def __init__(self, estimator, X, y, scoring=None, cv=None): super().__init__() - if cv is None: - from sklearn.model_selection import KFold - - self._cv = KFold(n_splits=3, shuffle=True) - elif isinstance(cv, int): - from sklearn.model_selection import KFold - - self._cv = KFold(n_splits=cv, shuffle=True) - else: - self._cv = cv + self._cv = _coerce_cv(cv) self._scoring, _sign = _coerce_to_scorer_and_sign(scoring, self.estimator) self.scorer_ = self._scoring diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py new file mode 100644 index 00000000..f8e0dc2e --- /dev/null +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -0,0 +1,308 @@ +"""Experiment adapter for sktime backtesting experiments.""" + +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import numpy as np + +from hyperactive.base import BaseExperiment +from hyperactive.experiment.integrations._skl_cv import _coerce_cv + + +class SkproProbaRegExperiment(BaseExperiment): + """Experiment adapter for skpro probabilistic regression or survival prediction. + + This class is used to perform cross-validation experiments using a given + skpro probabilistic regressor or survival predictor. + It allows for hyperparameter tuning and evaluation of + the model's performance. + + The score returned is the summary backtesting score, + of applying ``sktime`` ``evaluate`` to ``estimator`` with the parameters given in + ``score`` ``params``. + + The backtesting performed is specified by the ``cv`` parameter, + and the scoring metric is specified by the ``scoring`` parameter. + The ``X`` and ``y`` parameters are the input data and target values, + which are used in fit/predict cross-validation. 
+ + Parameters + ---------- + estimator : skpro BaseProbaRegressor descendant instance + skpro probabilistic regressor or survival predictor to be evaluated. + + X : pd.DataFrame, shape (n_samples, n_features) + Feature instances to use in evaluation experiment + + y : pd.DataFrame, shape (n_samples,) or (n_samples, n_outputs) + Labels to use in the evaluation experiment + + C : pd.DataFrame, optional (default=None) + censoring information to use in the evaluation experiment, + + * should have same column name as y, same length as X and y + * should have entries 0 and 1 (float or int), + 0 = uncensored, 1 = (right) censored + + if None, all observations are assumed to be uncensored. + Can be passed to any probabilistic regressor, + but is ignored if ``capability:survival`` tag is ``False``. + + cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None = default = ``KFold(n_splits=3, shuffle=True)`` + - integer, number of folds in a ``KFold`` splitter, ``shuffle=True`` + - An iterable yielding (train, test) splits as arrays of indices. + + scoring : str, callable, default=CRPS + Strategy to evaluate the performance of the cross-validated model on test set. + An ``skpro`` probabilistic regression or survival prediction metric. + + error_score : "raise" or numeric, default=np.nan + Value to assign to the score if an exception occurs in estimator fitting. If set + to "raise", the exception is raised. If a numeric value is given, + FitFailedWarning is raised. + + backend : string, by default "None". + Parallelization backend to use for runs. + Runs parallel evaluate if specified and ``strategy="refit"``. 
+ + - "None": executes loop sequentially, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + - "dask_lazy": same as "dask", + but changes the return to (lazy) ``dask.dataframe.DataFrame``. + - "ray": uses ``ray``, requires ``ray`` package in environment + + Recommendation: Use "dask" or "loky" for parallel evaluate. + "threading" is unlikely to see speed ups due to the GIL and the serialization + backend (``cloudpickle``) for "dask" and "loky" is generally more robust + than the standard ``pickle`` library used in "multiprocessing". + + backend_params : dict, optional + additional parameters passed to the backend as config. + Directly passed to ``utils.parallel.parallelize``. + Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. 
+ - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + >>> from hyperactive.experiment.integrations import SkproProbaRegExperiment + >>> from sklearn.datasets import load_diabetes + >>> from sklearn.model_selection import KFold + >>> from skpro.metrics import CRPS + >>> from skpro.regression.dummy import DummyProbaRegressor + >>> + >>> X, y = load_diabetes(return_X_y=True, as_frame=True) + >>> y = pd.DataFrame(y) # skpro assumes y is pd.DataFrame + >>> + >>> sktime_exp = SktimeClassificationExperiment( + ... estimator=DummyProbaRegressor(), + ... scoring=CRPS(), + ... cv=KFold(n_splits=2), + ... X=X, + ... y=y, + ... ) + >>> params = {"strategy": "normal"} + >>> score, add_info = sktime_exp.score(params) + + For default choices of ``scoring`` and ``cv``: + >>> sktime_exp = SktimeClassificationExperiment( + ... estimator=DummyProbaRegressor(), + ... X=X, + ... y=y, + ... ) + >>> params = {"strategy": "most_frequent"} + >>> score, add_info = sktime_exp.score(params) + + Quick call without metadata return or dictionary: + >>> score = sktime_exp({"strategy": "normal"}) + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "skpro", # python dependencies + } + + def __init__( + self, + estimator, + X, + y, + cv=None, + scoring=None, + error_score=np.nan, + backend=None, + backend_params=None, + ): + self.estimator = estimator + self.X = X + self.y = y + self.scoring = scoring + self.cv = cv + self.error_score = error_score + self.backend = backend + self.backend_params = backend_params + + super().__init__() + + # Set the sign of the scoring function (rely on sklearn scorer if present) + higher_is_better = not scoring.get_tag("lower_is_better") + self.set_tags(**{"property:higher_or_lower_is_better": higher_is_better}) + + self._cv = _coerce_cv(cv) + + if scoring is None: + from skpro.metrics import CRPS + + self._scoring = 
CRPS() + else: + self._scoring = scoring + + def _paramnames(self): + """Return the parameter names of the search. + + Returns + ------- + list of str + The parameter names of the search parameters. + """ + return list(self.estimator.get_params().keys()) + + def _evaluate(self, params): + """Evaluate the parameters. + + Parameters + ---------- + params : dict with string keys + Parameters to evaluate. + + Returns + ------- + float + The value of the parameters as per evaluation. + dict + Additional metadata about the search. + """ + from skpro.model_selection import evaluate + + estimator = self.estimator.clone().set_params(**params) + + results = evaluate( + estimator, + cv=self._cv, + X=self.X, + y=self.y, + scoring=self.scoring, + error_score=self.error_score, + backend=self.backend, + backend_params=self.backend_params, + ) + + result_name = f"test_{self.scoring.name}" + + res_float = results[result_name].mean() + + return res_float, {"results": results} + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the skbase object. + + ``get_test_params`` is a unified interface point to store + parameter settings for testing purposes. This function is also + used in ``create_test_instance`` and ``create_test_instances_and_names`` + to construct test instances. + + ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``. + + Each ``dict`` is a parameter configuration for testing, + and can be used to construct an "interesting" test instance. + A call to ``cls(**params)`` should + be valid for all dictionaries ``params`` in the return of ``get_test_params``. + + The ``get_test_params`` need not return fixed lists of dictionaries, + it can also return dynamic or stochastic parameter settings. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. 
If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + import pandas as pd + + from sklearn.datasets import load_diabetes + from sklearn.model_selection import KFold + from skpro.metrics import ConcordanceHarrell + from skpro.regression.residual import ResidualDouble + from skpro.survival.compose import ConditionUncensored + + X, y = load_diabetes(return_X_y=True, as_frame=True) + y = pd.DataFrame(y) # skpro assumes y is pd.DataFrame + params0 = { + "estimator": ResidualDouble.create_test_instance(), + "X": X, + "y": y, + } + + params1 = { + "estimator": ConditionUncensored.create_test_instance(), + "cv": KFold(n_splits=2), + "X": X, + "y": y, + "scoring": ConcordanceHarrell(), + } + + return [params0, params1] + + @classmethod + def _get_score_params(self): + """Return settings for testing score/evaluate functions. Used in tests only. + + Returns a list, the i-th element should be valid arguments for + self.evaluate and self.score, of an instance constructed with + self.get_test_params()[i]. + + Returns + ------- + list of dict + The parameters to be used for scoring. 
+ """ + from skpro.regression.dummy import DummyProbaRegressor + + val0 = {} + val1 = {"estimator": DummyProbaRegressor()} + return [val0, val1] From f19d09f0f48e1e63c2dcb64dbfe61e66a23c05a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 2025 20:07:02 +0200 Subject: [PATCH 02/20] export --- src/hyperactive/experiment/integrations/__init__.py | 4 ++++ src/hyperactive/experiment/integrations/skpro_probareg.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py index 4ed0ce52..06c4c584 100644 --- a/src/hyperactive/experiment/integrations/__init__.py +++ b/src/hyperactive/experiment/integrations/__init__.py @@ -2,6 +2,9 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment +from hyperactive.experiment.integrations.skpro_probareg import ( + SkproProbaRegExperiment, +) from hyperactive.experiment.integrations.sktime_classification import ( SktimeClassificationExperiment, ) @@ -11,6 +14,7 @@ __all__ = [ "SklearnCvExperiment", + "SkproProbaRegExperiment", "SktimeClassificationExperiment", "SktimeForecastingExperiment", ] diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index f8e0dc2e..f94263b7 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -263,7 +263,6 @@ def get_test_params(cls, parameter_set="default"): `create_test_instance` uses the first (or only) dictionary in `params` """ import pandas as pd - from sklearn.datasets import load_diabetes from sklearn.model_selection import KFold from skpro.metrics import ConcordanceHarrell From 9515cf7984641d8bc25edcde59304fbb3260b5bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 
2025 22:23:48 +0200 Subject: [PATCH 03/20] Update pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 30493a6f..af3dd76a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ sklearn-integration = [ "scikit-learn <1.8.0", ] sktime-integration = [ + "skpro", "sktime", ] build = [ From ee37ff281e487b17fefe5355876a42897b76fcb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 2025 22:33:03 +0200 Subject: [PATCH 04/20] Update skpro_probareg.py --- src/hyperactive/experiment/integrations/skpro_probareg.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index f94263b7..c41c8df1 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -170,10 +170,6 @@ def __init__( super().__init__() - # Set the sign of the scoring function (rely on sklearn scorer if present) - higher_is_better = not scoring.get_tag("lower_is_better") - self.set_tags(**{"property:higher_or_lower_is_better": higher_is_better}) - self._cv = _coerce_cv(cv) if scoring is None: @@ -183,6 +179,10 @@ def __init__( else: self._scoring = scoring + # Set the sign of the scoring function (rely on sklearn scorer if present) + higher_is_better = not self._scoring.get_tag("lower_is_better") + self.set_tags(**{"property:higher_or_lower_is_better": higher_is_better}) + def _paramnames(self): """Return the parameter names of the search. 
From b3dbf091b77f20674a0caf4d0c5ce963da81a3d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 2025 22:43:10 +0200 Subject: [PATCH 05/20] Update skpro_probareg.py --- .../experiment/integrations/skpro_probareg.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index c41c8df1..4a539c84 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -119,7 +119,7 @@ class SkproProbaRegExperiment(BaseExperiment): >>> X, y = load_diabetes(return_X_y=True, as_frame=True) >>> y = pd.DataFrame(y) # skpro assumes y is pd.DataFrame >>> - >>> sktime_exp = SktimeClassificationExperiment( + >>> skpro_exp = SkproProbaRegExperiment( ... estimator=DummyProbaRegressor(), ... scoring=CRPS(), ... cv=KFold(n_splits=2), @@ -127,19 +127,19 @@ class SkproProbaRegExperiment(BaseExperiment): ... y=y, ... ) >>> params = {"strategy": "normal"} - >>> score, add_info = sktime_exp.score(params) + >>> score, add_info = skpro_exp.score(params) For default choices of ``scoring`` and ``cv``: - >>> sktime_exp = SktimeClassificationExperiment( + >>> skpro_exp = SkproProbaRegExperiment( ... estimator=DummyProbaRegressor(), ... X=X, ... y=y, ... 
) >>> params = {"strategy": "most_frequent"} - >>> score, add_info = sktime_exp.score(params) + >>> score, add_info = skpro_exp.score(params) Quick call without metadata return or dictionary: - >>> score = sktime_exp({"strategy": "normal"}) + >>> score = skpro_exp({"strategy": "normal"}) """ _tags = { From 667591ee2ff8faa09603d307b793157315046b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 2025 22:45:43 +0200 Subject: [PATCH 06/20] Update skpro_probareg.py --- src/hyperactive/experiment/integrations/skpro_probareg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index 4a539c84..f4daa2e5 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -1,4 +1,4 @@ -"""Experiment adapter for sktime backtesting experiments.""" +"""Experiment adapter for skpro cross-validation experiments.""" # copyright: hyperactive developers, MIT License (see LICENSE file) @@ -17,7 +17,7 @@ class SkproProbaRegExperiment(BaseExperiment): the model's performance. The score returned is the summary backtesting score, - of applying ``sktime`` ``evaluate`` to ``estimator`` with the parameters given in + of applying ``skpro`` ``evaluate`` to ``estimator`` with the parameters given in ``score`` ``params``. 
The backtesting performed is specified by the ``cv`` parameter, From a353125a814f251ef3e1ce5af8d99ec171fc3e95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 2025 22:47:10 +0200 Subject: [PATCH 07/20] Update skpro_probareg.py --- src/hyperactive/experiment/integrations/skpro_probareg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index f4daa2e5..e2663d5c 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -110,6 +110,7 @@ class SkproProbaRegExperiment(BaseExperiment): Example ------- + >>> import pandas as pd >>> from hyperactive.experiment.integrations import SkproProbaRegExperiment >>> from sklearn.datasets import load_diabetes >>> from sklearn.model_selection import KFold @@ -180,7 +181,10 @@ def __init__( self._scoring = scoring # Set the sign of the scoring function (rely on sklearn scorer if present) - higher_is_better = not self._scoring.get_tag("lower_is_better") + if self._scoring.get_tag("lower_is_better"): + higher_is_better = "lower" + else: + higher_is_better = "higher" self.set_tags(**{"property:higher_or_lower_is_better": higher_is_better}) def _paramnames(self): From 3851ebfa399df78725507343cced939bd093f9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 2025 22:58:41 +0200 Subject: [PATCH 08/20] Update skpro_probareg.py --- src/hyperactive/experiment/integrations/skpro_probareg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index e2663d5c..650e05fc 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -212,7 +212,7 @@ def _evaluate(self, 
params): dict Additional metadata about the search. """ - from skpro.model_selection import evaluate + from skpro.benchmarking.evaluate import evaluate estimator = self.estimator.clone().set_params(**params) From 689e933135562a0d24b4c77d7ce3dc6e826f0924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 26 Sep 2025 23:30:15 +0200 Subject: [PATCH 09/20] Update skpro_probareg.py --- src/hyperactive/experiment/integrations/skpro_probareg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index 650e05fc..0a2df034 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -221,13 +221,13 @@ def _evaluate(self, params): cv=self._cv, X=self.X, y=self.y, - scoring=self.scoring, + scoring=self._scoring, error_score=self.error_score, backend=self.backend, backend_params=self.backend_params, ) - result_name = f"test_{self.scoring.name}" + result_name = f"test_{self._scoring.name}" res_float = results[result_name].mean() From 5a4a87e11916255298a95bc0c9378f1531a9e344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 00:41:04 +0200 Subject: [PATCH 10/20] C --- src/hyperactive/experiment/integrations/skpro_probareg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index 0a2df034..6a7cb830 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -154,6 +154,7 @@ def __init__( estimator, X, y, + C=None, cv=None, scoring=None, error_score=np.nan, @@ -163,6 +164,7 @@ def __init__( self.estimator = estimator self.X = X self.y = y + self.C = C self.scoring = scoring self.cv = cv self.error_score = error_score @@ 
-221,6 +223,7 @@ def _evaluate(self, params): cv=self._cv, X=self.X, y=self.y, + C=self.C, scoring=self._scoring, error_score=self.error_score, backend=self.backend, From a2b456d64b36965e3a6b9293f57b1ef34b06d92f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 00:41:21 +0200 Subject: [PATCH 11/20] tuner --- .../integrations/skpro/__init__.py | 5 + .../integrations/skpro/_probareg.py | 333 ++++++++++++++++++ .../integrations/skpro/tests/__init__.py | 1 + .../skpro/tests/test_skpro_estimators.py | 22 ++ 4 files changed, 361 insertions(+) create mode 100644 src/hyperactive/integrations/skpro/__init__.py create mode 100644 src/hyperactive/integrations/skpro/_probareg.py create mode 100644 src/hyperactive/integrations/skpro/tests/__init__.py create mode 100644 src/hyperactive/integrations/skpro/tests/test_skpro_estimators.py diff --git a/src/hyperactive/integrations/skpro/__init__.py b/src/hyperactive/integrations/skpro/__init__.py new file mode 100644 index 00000000..121fd14b --- /dev/null +++ b/src/hyperactive/integrations/skpro/__init__.py @@ -0,0 +1,5 @@ +"""Integrations for skpro with Hyperactive.""" + +from hyperactive.integrations.skpro._probareg import ProbaRegOptCV + +__all__ = ["ProbaRegOptCV"] diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py new file mode 100644 index 00000000..b6b395fa --- /dev/null +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -0,0 +1,333 @@ +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import numpy as np +from skbase.utils.dependencies import _check_soft_dependencies + +if _check_soft_dependencies("skpro", severity="none"): + from skpro.regression.base import _DelegatedProbaRegressor +else: + from skbase.base import BaseEstimator as _DelegatedProbaRegressor + +from hyperactive.experiment.integrations.skpro_probareg import SkproProbaRegExperiment + + +class ProbaRegOptCV(_DelegatedProbaRegressor): + """Tune 
an sktime classifier via any optimizer in the hyperactive toolbox. + + ``TSCOptCV`` uses any available tuning engine from ``hyperactive`` + to tune a classifier by backtesting. + + It passes backtesting results as scores to the tuning engine, + which identifies the best hyperparameters. + + Any available tuning engine from hyperactive can be used, for example: + + * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch``, + this results in the same algorithm as ``TSCGridSearchCV`` + * hill climbing - ``from hyperactive.opt import HillClimbing`` + * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer`` + + Configuration of the tuning engine is as per the respective documentation. + + Formally, ``TSCOptCV`` does the following: + + In ``fit``: + + * wraps the ``estimator``, ``scoring``, and other parameters + into a ``SktimeClassificationExperiment`` instance, which is passed to the + optimizer ``optimizer`` as the ``experiment`` argument. + * Optimal parameters are then obtained from ``optimizer.solve``, and set + as ``best_params_`` and ``best_estimator_`` attributes. + * If ``refit=True``, ``best_estimator_`` is fitted to the entire ``y`` and ``X``. + + In ``predict`` and ``predict``-like methods, calls the respective method + of the ``best_estimator_`` if ``refit=True``. + + Parameters + ---------- + estimator : sktime classifier, BaseClassifier instance or interface compatible + The classifier to tune, must implement the sktime classifier interface. + + optimizer : hyperactive BaseOptimizer + The optimizer to be used for hyperparameter search. + + cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None = default = ``KFold(n_splits=3, shuffle=True)`` + - integer, number of folds folds in a ``KFold`` splitter, ``shuffle=True`` + - An iterable yielding (train, test) splits as arrays of indices. 
+ + For integer/None inputs, if the estimator is a classifier and ``y`` is + either binary or multiclass, :class:`StratifiedKFold` is used. In all + other cases, :class:`KFold` is used. These splitters are instantiated + with ``shuffle=False`` so the splits will be the same across calls. + + scoring : str, callable, default=None + Strategy to evaluate the performance of the cross-validated model on + the test set. Can be: + + - a single string resolvable to an sklearn scorer + - a callable that returns a single value; + - ``None`` = default = ``accuracy_score`` + + refit : bool, optional (default=True) + True = refit the forecaster with the best parameters on the entire data in fit + False = no refitting takes place. The forecaster cannot be used to predict. + This is to be used to tune the hyperparameters, and then use the estimator + as a parameter estimator, e.g., via get_fitted_params or PluginParamsForecaster. + + error_score : "raise" or numeric, default=np.nan + Value to assign to the score if an exception occurs in estimator fitting. If set + to "raise", the exception is raised. If a numeric value is given, + FitFailedWarning is raised. + + backend : string, by default "None". + Parallelization backend to use for runs. + Runs parallel evaluate if specified and ``strategy="refit"``. + + - "None": executes loop sequentially, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + - "dask_lazy": same as "dask", + but changes the return to (lazy) ``dask.dataframe.DataFrame``. + - "ray": uses ``ray``, requires ``ray`` package in environment + + Recommendation: Use "dask" or "loky" for parallel evaluate. 
+ "threading" is unlikely to see speed ups due to the GIL and the serialization + backend (``cloudpickle``) for "dask" and "loky" is generally more robust + than the standard ``pickle`` library used in "multiprocessing". + + backend_params : dict, optional + additional parameters passed to the backend as config. + Directly passed to ``utils.parallel.parallelize``. + Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use. 
+ - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + Any available tuning engine from hyperactive can be used, for example: + + * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch`` + * hill climbing - ``from hyperactive.opt import HillClimbing`` + * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer`` + + For illustration, we use grid search, this can be replaced by any other optimizer. + + 1. defining the tuned estimator: + >>> from sktime.classification.dummy import DummyClassifier + >>> from sklearn.model_selection import KFold + >>> from hyperactive.integrations.sktime import TSCOptCV + >>> from hyperactive.opt import GridSearchSk as GridSearch + >>> + >>> param_grid = {"strategy": ["most_frequent", "stratified"]} + >>> tuned_naive = TSCOptCV( + ... DummyClassifier(), + ... GridSearch(param_grid), + ... cv=KFold(n_splits=2, shuffle=False), + ... ) + + 2. fitting the tuned estimator: + >>> from sktime.datasets import load_unit_test + >>> X_train, y_train = load_unit_test( + ... return_X_y=True, split="TRAIN", return_type="pd-multiindex" + ... ) + >>> X_test, _ = load_unit_test( + ... return_X_y=True, split="TEST", return_type="pd-multiindex" + ... ) + >>> + >>> tuned_naive.fit(X_train, y_train) + TSCOptCV(...) + >>> y_pred = tuned_naive.predict(X_test) + + 3. 
obtaining best parameters and best estimator + >>> best_params = tuned_naive.best_params_ + >>> best_classifier = tuned_naive.best_estimator_ + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "sktime", + } + + # attribute for _DelegatedClassifier, which then delegates + # all non-overridden methods are same as of getattr(self, _delegate_name) + # see further details in _DelegatedClassifier docstring + _delegate_name = "best_estimator_" + + def __init__( + self, + estimator, + optimizer, + cv=None, + scoring=None, + refit=True, + error_score=np.nan, + backend=None, + backend_params=None, + ): + self.estimator = estimator + self.optimizer = optimizer + self.cv = cv + self.scoring = scoring + self.refit = refit + self.error_score = error_score + self.backend = backend + self.backend_params = backend_params + super().__init__() + + def _fit(self, X, y, C=None): + """Fit regressor to training data. + + Writes to self: + Sets fitted model attributes ending in "_". + + Changes state to "fitted" = sets is_fitted flag to True + + Parameters + ---------- + X : pandas DataFrame + feature instances to fit regressor to + + y : pd.DataFrame, must be same length as X + labels to fit regressor to + + C : pd.DataFrame, optional (default=None) + censoring information for survival analysis, + + * should have same column name as y, same length as X and y + * should have entries 0 and 1 (float or int), + 0 = uncensored, 1 = (right) censored + + if None, all observations are assumed to be uncensored. + + Returns + ------- + self : Reference to self. 
+ """ + estimator = self.estimator.clone() + + experiment = SkproProbaRegExperiment( + estimator=estimator, + scoring=self.scoring, + cv=self.cv, + X=X, + y=y, + C=C, + error_score=self.error_score, + backend=self.backend, + backend_params=self.backend_params, + ) + + optimizer = self.optimizer.clone() + optimizer.set_params(experiment=experiment) + best_params = optimizer.solve() + + self.best_params_ = best_params + self.best_estimator_ = estimator.set_params(**best_params) + + # Refit model with best parameters. + if self.refit: + self.best_estimator_.fit(X=X, y=y, C=C) + + return self + + def _predict(self, X): + """Predict labels for data from features. + + State required: + Requires state to be "fitted" = self.is_fitted=True + + Accesses in self: + Fitted model attributes ending in "_" + + Parameters + ---------- + X : pandas DataFrame, must have same columns as X in `fit` + data to predict labels for + + Returns + ------- + y : pandas DataFrame, same length as `X`, same columns as `y` in `fit` + labels predicted for `X` + """ + if not self.refit: + raise RuntimeError( + f"In {self.__class__.__name__}, refit must be True to make predictions," + f" but found refit=False. If refit=False, {self.__class__.__name__} can" + " be used only to tune hyper-parameters, as a parameter estimator." + ) + return super()._predict(X=X) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return ``"default"`` set. 
+ + Returns + ------- + params : dict or list of dict + """ + from sklearn.metrics import accuracy_score + from sklearn.model_selection import KFold + from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier + from sktime.classification.dummy import DummyClassifier + + from hyperactive.opt.gfo import HillClimbing + from hyperactive.opt.gridsearch import GridSearchSk + from hyperactive.opt.random_search import RandomSearchSk + + params_gridsearch = { + "estimator": DummyClassifier(), + "optimizer": GridSearchSk( + param_grid={"strategy": ["most_frequent", "stratified"]} + ), + } + params_randomsearch = { + "estimator": DummyClassifier(), + "cv": 2, + "optimizer": RandomSearchSk( + param_distributions={"strategy": ["most_frequent", "stratified"]}, + ), + "scoring": accuracy_score, + } + params_hillclimb = { + "estimator": KNeighborsTimeSeriesClassifier(), + "cv": KFold(n_splits=2, shuffle=False), + "optimizer": HillClimbing( + search_space={"n_neighbors": [1, 2, 4]}, + n_iter=10, + n_neighbours=5, + ), + "scoring": "cross-entropy", + } + return [params_gridsearch, params_randomsearch, params_hillclimb] diff --git a/src/hyperactive/integrations/skpro/tests/__init__.py b/src/hyperactive/integrations/skpro/tests/__init__.py new file mode 100644 index 00000000..f610fcb1 --- /dev/null +++ b/src/hyperactive/integrations/skpro/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for integrations for skpro.""" diff --git a/src/hyperactive/integrations/skpro/tests/test_skpro_estimators.py b/src/hyperactive/integrations/skpro/tests/test_skpro_estimators.py new file mode 100644 index 00000000..8032dee3 --- /dev/null +++ b/src/hyperactive/integrations/skpro/tests/test_skpro_estimators.py @@ -0,0 +1,22 @@ +"""Integration tests for skpro tuners.""" +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import pytest +from skbase.utils.dependencies import _check_soft_dependencies + +if _check_soft_dependencies("skpro", severity="none"): + from 
hyperactive.integrations.skpro import ProbaRegOptCV + + EST_TO_TEST = [ProbaRegOptCV] +else: + EST_TO_TEST = [] + + +@pytest.mark.parametrize("estimator", EST_TO_TEST) +def test_sktime_estimator(estimator): + """Test skpro estimator via check_estimator.""" + from skpro.utils.estimator_checks import check_estimator + + check_estimator(estimator, raise_exceptions=True) + # The above line collects all API conformance tests in skpro and runs them. + # It will raise an error if the estimator is not API conformant. From dbd72b42a94d0d14cef1547d57a300deb3077775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 01:05:18 +0200 Subject: [PATCH 12/20] test_params --- .../integrations/skpro/_probareg.py | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py index b6b395fa..09c3dcfb 100644 --- a/src/hyperactive/integrations/skpro/_probareg.py +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -297,37 +297,40 @@ def get_test_params(cls, parameter_set="default"): ------- params : dict or list of dict """ - from sklearn.metrics import accuracy_score from sklearn.model_selection import KFold - from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier - from sktime.classification.dummy import DummyClassifier + from skpro.metrics import CRPS, ConcordanceHarrell + from skpro.regression.bootstrap import BootstrapRegressor + from skpro.regression.residual import ResidualDouble + from skpro.survival.compose import ConditionUncensored from hyperactive.opt.gfo import HillClimbing from hyperactive.opt.gridsearch import GridSearchSk from hyperactive.opt.random_search import RandomSearchSk params_gridsearch = { - "estimator": DummyClassifier(), + "estimator": ResidualDouble.create_test_instance(), "optimizer": GridSearchSk( - param_grid={"strategy": ["most_frequent", "stratified"]} + 
param_grid={"distr_type": ["Normal", "Laplace"]} ), } - params_randomsearch = { - "estimator": DummyClassifier(), - "cv": 2, + param_randomsearch = { + "estimator": ResidualDouble.create_test_instance(), "optimizer": RandomSearchSk( - param_distributions={"strategy": ["most_frequent", "stratified"]}, + param_distributions={"distr_type": ["Normal", "Laplace"]}, + n_iter=2, ), - "scoring": accuracy_score, + "cv": 2, + "scoring": CRPS(), } params_hillclimb = { - "estimator": KNeighborsTimeSeriesClassifier(), - "cv": KFold(n_splits=2, shuffle=False), + "estimator": ConditionUncensored(BootstrapRegressor()), + "cv": KFold(n_splits=2), "optimizer": HillClimbing( - search_space={"n_neighbors": [1, 2, 4]}, + search_space={"estimator__n_bootstrap_samples": [3, 7, 12]}, n_iter=10, n_neighbours=5, ), - "scoring": "cross-entropy", + "scoring": ConcordanceHarrell(), } - return [params_gridsearch, params_randomsearch, params_hillclimb] + + return [params_gridsearch, param_randomsearch, params_hillclimb] From b76239a259c4a5991b820aa957d7e866836f347b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 01:06:28 +0200 Subject: [PATCH 13/20] Update _probareg.py --- src/hyperactive/integrations/skpro/_probareg.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py index 09c3dcfb..05f8eff1 100644 --- a/src/hyperactive/integrations/skpro/_probareg.py +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -309,9 +309,7 @@ def get_test_params(cls, parameter_set="default"): params_gridsearch = { "estimator": ResidualDouble.create_test_instance(), - "optimizer": GridSearchSk( - param_grid={"distr_type": ["Normal", "Laplace"]} - ), + "optimizer": GridSearchSk(param_grid={"distr_type": ["Normal", "Laplace"]}), } param_randomsearch = { "estimator": ResidualDouble.create_test_instance(), From c35f694bc2734129e1dde9cfe66ce36fec3c0397 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 01:15:17 +0200 Subject: [PATCH 14/20] Update _probareg.py --- src/hyperactive/integrations/skpro/_probareg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py index 05f8eff1..0b0755c4 100644 --- a/src/hyperactive/integrations/skpro/_probareg.py +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -321,7 +321,7 @@ def get_test_params(cls, parameter_set="default"): "scoring": CRPS(), } params_hillclimb = { - "estimator": ConditionUncensored(BootstrapRegressor()), + "estimator": ConditionUncensored(BootstrapRegressor.create_test_instance()), "cv": KFold(n_splits=2), "optimizer": HillClimbing( search_space={"estimator__n_bootstrap_samples": [3, 7, 12]}, From 2cf24bb071732165e0ae9fe0e3e7af6a6b821619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 01:40:56 +0200 Subject: [PATCH 15/20] Update _probareg.py --- .../integrations/skpro/_probareg.py | 48 +++++++++---------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py index 0b0755c4..9bdc240e 100644 --- a/src/hyperactive/integrations/skpro/_probareg.py +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -12,10 +12,10 @@ class ProbaRegOptCV(_DelegatedProbaRegressor): - """Tune an sktime classifier via any optimizer in the hyperactive toolbox. + """Tune an skpro regressor via any optimizer in the hyperactive toolbox. - ``TSCOptCV`` uses any available tuning engine from ``hyperactive`` - to tune a classifier by backtesting. + ``ProbaRegOptCV`` uses any available tuning engine from ``hyperactive`` + to tune a probabilistic regressor. It passes backtesting results as scores to the tuning engine, which identifies the best hyperparameters. 
@@ -29,12 +29,12 @@ class ProbaRegOptCV(_DelegatedProbaRegressor): Configuration of the tuning engine is as per the respective documentation. - Formally, ``TSCOptCV`` does the following: + Formally, ``ProbaRegOptCV`` does the following: In ``fit``: * wraps the ``estimator``, ``scoring``, and other parameters - into a ``SktimeClassificationExperiment`` instance, which is passed to the + into a ``SkproProbaRegExperiment`` instance, which is passed to the optimizer ``optimizer`` as the ``experiment`` argument. * Optimal parameters are then obtained from ``optimizer.solve``, and set as ``best_params_`` and ``best_estimator_`` attributes. @@ -45,8 +45,8 @@ class ProbaRegOptCV(_DelegatedProbaRegressor): Parameters ---------- - estimator : sktime classifier, BaseClassifier instance or interface compatible - The classifier to tune, must implement the sktime classifier interface. + estimator : skpro regressor, BaseProbaRegressor instance or interface compatible + The regressor to tune, must implement the skpro regressor interface. optimizer : hyperactive BaseOptimizer The optimizer to be used for hyperparameter search. @@ -59,11 +59,6 @@ class ProbaRegOptCV(_DelegatedProbaRegressor): - integer, number of folds folds in a ``KFold`` splitter, ``shuffle=True`` - An iterable yielding (train, test) splits as arrays of indices. - For integer/None inputs, if the estimator is a classifier and ``y`` is - either binary or multiclass, :class:`StratifiedKFold` is used. In all - other cases, :class:`KFold` is used. These splitters are instantiated - with ``shuffle=False`` so the splits will be the same across calls. - scoring : str, callable, default=None Strategy to evaluate the performance of the cross-validated model on the test set. Can be: @@ -138,34 +133,35 @@ class ProbaRegOptCV(_DelegatedProbaRegressor): For illustration, we use grid search, this can be replaced by any other optimizer. 1. 
defining the tuned estimator: - >>> from sktime.classification.dummy import DummyClassifier + >>> from skpro.regression.dummy import DummyProbaRegressor + >>> from skpro.metrics import CRPS >>> from sklearn.model_selection import KFold - >>> from hyperactive.integrations.sktime import TSCOptCV + >>> from hyperactive.integrations.sktime import ProbaRegOptCV >>> from hyperactive.opt import GridSearchSk as GridSearch >>> - >>> param_grid = {"strategy": ["most_frequent", "stratified"]} - >>> tuned_naive = TSCOptCV( - ... DummyClassifier(), + >>> param_grid = {"strategy": ["empirical", "normal"]} + >>> tuned_naive = ProbaRegOptCV( + ... DummyProbaRegressor(), ... GridSearch(param_grid), ... cv=KFold(n_splits=2, shuffle=False), + ... scoring=CRPS(), ... ) 2. fitting the tuned estimator: - >>> from sktime.datasets import load_unit_test - >>> X_train, y_train = load_unit_test( - ... return_X_y=True, split="TRAIN", return_type="pd-multiindex" - ... ) - >>> X_test, _ = load_unit_test( - ... return_X_y=True, split="TEST", return_type="pd-multiindex" - ... ) + >>> import pandas as pd + >>> from sklearn.datasets import load_diabetes + >>> from sklearn.model_selection import train_test_split + >>> X, y = load_diabetes(return_X_y=True, as_frame=True) + >>> y = pd.DataFrame(y) # skpro assumes y is pd.DataFrame + >>> X_train, y_train, X_test, y_test = train_test_split(X, y) >>> >>> tuned_naive.fit(X_train, y_train) - TSCOptCV(...) + ProbaRegOptCV(...) >>> y_pred = tuned_naive.predict(X_test) 3. 
obtaining best parameters and best estimator >>> best_params = tuned_naive.best_params_ - >>> best_classifier = tuned_naive.best_estimator_ + >>> best_regressor = tuned_naive.best_estimator_ """ _tags = { From 6e3b747f01d8a0a3a9ada225456898982a396769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 01:51:25 +0200 Subject: [PATCH 16/20] Update _probareg.py --- src/hyperactive/integrations/skpro/_probareg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py index 9bdc240e..761ccadc 100644 --- a/src/hyperactive/integrations/skpro/_probareg.py +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -136,7 +136,7 @@ class ProbaRegOptCV(_DelegatedProbaRegressor): >>> from skpro.regression.dummy import DummyProbaRegressor >>> from skpro.metrics import CRPS >>> from sklearn.model_selection import KFold - >>> from hyperactive.integrations.sktime import ProbaRegOptCV + >>> from hyperactive.integrations.skpro import ProbaRegOptCV >>> from hyperactive.opt import GridSearchSk as GridSearch >>> >>> param_grid = {"strategy": ["empirical", "normal"]} From 232a283b31f291196f6331aba15eabdbbec39ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 01:58:21 +0200 Subject: [PATCH 17/20] Update _probareg.py --- src/hyperactive/integrations/skpro/_probareg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py index 761ccadc..ed7a782d 100644 --- a/src/hyperactive/integrations/skpro/_probareg.py +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -153,7 +153,7 @@ class ProbaRegOptCV(_DelegatedProbaRegressor): >>> from sklearn.model_selection import train_test_split >>> X, y = load_diabetes(return_X_y=True, as_frame=True) >>> y = pd.DataFrame(y) # skpro assumes y is pd.DataFrame - >>> 
X_train, y_train, X_test, y_test = train_test_split(X, y) + >>> X_train, X_test, y_train, y_test = train_test_split(X, y) >>> >>> tuned_naive.fit(X_train, y_train) ProbaRegOptCV(...) From ad15b382e295e3272c65e5a6bfbced6e176c9237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 16:01:17 +0200 Subject: [PATCH 18/20] Update _probareg.py --- src/hyperactive/integrations/skpro/_probareg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py index ed7a782d..9ddb63d9 100644 --- a/src/hyperactive/integrations/skpro/_probareg.py +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -167,7 +167,7 @@ class ProbaRegOptCV(_DelegatedProbaRegressor): _tags = { "authors": "fkiraly", "maintainers": "fkiraly", - "python_dependencies": "sktime", + "python_dependencies": "skpro", } # attribute for _DelegatedClassifier, which then delegates From 5fefa69e9aec70cb8cd3fedd201d28682f863d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 16:02:05 +0200 Subject: [PATCH 19/20] Update _skl_cv.py --- src/hyperactive/experiment/integrations/_skl_cv.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/_skl_cv.py b/src/hyperactive/experiment/integrations/_skl_cv.py index 35f8da7d..b56edb6c 100644 --- a/src/hyperactive/experiment/integrations/_skl_cv.py +++ b/src/hyperactive/experiment/integrations/_skl_cv.py @@ -16,8 +16,6 @@ def _coerce_cv(cv): cv_splitter : cross-validation generator or iterable A sklearn-compatible cross-validation splitter. 
""" - from sklearn.model_selection import KFold - # default handling for cv if isinstance(cv, int): from sklearn.model_selection import KFold From d60ccf4d79713caa2b9c645d207038619fdfd3da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 27 Sep 2025 16:02:26 +0200 Subject: [PATCH 20/20] Update skpro_probareg.py --- src/hyperactive/experiment/integrations/skpro_probareg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py index 6a7cb830..a6a99f17 100644 --- a/src/hyperactive/experiment/integrations/skpro_probareg.py +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -1,4 +1,4 @@ -"""Experiment adapter for skpro corss-validation experiments.""" +"""Experiment adapter for skpro cross-validation experiments.""" # copyright: hyperactive developers, MIT License (see LICENSE file)