From 376d68e34ac2bc78b8c813c088cc8ef166ff728a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 21:59:50 +0200 Subject: [PATCH 01/29] Create sktime_cv.py --- .../experiment/integrations/sktime_cv.py | 299 ++++++++++++++++++ 1 file changed, 299 insertions(+) create mode 100644 src/hyperactive/experiment/integrations/sktime_cv.py diff --git a/src/hyperactive/experiment/integrations/sktime_cv.py b/src/hyperactive/experiment/integrations/sktime_cv.py new file mode 100644 index 00000000..9e9bd74a --- /dev/null +++ b/src/hyperactive/experiment/integrations/sktime_cv.py @@ -0,0 +1,299 @@ +"""Experiment adapter for sktime backtesting experiments.""" +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import numpy as np +from sklearn import clone +from sklearn.metrics import check_scoring +from sklearn.model_selection import cross_validate +from sklearn.utils.validation import _num_samples + +from hyperactive.base import BaseExperiment + +class SktimeForecastingExperiment(BaseExperiment): + """Experiment adapter for time backtesting experiments. + + This class is used to perform backtesting experiments using a given + sktime forecaster. It allows for hyperparameter tuning and evaluation of + the model's performance. + + The score returned is the summary backtesting score, + of applying ``sktime`` ``evaluate`` to ``estimator`` with the parameters given in + ``score`` ``params``. + + The performed is specified by the ``cv`` parameter, + and the scoring metric is specified by the ``scoring`` parameter. + The ``X`` and ``y`` parameters are the input data and target values, + which are used in fit/predict cross-validation. + + Parameters + ---------- + forecaster : sktime BaseForecaster descendant (concrete forecaster) + sktime forecaster to benchmark + + cv : sktime BaseSplitter descendant + determines split of ``y`` and possibly ``X`` into test and train folds + y is always split according to ``cv``, see above + if ``cv_X`` is not passed, ``X`` splits are subset to ``loc`` equal to ``y`` + if ``cv_X`` is passed, ``X`` is split according to ``cv_X`` + + y : sktime time series container + Target (endogeneous) time series used in the evaluation experiment + + X : sktime time series container, of same mtype as y + Exogenous time series used in the evaluation experiment + + strategy : {"refit", "update", "no-update_params"}, optional, default="refit" + defines the ingestion mode when the forecaster sees new data when window expands + "refit" = forecaster is refitted to each training window + "update" = forecaster is updated with training window data, in sequence provided + "no-update_params" = fit to first training window, re-used without fit or update + + scoring : subclass of sktime.performance_metrics.BaseMetric, + default=None. Used to get a score function that takes y_pred and y_test + arguments and accept y_train as keyword argument. + If None, then uses scoring = MeanAbsolutePercentageError(symmetric=True). + + error_score : "raise" or numeric, default=np.nan + Value to assign to the score if an exception occurs in estimator fitting. If set + to "raise", the exception is raised. If a numeric value is given, + FitFailedWarning is raised. + + cv_X : sktime BaseSplitter descendant, optional + determines split of ``X`` into test and train folds + default is ``X`` being split to identical ``loc`` indices as ``y`` + if passed, must have same number of splits as ``cv`` + + backend : string, by default "None". + Parallelization backend to use for runs. + Runs parallel evaluate if specified and ``strategy="refit"``. + + - "None": executes loop sequentially, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + - "dask_lazy": same as "dask", + but changes the return to (lazy) ``dask.dataframe.DataFrame``. + - "ray": uses ``ray``, requires ``ray`` package in environment + + Recommendation: Use "dask" or "loky" for parallel evaluate. + "threading" is unlikely to see speed ups due to the GIL and the serialization + backend (``cloudpickle``) for "dask" and "loky" is generally more robust + than the standard ``pickle`` library used in "multiprocessing". + + backend_params : dict, optional + additional parameters passed to the backend as config. + Directly passed to ``utils.parallel.parallelize``. + Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + >>> from hyperactive.experiment.integrations import SklearnCvExperiment + >>> from sklearn.datasets import load_iris + >>> from sklearn.svm import SVC + >>> from sklearn.metrics import accuracy_score + >>> from sklearn.model_selection import KFold + >>> + >>> X, y = load_iris(return_X_y=True) + >>> + >>> sklearn_exp = SklearnCvExperiment( + ... estimator=SVC(), + ... scoring=accuracy_score, + ... cv=KFold(n_splits=3, shuffle=True), + ... X=X, + ... y=y, + ... ) + >>> params = {"C": 1.0, "kernel": "linear"} + >>> score, add_info = sklearn_exp.score(params) + + For default choices of ``scoring`` and ``cv``: + >>> sklearn_exp = SklearnCvExperiment( + ... estimator=SVC(), + ... X=X, + ... y=y, + ... ) + >>> params = {"C": 1.0, "kernel": "linear"} + >>> score, add_info = sklearn_exp.score(params) + + Quick call without metadata return or dictionary: + >>> score = sklearn_exp(C=1.0, kernel="linear") + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "sktime", # python dependencies + } + + def __init__( + self, + forecaster, + cv, + y, + X=None, + strategy="refit", + scoring=None, + error_score=np.nan, + cv_X=None, + backend=None, + backend_params=None, + ): + self.forecaster = forecaster + self.X = X + self.y = y + self.strategy = strategy + self.scoring = scoring + self.cv = cv + self.error_score = error_score + self.cv_X = cv_X + self.backend = backend + self.backend_params = backend_params + + super().__init__() + + if scoring.get_tag("lower_is_better", False): + higher_or_lower_better = "lower" + else: + higher_or_lower_better = "higher" + self.set_tags(**{"property:higher_or_lower_is_better": higher_or_lower_better}) + + def _paramnames(self): + """Return the parameter names of the search. + + Returns + ------- + list of str + The parameter names of the search parameters. + """ + return list(self.estimator.get_params().keys()) + + def _evaluate(self, params): + """Evaluate the parameters. + + Parameters + ---------- + params : dict with string keys + Parameters to evaluate. + + Returns + ------- + float + The value of the parameters as per evaluation. + dict + Additional metadata about the search. + """ + from sktime.forecasting.model_evaluation import evaluate + + results = evaluate( + self.forecaster, + cv=self.cv, + y=self.y, + X=self.X, + strategy=self.strategy, + scoring=self.scoring, + error_score=self.error_score, + cv_X=self.cv_X, + backend=self.backend, + backend_params=self.backend_params, + ) + + result_name = f"test_{self.scoring.name}" + + res_float = results[result_name].mean() + + return res_float, results + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the skbase object. + + ``get_test_params`` is a unified interface point to store + parameter settings for testing purposes. This function is also + used in ``create_test_instance`` and ``create_test_instances_and_names`` + to construct test instances. + + ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``. + + Each ``dict`` is a parameter configuration for testing, + and can be used to construct an "interesting" test instance. + A call to ``cls(**params)`` should + be valid for all dictionaries ``params`` in the return of ``get_test_params``. + + The ``get_test_params`` need not return fixed lists of dictionaries, + it can also return dynamic or stochastic parameter settings. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + from sktime.datasets import load_airline, load_longley + from sktime.forecasting.naive import NaiveForecaster + from sktime.split import ExpandingWindowSplitter + + y = load_airline() + params0 = { + "forecaster": NaiveForecaster(strategy="last"), + "cv": ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), + "y": y, + } + + from sktime.performance_metrics import MeanAbsolutePercentageError + + y, X = load_longley(return_X_y=True) + params1 = { + "forecaster": NaiveForecaster(strategy="last"), + "cv": ExpandingWindowSplitter(initial_window=12, step_length=6, fh=6), + "y": y, + "X": X, + "scoring": MeanAbsolutePercentageError(symmetric=False), + } + + return [params0, params1] + + @classmethod + def _get_score_params(self): + """Return settings for testing score/evaluate functions. Used in tests only. + + Returns a list, the i-th element should be valid arguments for + self.evaluate and self.score, of an instance constructed with + self.get_test_params()[i]. + + Returns + ------- + list of dict + The parameters to be used for scoring. + """ + val0 = {"strategy": "mean"} + val1 = {"strategy": "last"} + return [val0, val1] From b5eee086580290ff0993d5a973ae3b83be5c0db6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:03:42 +0200 Subject: [PATCH 02/29] rename, export --- src/hyperactive/experiment/integrations/__init__.py | 5 ++++- .../integrations/{sktime_cv.py => sktime_forecasting.py} | 0 2 files changed, 4 insertions(+), 1 deletion(-) rename src/hyperactive/experiment/integrations/{sktime_cv.py => sktime_forecasting.py} (100%) diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py index 2d043871..9da755d7 100644 --- a/src/hyperactive/experiment/integrations/__init__.py +++ b/src/hyperactive/experiment/integrations/__init__.py @@ -3,5 +3,8 @@ from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment +from hyperactive.experiment.integrations.sktime_forecasting import ( + SktimeForecastingExperiment, +) -__all__ = ["SklearnCvExperiment"] +__all__ = ["SklearnCvExperiment", "SktimeForecastingExperiment"] diff --git a/src/hyperactive/experiment/integrations/sktime_cv.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py similarity index 100% rename from src/hyperactive/experiment/integrations/sktime_cv.py rename to src/hyperactive/experiment/integrations/sktime_forecasting.py From 3cf7f1dc3ed40a19fe15af78a303c7b81ba31201 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:03:45 +0200 Subject: [PATCH 03/29] tests --- Makefile | 2 +- pyproject.toml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 746de3f3..5f6d9a64 100644 --- a/Makefile +++ b/Makefile @@ -87,7 +87,7 @@ install-no-extras-for-test: python -m pip install .[test] install-all-extras-for-test: - python -m pip install .[all_extras,test] + python -m pip install .[all_extras,sktime-integration,test] install-editable: pip install -e . diff --git a/pyproject.toml b/pyproject.toml index e9d58347..e1661c10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,9 @@ dependencies = [ sklearn-integration = [ "scikit-learn <1.8.0", ] +sktime-integration = [ + "sktime", +] build = [ "setuptools", "build", From 3b8e06bb83ef25f2108aadb6a036543b34f55f7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:04:07 +0200 Subject: [PATCH 04/29] linting --- .../experiment/integrations/sktime_forecasting.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 9e9bd74a..403c0a6d 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -2,13 +2,10 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) import numpy as np -from sklearn import clone -from sklearn.metrics import check_scoring -from sklearn.model_selection import cross_validate -from sklearn.utils.validation import _num_samples from hyperactive.base import BaseExperiment + class SktimeForecastingExperiment(BaseExperiment): """Experiment adapter for time backtesting experiments. From dd440ad493b287db712b74519e53a5df3b323283 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:08:55 +0200 Subject: [PATCH 05/29] expl --- .../integrations/sktime_forecasting.py | 36 +++++++++---------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 403c0a6d..77e251cb 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -106,35 +106,31 @@ class SktimeForecastingExperiment(BaseExperiment): Example ------- - >>> from hyperactive.experiment.integrations import SklearnCvExperiment - >>> from sklearn.datasets import load_iris - >>> from sklearn.svm import SVC - >>> from sklearn.metrics import accuracy_score - >>> from sklearn.model_selection import KFold + >>> from hyperactive.experiment.integrations import SktimeForecastingExperiment + >>> from sktime.datasets import load_airline + >>> from sktime.forecasting.naive import NaiveForecaster + >>> from sktime.performance_metrics import MeanAbsolutePercentageError + >>> from sktime.split import ExpandingWindowSplitter >>> - >>> X, y = load_iris(return_X_y=True) - >>> - >>> sklearn_exp = SklearnCvExperiment( - ... estimator=SVC(), - ... scoring=accuracy_score, - ... cv=KFold(n_splits=3, shuffle=True), - ... X=X, + >>> sktime_exp = SktimeForecastingExperiment( + ... forecaster=NaiveForecaster(strategy="last"), + ... scoring=MeanAbsolutePercentageError(), + ... cv=ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), ... y=y, ... ) - >>> params = {"C": 1.0, "kernel": "linear"} - >>> score, add_info = sklearn_exp.score(params) + >>> params = {"strategy": "mean"} + >>> score, add_info = sktime_exp.score(params) For default choices of ``scoring`` and ``cv``: - >>> sklearn_exp = SklearnCvExperiment( - ... estimator=SVC(), - ... X=X, + >>> sktime_exp = SklearnCvExperiment( + ... estimator=NaiveForecaster(strategy="last"), ... y=y, ... ) - >>> params = {"C": 1.0, "kernel": "linear"} - >>> score, add_info = sklearn_exp.score(params) + >>> params = {"strategy": "mean"} + >>> score, add_info = sktime_exp.score(params) Quick call without metadata return or dictionary: - >>> score = sklearn_exp(C=1.0, kernel="linear") + >>> score = sktime_exp(strategy="mean") """ _tags = { From fd6d2db0e0d906f42e618ff8f50dd8d0de70ef2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:09:17 +0200 Subject: [PATCH 06/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 77e251cb..788defc3 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -121,9 +121,10 @@ class SktimeForecastingExperiment(BaseExperiment): >>> params = {"strategy": "mean"} >>> score, add_info = sktime_exp.score(params) - For default choices of ``scoring`` and ``cv``: + For default choices of ``scoring``: >>> sktime_exp = SklearnCvExperiment( ... estimator=NaiveForecaster(strategy="last"), + ... cv=ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), ... y=y, ... ) >>> params = {"strategy": "mean"} From 8be9a6450587a88e6aa2ea19eaaec88ac0fb5c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:22:25 +0200 Subject: [PATCH 07/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 788defc3..8100ae04 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -262,7 +262,7 @@ def get_test_params(cls, parameter_set="default"): "y": y, } - from sktime.performance_metrics import MeanAbsolutePercentageError + from sktime.performance_metrics.forecating import MeanAbsolutePercentageError y, X = load_longley(return_X_y=True) params1 = { From f5184b89af482f5bc731a8fe8bf03c238961fb86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:25:25 +0200 Subject: [PATCH 08/29] linting --- Makefile | 2 +- src/hyperactive/experiment/integrations/__init__.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5f6d9a64..f6059c7b 100644 --- a/Makefile +++ b/Makefile @@ -94,4 +94,4 @@ install-editable: reinstall: uninstall install -reinstall-editable: uninstall install-editable \ No newline at end of file +reinstall-editable: uninstall install-editable diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py index 9da755d7..1b600df2 100644 --- a/src/hyperactive/experiment/integrations/__init__.py +++ b/src/hyperactive/experiment/integrations/__init__.py @@ -1,7 +1,6 @@ """Integrations with packages for tuning.""" # copyright: hyperactive developers, MIT License (see LICENSE file) - from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment from hyperactive.experiment.integrations.sktime_forecasting import ( SktimeForecastingExperiment, From c2e08b8449a1a36e012648600c3f09766beef59a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:27:12 +0200 Subject: [PATCH 09/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 8100ae04..afaa05b3 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -262,7 +262,7 @@ def get_test_params(cls, parameter_set="default"): "y": y, } - from sktime.performance_metrics.forecating import MeanAbsolutePercentageError + from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError y, X = load_longley(return_X_y=True) params1 = { From ea6df2b01596722239e214d1dae5224944e1de36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:34:06 +0200 Subject: [PATCH 10/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index afaa05b3..8bc9daf2 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -264,7 +264,7 @@ def get_test_params(cls, parameter_set="default"): from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError - y, X = load_longley(return_X_y=True) + y, X = load_longley() params1 = { "forecaster": NaiveForecaster(strategy="last"), "cv": ExpandingWindowSplitter(initial_window=12, step_length=6, fh=6), From f629d217f2ebfe371ee78b885eaab08717ca7bf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:36:28 +0200 Subject: [PATCH 11/29] Update test_all_objects.py --- src/hyperactive/tests/test_all_objects.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/hyperactive/tests/test_all_objects.py b/src/hyperactive/tests/test_all_objects.py index 3635b3bb..70d6a75b 100644 --- a/src/hyperactive/tests/test_all_objects.py +++ b/src/hyperactive/tests/test_all_objects.py @@ -121,11 +121,14 @@ def _all_objects(self): if isclass(filter): obj_list = [obj for obj in obj_list if issubclass(obj, filter)] - # run_test_for_class selects the estimators to run - # based on whether they have changed, and whether they have all dependencies - # internally, uses the ONLY_CHANGED_MODULES flag, - # and checks the python env against python_dependencies tag - # obj_list = [obj for obj in obj_list if run_test_for_class(obj)] + # only run tests if all soft dependencies are present + def softdeps_present(obj): + """Check if the object has all dependencies present.""" + from skbase.utils.dependencies import _check_estimator_deps + + return _check_estimator_deps(obj, severity="none") + + obj_list = [obj for obj in obj_list if softdeps_present(obj)] return obj_list From 1c7d8693548b1d22b6520600d8ff367037535646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:42:37 +0200 Subject: [PATCH 12/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 8bc9daf2..5de5da3d 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -17,7 +17,7 @@ class SktimeForecastingExperiment(BaseExperiment): of applying ``sktime`` ``evaluate`` to ``estimator`` with the parameters given in ``score`` ``params``. - The performed is specified by the ``cv`` parameter, + The backtesting performed is specified by the ``cv`` parameter, and the scoring metric is specified by the ``scoring`` parameter. The ``X`` and ``y`` parameters are the input data and target values, which are used in fit/predict cross-validation. @@ -166,7 +166,7 @@ def __init__( super().__init__() - if scoring.get_tag("lower_is_better", False): + if scoring is None or scoring.get_tag("lower_is_better", False): higher_or_lower_better = "lower" else: higher_or_lower_better = "higher" From 111b151ef0ee78c1819ff59d9e458a9861b11d4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:43:40 +0200 Subject: [PATCH 13/29] Update test_all_objects.py --- src/hyperactive/tests/test_all_objects.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hyperactive/tests/test_all_objects.py b/src/hyperactive/tests/test_all_objects.py index 70d6a75b..8baa65e1 100644 --- a/src/hyperactive/tests/test_all_objects.py +++ b/src/hyperactive/tests/test_all_objects.py @@ -1,7 +1,6 @@ """Automated tests based on the skbase test suite template.""" from inspect import isclass -import shutil from skbase.testing import BaseFixtureGenerator as _BaseFixtureGenerator from skbase.testing import QuickTester as _QuickTester @@ -285,11 +284,12 @@ def test_gfo_integration(self, object_instance): optimizer = object_instance # 1. define the experiment - from hyperactive.experiment.integrations import SklearnCvExperiment from sklearn.datasets import load_iris - from sklearn.svm import SVC from sklearn.metrics import accuracy_score from sklearn.model_selection import KFold + from sklearn.svm import SVC + + from hyperactive.experiment.integrations import SklearnCvExperiment X, y = load_iris(return_X_y=True) From b231e6680258993f97a6a5f42ded5faf7d5aaa88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 22:54:33 +0200 Subject: [PATCH 14/29] fixes --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 5de5da3d..c0e74c51 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -109,7 +109,7 @@ class SktimeForecastingExperiment(BaseExperiment): >>> from hyperactive.experiment.integrations import SktimeForecastingExperiment >>> from sktime.datasets import load_airline >>> from sktime.forecasting.naive import NaiveForecaster - >>> from sktime.performance_metrics import MeanAbsolutePercentageError + >>> from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError >>> from sktime.split import ExpandingWindowSplitter >>> >>> sktime_exp = SktimeForecastingExperiment( @@ -180,7 +180,7 @@ def _paramnames(self): list of str The parameter names of the search parameters. """ - return list(self.estimator.get_params().keys()) + return list(self.forecaster.get_params().keys()) def _evaluate(self, params): """Evaluate the parameters. From c07ddeda60a9528f753eb4993ba8bc2c03f601c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 23:11:04 +0200 Subject: [PATCH 15/29] Update sktime_forecasting.py --- .../experiment/integrations/sktime_forecasting.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index c0e74c51..008f9040 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -166,6 +166,15 @@ def __init__( super().__init__() + if scoring is None: + from sktime.performance_metrics.forecasting import ( + MeanAbsolutePercentageError + ) + + self._scoring = MeanAbsolutePercentageError(symmetric=True) + else: + self._scoring = scoring + if scoring is None or scoring.get_tag("lower_is_better", False): higher_or_lower_better = "lower" else: @@ -205,14 +214,14 @@ def _evaluate(self, params): y=self.y, X=self.X, strategy=self.strategy, - scoring=self.scoring, + scoring=self._scoring, error_score=self.error_score, cv_X=self.cv_X, backend=self.backend, backend_params=self.backend_params, ) - result_name = f"test_{self.scoring.name}" + result_name = f"test_{self._scoring.name}" res_float = results[result_name].mean() From a6675460b2401aff681daa1349ea62fb929c0724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 23:11:54 +0200 Subject: [PATCH 16/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 008f9040..7d1ef33d 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -112,6 +112,8 @@ class SktimeForecastingExperiment(BaseExperiment): >>> from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError >>> from sktime.split import ExpandingWindowSplitter >>> + >>> y = load_airline() + >>> >>> sktime_exp = SktimeForecastingExperiment( ... forecaster=NaiveForecaster(strategy="last"), ... scoring=MeanAbsolutePercentageError(), From 94f9173856dba9e0da2b3a646b16d7454c2e2455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 23:13:55 +0200 Subject: [PATCH 17/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 7d1ef33d..3302700e 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -170,7 +170,7 @@ def __init__( if scoring is None: from sktime.performance_metrics.forecasting import ( - MeanAbsolutePercentageError + MeanAbsolutePercentageError, ) self._scoring = MeanAbsolutePercentageError(symmetric=True) From 45f0f3bf1b6aca9e7e25df2147d6725b48853a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 23:18:28 +0200 Subject: [PATCH 18/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 3302700e..36c64949 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -124,7 +124,7 @@ class SktimeForecastingExperiment(BaseExperiment): >>> score, add_info = sktime_exp.score(params) For default choices of ``scoring``: - >>> sktime_exp = SklearnCvExperiment( + >>> sktime_exp = SktimeForecastingExperiment( ... estimator=NaiveForecaster(strategy="last"), ... cv=ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), ... y=y, @@ -227,7 +227,7 @@ def _evaluate(self, params): res_float = results[result_name].mean() - return res_float, results + return res_float, {"results": results} @classmethod def get_test_params(cls, parameter_set="default"): From 92cb16a2f84b5291017ae2d584de3b5a79eaa5f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 15 Aug 2025 23:35:31 +0200 Subject: [PATCH 19/29] Update sktime_forecasting.py --- src/hyperactive/experiment/integrations/sktime_forecasting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_forecasting.py b/src/hyperactive/experiment/integrations/sktime_forecasting.py index 36c64949..98c53a44 100644 --- a/src/hyperactive/experiment/integrations/sktime_forecasting.py +++ b/src/hyperactive/experiment/integrations/sktime_forecasting.py @@ -125,7 +125,7 @@ class SktimeForecastingExperiment(BaseExperiment): For default choices of ``scoring``: >>> sktime_exp = SktimeForecastingExperiment( - ... estimator=NaiveForecaster(strategy="last"), + ... forecaster=NaiveForecaster(strategy="last"), ... cv=ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), ... y=y, ... ) @@ -278,7 +278,7 @@ def get_test_params(cls, parameter_set="default"): y, X = load_longley() params1 = { "forecaster": NaiveForecaster(strategy="last"), - "cv": ExpandingWindowSplitter(initial_window=12, step_length=6, fh=6), + "cv": ExpandingWindowSplitter(initial_window=3, step_length=3, fh=1), "y": y, "X": X, "scoring": MeanAbsolutePercentageError(symmetric=False), From 6a485fcf4901cf771e685d7033944d19b12129b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 08:49:24 +0200 Subject: [PATCH 20/29] draft --- .../integrations/sktime/__init__.py | 4 +- .../integrations/sktime/_forecasting.py | 396 ++++++++++++++++++ src/hyperactive/integrations/sktime/main.py | 8 - 3 files changed, 399 insertions(+), 9 deletions(-) create mode 100644 src/hyperactive/integrations/sktime/_forecasting.py delete mode 100644 src/hyperactive/integrations/sktime/main.py diff --git a/src/hyperactive/integrations/sktime/__init__.py b/src/hyperactive/integrations/sktime/__init__.py index 09bdbd71..b4b7b762 100644 --- a/src/hyperactive/integrations/sktime/__init__.py +++ b/src/hyperactive/integrations/sktime/__init__.py @@ -3,4 +3,6 @@ # License: MIT License -from .main import HyperactiveSearchCV +from hyperactive.integrations.sktime._forecasting import ForecastingOptCV + +__all__ = ["ForecastingOptCV"] diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py new file mode 100644 index 00000000..48fa52fc --- /dev/null +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -0,0 +1,396 @@ +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import numpy as np + +from skbase.utils.dependencies import _check_soft_dependencies + +if _check_soft_dependencies("sktime", severity="none"): + from sktime.forecasting.base._delegate import _DelegatedForecaster +else: + from skbase.base import BaseEstimator as _DelegatedForecaster + +from hyperactive.experiment.integrations.sktime_forecasting import ( + SktimeForecastingExperiment, +) + + +class ForecastingOptCV(_DelegatedForecaster): + """Tuning via any optimizer in the hyperactive API. + + Parameters + ---------- + forecaster : sktime forecaster, BaseForecaster instance or interface compatible + The forecaster to tune, must implement the sktime forecaster interface. + + optimizer : hyperactive BaseOptimizer + The optimizer to be used for hyperparameter search. + + cv : sktime BaseSplitter descendant + determines split of ``y`` and possibly ``X`` into test and train folds + y is always split according to ``cv``, see above + if ``cv_X`` is not passed, ``X`` splits are subset to ``loc`` equal to ``y`` + if ``cv_X`` is passed, ``X`` is split according to ``cv_X`` + + strategy : {"refit", "update", "no-update_params"}, optional, default="refit" + defines the ingestion mode when the forecaster sees new data when window expands + "refit" = forecaster is refitted to each training window + "update" = forecaster is updated with training window data, in sequence provided + "no-update_params" = fit to first training window, re-used without fit or update + + update_behaviour : str, optional, default = "full_refit" + one of {"full_refit", "inner_only", "no_update"} + behaviour of the forecaster when calling update + "full_refit" = both tuning parameters and inner estimator refit on all data seen + "inner_only" = tuning parameters are not re-tuned, inner estimator is updated + "no_update" = neither tuning parameters nor inner estimator are updated + + scoring : sktime metric (BaseMetric), str, or callable, optional (default=None) + scoring metric to use in tuning the forecaster + + * sktime metric objects (BaseMetric) descendants can be searched + with the ``registry.all_estimators`` search utility, + for instance via ``all_estimators("metric", as_dataframe=True)`` + + * If callable, must have signature + ``(y_true: 1D np.ndarray, y_pred: 1D np.ndarray) -> float``, + assuming np.ndarrays being of the same length, and lower being better. + Metrics in sktime.performance_metrics.forecasting are all of this form. + + * If str, uses registry.resolve_alias to resolve to one of the above. + Valid strings are valid registry.craft specs, which include + string repr-s of any BaseMetric object, e.g., "MeanSquaredError()"; + and keys of registry.ALIAS_DICT referring to metrics. + + * If None, defaults to MeanAbsolutePercentageError() + + refit : bool, optional (default=True) + True = refit the forecaster with the best parameters on the entire data in fit + False = no refitting takes place. The forecaster cannot be used to predict. + This is to be used to tune the hyperparameters, and then use the estimator + as a parameter estimator, e.g., via get_fitted_params or PluginParamsForecaster. + + return_n_best_forecasters : int, default=1 + In case the n best forecaster should be returned, this value can be set + and the n best forecasters will be assigned to n_best_forecasters_. + Set return_n_best_forecasters to -1 to return all forecasters. + + error_score : "raise" or numeric, default=np.nan + Value to assign to the score if an exception occurs in estimator fitting. If set + to "raise", the exception is raised. If a numeric value is given, + FitFailedWarning is raised. + + cv_X : sktime BaseSplitter descendant, optional + determines split of ``X`` into test and train folds + default is ``X`` being split to identical ``loc`` indices as ``y`` + if passed, must have same number of splits as ``cv`` + + backend : string, by default "None". + Parallelization backend to use for runs. + Runs parallel evaluate if specified and ``strategy="refit"``. + + - "None": executes loop sequentially, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + - "dask_lazy": same as "dask", + but changes the return to (lazy) ``dask.dataframe.DataFrame``. + - "ray": uses ``ray``, requires ``ray`` package in environment + + Recommendation: Use "dask" or "loky" for parallel evaluate. + "threading" is unlikely to see speed ups due to the GIL and the serialization + backend (``cloudpickle``) for "dask" and "loky" is generally more robust + than the standard ``pickle`` library used in "multiprocessing". + + backend_params : dict, optional + additional parameters passed to the backend as config. + Directly passed to ``utils.parallel.parallelize``. + Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + Tuning sklearn SVC via grid search + + 1. defining the tuned estimator: + >>> from sklearn.svm import SVC + >>> from hyperactive.integrations.sklearn import OptCV + >>> from hyperactive.opt import GridSearchSk as GridSearch + >>> + >>> param_grid = {"kernel": ["linear", "rbf"], "C": [1, 10]} + >>> tuned_svc = OptCV(SVC(), GridSearch(param_grid)) + + 2. fitting the tuned estimator: + >>> from sklearn.datasets import load_iris + >>> from sklearn.model_selection import train_test_split + >>> X, y = load_iris(return_X_y=True) + >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + >>> + >>> tuned_svc.fit(X_train, y_train) + OptCV(...) + >>> y_pred = tuned_svc.predict(X_test) + + 3. obtaining best parameters and best estimator + >>> best_params = tuned_svc.best_params_ + >>> best_estimator = tuned_svc.best_estimator_ + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "sktime", + } + + # attribute for _DelegatedForecaster, which then delegates + # all non-overridden methods are same as of getattr(self, _delegate_name) + # see further details in _DelegatedForecaster docstring + _delegate_name = "best_forecaster_" + + def __init__( + self, + forecaster, + optimizer, + cv, + strategy="refit", + scoring=None, + refit=True, + return_n_best_forecasters=1, + error_score=np.nan, + cv_X=None, + backend=None, + backend_params=None, + ): + self.forecaster = forecaster + self.optimizer = optimizer + self.cv = cv + self.strategy = strategy + self.scoring = scoring + self.refit = refit + self.return_n_best_forecasters = return_n_best_forecasters + self.error_score = error_score + self.cv_X = cv_X + self.backend = backend + self.backend_params = backend_params + super().__init__() + + def _fit(self, y, X, fh): + """Fit to training data. + + Parameters + ---------- + y : pd.Series + Target time series to which to fit the forecaster. + fh : int, list or np.array, optional (default=None) + The forecasters horizon with the steps ahead to to predict. + X : pd.DataFrame, optional (default=None) + Exogenous variables are ignored + + Returns + ------- + self : returns an instance of self. + """ + from sktime.utils.validation.forecasting import check_scoring + + forecaster = self.forecaster.clone() + + scoring = check_scoring(self.scoring, obj=self) + scoring_name = f"test_{scoring.name}" + + experiment = SktimeForecastingExperiment( + forecaster=forecaster, + scoring=scoring, + cv=self.cv, + X=X, + y=y, + strategy=self.strategy, + error_score=self.error_score, + cv_X=self.cv_X, + backend=self.backend, + backend_params=self.backend_params, + ) + + optimizer = self.optimizer.clone() + optimizer.set_params(experiment=experiment) + best_params, results = optimizer.run() + + self.best_params_ = best_params + self.best_forecaster_ = forecaster.set_params(**best_params) + + # Refit model with best parameters. + if self.refit: + self.best_forecaster_.fit(y=y, X=X, fh=fh) + + # Select best parameters. + self.best_index_ = results.loc[:, f"rank_{scoring_name}"].argmin() + # Raise error if all fits in evaluate failed because all score values are NaN. + if self.best_index_ == -1: + raise RuntimeError( + f"""All fits of forecaster failed, + set error_score='raise' to see the exceptions. + Failed forecaster: {self.forecaster}""" + ) + self.best_score_ = results.loc[self.best_index_, f"mean_{scoring_name}"] + + # Refit model with best parameters. + if self.refit: + self.best_forecaster_.fit(y=y, X=X, fh=fh) + + # Sort values according to rank + results = results.sort_values( + by=f"rank_{scoring_name}", + ascending=True, + ) + # Select n best forecaster + self.n_best_forecasters_ = [] + self.n_best_scores_ = [] + _forecasters_to_return = min(self.return_n_best_forecasters, len(results.index)) + if _forecasters_to_return == -1: + _forecasters_to_return = len(results.index) + for i in range(_forecasters_to_return): + params = results["params"].iloc[i] + rank = results[f"rank_{scoring_name}"].iloc[i] + rank = str(int(rank)) + forecaster = self.forecaster.clone().set_params(**params) + # Refit model with best parameters. + if self.refit: + forecaster.fit(y=y, X=X, fh=fh) + self.n_best_forecasters_.append((rank, forecaster)) + # Save score + score = results[f"mean_{scoring_name}"].iloc[i] + self.n_best_scores_.append(score) + + return self + + def _predict(self, fh, X): + """Forecast time series at future horizon. + + private _predict containing the core logic, called from predict + + State required: + Requires state to be "fitted". + + Accesses in self: + Fitted model attributes ending in "_" + self.cutoff + + Parameters + ---------- + fh : guaranteed to be ForecastingHorizon or None, optional (default=None) + The forecasting horizon with the steps ahead to to predict. + If not passed in _fit, guaranteed to be passed here + X : pd.DataFrame, optional (default=None) + Exogenous time series + + Returns + ------- + y_pred : pd.Series + Point predictions + """ + if not self.refit: + raise RuntimeError( + f"In {self.__class__.__name__}, refit must be True to make predictions," + f" but found refit=False. If refit=False, {self.__class__.__name__} can" + " be used only to tune hyper-parameters, as a parameter estimator." + ) + return super()._predict(fh=fh, X=X) + + def _update(self, y, X=None, update_params=True): + """Update time series to incremental training data. + + Parameters + ---------- + y : guaranteed to be of a type in self.get_tag("y_inner_mtype") + Time series with which to update the forecaster. + if self.get_tag("scitype:y")=="univariate": + guaranteed to have a single column/variable + if self.get_tag("scitype:y")=="multivariate": + guaranteed to have 2 or more columns + if self.get_tag("scitype:y")=="both": no restrictions apply + X : optional (default=None) + guaranteed to be of a type in self.get_tag("X_inner_mtype") + Exogeneous time series for the forecast + update_params : bool, optional (default=True) + whether model parameters should be updated + + Returns + ------- + self : reference to self + """ + update_behaviour = self.update_behaviour + + if update_behaviour == "full_refit": + super()._update(y=y, X=X, update_params=update_params) + elif update_behaviour == "inner_only": + self.best_forecaster_.update(y=y, X=X, update_params=update_params) + elif update_behaviour == "no_update": + self.best_forecaster_.update(y=y, X=X, update_params=False) + else: + raise ValueError( + 'update_behaviour must be one of "full_refit", "inner_only",' + f' or "no_update", but found {update_behaviour}' + ) + return self + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return ``"default"`` set. + + Returns + ------- + params : dict or list of dict + """ + from sktime.forecasting.naive import NaiveForecaster + from sktime.forecasting.trend import PolynomialTrendForecaster + from sktime.performance_metrics.forecasting import ( + MeanAbsolutePercentageError, + mean_absolute_percentage_error, + ) + from sktime.split import SingleWindowSplitter + + params = { + "forecaster": NaiveForecaster(strategy="mean"), + "cv": SingleWindowSplitter(fh=1), + "param_grid": {"window_length": [2, 5]}, + "scoring": MeanAbsolutePercentageError(symmetric=True), + } + params2 = { + "forecaster": PolynomialTrendForecaster(), + "cv": SingleWindowSplitter(fh=1), + "param_grid": {"degree": [1, 2]}, + "scoring": mean_absolute_percentage_error, + "update_behaviour": "inner_only", + } + params3 = { + "forecaster": NaiveForecaster(strategy="mean"), + "cv": SingleWindowSplitter(fh=1), + "param_grid": {"window_length": [3, 4]}, + "scoring": "MeanAbsolutePercentageError(symmetric=True)", + "update_behaviour": "no_update", + } + return [params, params2, params3] diff --git a/src/hyperactive/integrations/sktime/main.py b/src/hyperactive/integrations/sktime/main.py deleted file mode 100644 index f0d7c5db..00000000 --- a/src/hyperactive/integrations/sktime/main.py +++ /dev/null @@ -1,8 +0,0 @@ -# Author: Simon Blanke -# Email: simon.blanke@yahoo.com -# License: MIT License - - -class HyperactiveSearchCV: - def __init__(self) -> None: - pass From 6d769658794ed7c8ec6188e6b55a294a73030476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 09:24:58 +0200 Subject: [PATCH 21/29] Update _forecasting.py --- .../integrations/sktime/_forecasting.py | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index 48fa52fc..de08dee1 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -15,7 +15,7 @@ class ForecastingOptCV(_DelegatedForecaster): - """Tuning via any optimizer in the hyperactive API. + """Tune an sktime forecaster via any optimizer in the hyperactive API. Parameters ---------- @@ -172,6 +172,7 @@ def __init__( optimizer, cv, strategy="refit", + update_behaviour="full_refit", scoring=None, refit=True, return_n_best_forecasters=1, @@ -184,6 +185,7 @@ def __init__( self.optimizer = optimizer self.cv = cv self.strategy = strategy + self.update_behaviour = update_behaviour self.scoring = scoring self.refit = refit self.return_n_best_forecasters = return_n_best_forecasters @@ -373,24 +375,32 @@ def get_test_params(cls, parameter_set="default"): ) from sktime.split import SingleWindowSplitter - params = { + from hyperactive.opt.gfo import HillClimbing + from hyperactive.opt.gridsearch import GridSearchSk + from hyperactive.opt.random_search import RandomSearchSk + + params_gridsearch = { "forecaster": NaiveForecaster(strategy="mean"), "cv": SingleWindowSplitter(fh=1), - "param_grid": {"window_length": [2, 5]}, + "optimizer": GridSearchSk(param_grid={"window_length": [2, 5]}), "scoring": MeanAbsolutePercentageError(symmetric=True), } - params2 = { + params_randomsearch = { "forecaster": PolynomialTrendForecaster(), "cv": SingleWindowSplitter(fh=1), - "param_grid": {"degree": [1, 2]}, + "optimizer": RandomSearchSk(param_distributions={"degree": [1, 2]}), "scoring": mean_absolute_percentage_error, "update_behaviour": "inner_only", } - params3 = { + params_hillclimb = { "forecaster": NaiveForecaster(strategy="mean"), "cv": SingleWindowSplitter(fh=1), - "param_grid": {"window_length": [3, 4]}, + "optimizer": HillClimbing( + search_space={"window_length": [2, 5]}, + max_iter=10, + n_random_starts=5, + ), "scoring": "MeanAbsolutePercentageError(symmetric=True)", "update_behaviour": "no_update", } - return [params, params2, params3] + return [params_gridsearch, params_randomsearch, params_hillclimb] From 88182d63743594661e858247748a4bdc4955f72c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 09:29:08 +0200 Subject: [PATCH 22/29] tests --- .../integrations/sktime/__init__.py | 5 +---- .../integrations/sktime/tests/__init__.py | 1 + .../sktime/tests/test_sktime_estimators.py | 22 +++++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 src/hyperactive/integrations/sktime/tests/__init__.py create mode 100644 src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py diff --git a/src/hyperactive/integrations/sktime/__init__.py b/src/hyperactive/integrations/sktime/__init__.py index b4b7b762..a88ca2f0 100644 --- a/src/hyperactive/integrations/sktime/__init__.py +++ b/src/hyperactive/integrations/sktime/__init__.py @@ -1,7 +1,4 @@ -# Author: Simon Blanke -# Email: simon.blanke@yahoo.com -# License: MIT License - +"""Integrations for sktime with Hyperactive.""" from hyperactive.integrations.sktime._forecasting import ForecastingOptCV diff --git a/src/hyperactive/integrations/sktime/tests/__init__.py b/src/hyperactive/integrations/sktime/tests/__init__.py new file mode 100644 index 00000000..e78b4da3 --- /dev/null +++ b/src/hyperactive/integrations/sktime/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for integrations for sktime.""" diff --git a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py new file mode 100644 index 00000000..a6653833 --- /dev/null +++ b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py @@ -0,0 +1,22 @@ +"""Integration tests for sktime tuners.""" +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import pytest +from skbase.utils.dependencies import _check_soft_dependencies + +if _check_soft_dependencies("sktime", severity="none"): + from hyperactive.integrations.sktime import ForecastingOptCV + + EST_TO_TEST = [ForecastingOptCV] +else: + EST_TO_TEST = [] + + +@pytest.mark.parametrize("estimator", EST_TO_TEST) +def test_sktime_estimator(estimator): + """Test sktime estimator via check_estimator.""" + from sktime.utils.estimator_checks import check_estimator + + check_estimator(estimator, raise_exception=True) + # The above line collects all API conformance tests in sktime and runs them. + # It will raise an error if the estimator is not API conformant. From 372d1383abfffb9232cfc30dd5e5c951c1b7bbcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 09:37:19 +0200 Subject: [PATCH 23/29] Update _forecasting.py --- src/hyperactive/integrations/sktime/_forecasting.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index de08dee1..9cd26040 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -1,7 +1,6 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) import numpy as np - from skbase.utils.dependencies import _check_soft_dependencies if _check_soft_dependencies("sktime", severity="none"): @@ -230,7 +229,7 @@ def _fit(self, y, X, fh): backend=self.backend, backend_params=self.backend_params, ) - + optimizer = self.optimizer.clone() optimizer.set_params(experiment=experiment) best_params, results = optimizer.run() @@ -397,8 +396,8 @@ def get_test_params(cls, parameter_set="default"): "cv": SingleWindowSplitter(fh=1), "optimizer": HillClimbing( search_space={"window_length": [2, 5]}, - max_iter=10, - n_random_starts=5, + n_iter=10, + n_neighbours=5, ), "scoring": "MeanAbsolutePercentageError(symmetric=True)", "update_behaviour": "no_update", From 5dc4a5fa06f9e7ab595772a1588fe24fc3786a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 09:43:14 +0200 Subject: [PATCH 24/29] Update test_sktime_estimators.py --- .../integrations/sktime/tests/test_sktime_estimators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py index a6653833..1ac4bd77 100644 --- a/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py +++ b/src/hyperactive/integrations/sktime/tests/test_sktime_estimators.py @@ -17,6 +17,6 @@ def test_sktime_estimator(estimator): """Test sktime estimator via check_estimator.""" from sktime.utils.estimator_checks import check_estimator - check_estimator(estimator, raise_exception=True) + check_estimator(estimator, raise_exceptions=True) # The above line collects all API conformance tests in sktime and runs them. # It will raise an error if the estimator is not API conformant. From ee6e82620daef9d6c3dd56a2625de6eeea19740d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 09:47:51 +0200 Subject: [PATCH 25/29] Update test_all_objects.py --- src/hyperactive/tests/test_all_objects.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/hyperactive/tests/test_all_objects.py b/src/hyperactive/tests/test_all_objects.py index 8baa65e1..676956fc 100644 --- a/src/hyperactive/tests/test_all_objects.py +++ b/src/hyperactive/tests/test_all_objects.py @@ -138,10 +138,33 @@ def softdeps_present(obj): class TestAllObjects(BaseFixtureGenerator, _TestAllObjects): """Generic tests for all objects in the package.""" + OBJECT_TYPES_IN_HYPERACTIVE = [ + "experiment", + "optimizer", + ] + def test_doctest_examples(self, object_class): """Runs doctests for estimator class.""" run_doctest(object_class, name=f"class {object_class.__name__}") + def test_valid_object_class_tags(self, object_class): + """Check that object class tags are in self.valid_tags.""" + # stepout for estimators with base classes in other packages + cls_type = object_class.get_class_tag("object_type", None) + if cls_type not in self.OBJECT_TYPES_IN_HYPERACTIVE: + return None + + super().test_valid_object_class_tags(object_class) + + def test_valid_object_tags(self, object_instance): + """Check that object tags are in self.valid_tags.""" + # stepout for estimators with base classes in other packages + obj_type = object_instance.get_tag("object_type", None) + if obj_type not in self.OBJECT_TYPES_IN_HYPERACTIVE: + return None + + super().test_valid_object_class_tags(object_instance) + class ExperimentFixtureGenerator(BaseFixtureGenerator): """Fixture generator for experiments. From 56a723537efa78133e9b5951ab968680288b4f0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 09:49:00 +0200 Subject: [PATCH 26/29] Update test_all_objects.py --- src/hyperactive/tests/test_all_objects.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hyperactive/tests/test_all_objects.py b/src/hyperactive/tests/test_all_objects.py index 676956fc..76fd9d37 100644 --- a/src/hyperactive/tests/test_all_objects.py +++ b/src/hyperactive/tests/test_all_objects.py @@ -150,6 +150,7 @@ def test_doctest_examples(self, object_class): def test_valid_object_class_tags(self, object_class): """Check that object class tags are in self.valid_tags.""" # stepout for estimators with base classes in other packages + # e.g., sktime BaseForecaster, BaseClassifier, used in hyperactive.integrations cls_type = object_class.get_class_tag("object_type", None) if cls_type not in self.OBJECT_TYPES_IN_HYPERACTIVE: return None @@ -159,6 +160,7 @@ def test_valid_object_class_tags(self, object_class): def test_valid_object_tags(self, object_instance): """Check that object tags are in self.valid_tags.""" # stepout for estimators with base classes in other packages + # e.g., sktime BaseForecaster, BaseClassifier, used in hyperactive.integrations obj_type = object_instance.get_tag("object_type", None) if obj_type not in self.OBJECT_TYPES_IN_HYPERACTIVE: return None From f997b3a980e3cce10be21c418fa4b42a367c6ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 10:32:03 +0200 Subject: [PATCH 27/29] Update _forecasting.py --- .../integrations/sktime/_forecasting.py | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index 9cd26040..fe165c0c 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -68,11 +68,6 @@ class ForecastingOptCV(_DelegatedForecaster): This is to be used to tune the hyperparameters, and then use the estimator as a parameter estimator, e.g., via get_fitted_params or PluginParamsForecaster. - return_n_best_forecasters : int, default=1 - In case the n best forecaster should be returned, this value can be set - and the n best forecasters will be assigned to n_best_forecasters_. - Set return_n_best_forecasters to -1 to return all forecasters. - error_score : "raise" or numeric, default=np.nan Value to assign to the score if an exception occurs in estimator fitting. If set to "raise", the exception is raised. If a numeric value is given, @@ -174,7 +169,6 @@ def __init__( update_behaviour="full_refit", scoring=None, refit=True, - return_n_best_forecasters=1, error_score=np.nan, cv_X=None, backend=None, @@ -187,7 +181,6 @@ def __init__( self.update_behaviour = update_behaviour self.scoring = scoring self.refit = refit - self.return_n_best_forecasters = return_n_best_forecasters self.error_score = error_score self.cv_X = cv_X self.backend = backend @@ -232,7 +225,7 @@ def _fit(self, y, X, fh): optimizer = self.optimizer.clone() optimizer.set_params(experiment=experiment) - best_params, results = optimizer.run() + best_params = optimizer.run() self.best_params_ = best_params self.best_forecaster_ = forecaster.set_params(**best_params) @@ -241,8 +234,6 @@ def _fit(self, y, X, fh): if self.refit: self.best_forecaster_.fit(y=y, X=X, fh=fh) - # Select best parameters. - self.best_index_ = results.loc[:, f"rank_{scoring_name}"].argmin() # Raise error if all fits in evaluate failed because all score values are NaN. if self.best_index_ == -1: raise RuntimeError( @@ -250,36 +241,11 @@ def _fit(self, y, X, fh): set error_score='raise' to see the exceptions. Failed forecaster: {self.forecaster}""" ) - self.best_score_ = results.loc[self.best_index_, f"mean_{scoring_name}"] # Refit model with best parameters. if self.refit: self.best_forecaster_.fit(y=y, X=X, fh=fh) - # Sort values according to rank - results = results.sort_values( - by=f"rank_{scoring_name}", - ascending=True, - ) - # Select n best forecaster - self.n_best_forecasters_ = [] - self.n_best_scores_ = [] - _forecasters_to_return = min(self.return_n_best_forecasters, len(results.index)) - if _forecasters_to_return == -1: - _forecasters_to_return = len(results.index) - for i in range(_forecasters_to_return): - params = results["params"].iloc[i] - rank = results[f"rank_{scoring_name}"].iloc[i] - rank = str(int(rank)) - forecaster = self.forecaster.clone().set_params(**params) - # Refit model with best parameters. - if self.refit: - forecaster.fit(y=y, X=X, fh=fh) - self.n_best_forecasters_.append((rank, forecaster)) - # Save score - score = results[f"mean_{scoring_name}"].iloc[i] - self.n_best_scores_.append(score) - return self def _predict(self, fh, X): From f1de81920b4f95282aaedac5c1a8676600c107e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 10:32:21 +0200 Subject: [PATCH 28/29] Update _forecasting.py --- src/hyperactive/integrations/sktime/_forecasting.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index fe165c0c..3db04ba8 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -230,18 +230,6 @@ def _fit(self, y, X, fh): self.best_params_ = best_params self.best_forecaster_ = forecaster.set_params(**best_params) - # Refit model with best parameters. - if self.refit: - self.best_forecaster_.fit(y=y, X=X, fh=fh) - - # Raise error if all fits in evaluate failed because all score values are NaN. - if self.best_index_ == -1: - raise RuntimeError( - f"""All fits of forecaster failed, - set error_score='raise' to see the exceptions. - Failed forecaster: {self.forecaster}""" - ) - # Refit model with best parameters. if self.refit: self.best_forecaster_.fit(y=y, X=X, fh=fh) From 200a87e3432d5465c9426c8028818e16c7d01012 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sat, 16 Aug 2025 10:42:58 +0200 Subject: [PATCH 29/29] Update _forecasting.py --- src/hyperactive/integrations/sktime/_forecasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/sktime/_forecasting.py b/src/hyperactive/integrations/sktime/_forecasting.py index 3db04ba8..50f2649a 100644 --- a/src/hyperactive/integrations/sktime/_forecasting.py +++ b/src/hyperactive/integrations/sktime/_forecasting.py @@ -208,7 +208,7 @@ def _fit(self, y, X, fh): forecaster = self.forecaster.clone() scoring = check_scoring(self.scoring, obj=self) - scoring_name = f"test_{scoring.name}" + # scoring_name = f"test_{scoring.name}" experiment = SktimeForecastingExperiment( forecaster=forecaster,