diff --git a/pyproject.toml b/pyproject.toml index 30493a6f..af3dd76a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ sklearn-integration = [ "scikit-learn <1.8.0", ] sktime-integration = [ + "skpro", "sktime", ] build = [ diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py index 4ed0ce52..06c4c584 100644 --- a/src/hyperactive/experiment/integrations/__init__.py +++ b/src/hyperactive/experiment/integrations/__init__.py @@ -2,6 +2,9 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment +from hyperactive.experiment.integrations.skpro_probareg import ( + SkproProbaRegExperiment, +) from hyperactive.experiment.integrations.sktime_classification import ( SktimeClassificationExperiment, ) @@ -11,6 +14,7 @@ __all__ = [ "SklearnCvExperiment", + "SkproProbaRegExperiment", "SktimeClassificationExperiment", "SktimeForecastingExperiment", ] diff --git a/src/hyperactive/experiment/integrations/_skl_cv.py b/src/hyperactive/experiment/integrations/_skl_cv.py new file mode 100644 index 00000000..b56edb6c --- /dev/null +++ b/src/hyperactive/experiment/integrations/_skl_cv.py @@ -0,0 +1,29 @@ +"""Integration utilities for sklearn splitters with Hyperactive.""" + +__all__ = ["_coerce_cv"] + + +def _coerce_cv(cv): + """Coerce cv argument into a sklearn-compatible cv splitter. + + Parameters + ---------- + cv : int, cross-validation generator, or iterable + The cross-validation strategy to use. + + Returns + ------- + cv_splitter : cross-validation generator or iterable + A sklearn-compatible cross-validation splitter. 
+ """ + # default handling for cv + if isinstance(cv, int): + from sklearn.model_selection import KFold + + return KFold(n_splits=cv, shuffle=True) + elif cv is None: + from sklearn.model_selection import KFold + + return KFold(n_splits=3, shuffle=True) + else: + return cv diff --git a/src/hyperactive/experiment/integrations/sklearn_cv.py b/src/hyperactive/experiment/integrations/sklearn_cv.py index 65b6e7a1..2ecc6c6d 100644 --- a/src/hyperactive/experiment/integrations/sklearn_cv.py +++ b/src/hyperactive/experiment/integrations/sklearn_cv.py @@ -7,6 +7,7 @@ from sklearn.utils.validation import _num_samples from hyperactive.base import BaseExperiment +from hyperactive.experiment.integrations._skl_cv import _coerce_cv from hyperactive.experiment.integrations._skl_metrics import _coerce_to_scorer_and_sign @@ -31,7 +32,7 @@ class SklearnCvExperiment(BaseExperiment): estimator : sklearn estimator The estimator to be used for the experiment. X : array-like, shape (n_samples, n_features) - The input data for the model. + The input data for the model. y : array-like, shape (n_samples,) or (n_samples, n_outputs) The target values for the model. 
cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True) @@ -86,16 +87,7 @@ def __init__(self, estimator, X, y, scoring=None, cv=None): super().__init__() - if cv is None: - from sklearn.model_selection import KFold - - self._cv = KFold(n_splits=3, shuffle=True) - elif isinstance(cv, int): - from sklearn.model_selection import KFold - - self._cv = KFold(n_splits=cv, shuffle=True) - else: - self._cv = cv + self._cv = _coerce_cv(cv) self._scoring, _sign = _coerce_to_scorer_and_sign(scoring, self.estimator) self.scorer_ = self._scoring diff --git a/src/hyperactive/experiment/integrations/skpro_probareg.py b/src/hyperactive/experiment/integrations/skpro_probareg.py new file mode 100644 index 00000000..a6a99f17 --- /dev/null +++ b/src/hyperactive/experiment/integrations/skpro_probareg.py @@ -0,0 +1,314 @@ +"""Experiment adapter for skpro cross-validation experiments.""" + +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import numpy as np + +from hyperactive.base import BaseExperiment +from hyperactive.experiment.integrations._skl_cv import _coerce_cv + + +class SkproProbaRegExperiment(BaseExperiment): + """Experiment adapter for skpro probabilistic regression or survival prediction. + + This class is used to perform cross-validation experiments using a given + skpro probabilistic regressor or survival predictor. + It allows for hyperparameter tuning and evaluation of + the model's performance. + + The score returned is the summary backtesting score, + of applying ``skpro`` ``evaluate`` to ``estimator`` with the parameters given in + ``score`` ``params``. + + The backtesting performed is specified by the ``cv`` parameter, + and the scoring metric is specified by the ``scoring`` parameter. + The ``X`` and ``y`` parameters are the input data and target values, + which are used in fit/predict cross-validation. 
+ + Parameters + ---------- + estimator : skpro BaseProbaRegressor descendant instance + skpro probabilistic regressor or survival predictor to be evaluated. + + X : pd.DataFrame, shape (n_samples, n_features) + Feature instances to use in evaluation experiment + + y : pd.DataFrame, shape (n_samples,) or (n_samples, n_outputs) + Labels to use in the evaluation experiment + + C : pd.DataFrame, optional (default=None) + censoring information to use in the evaluation experiment, + + * should have same column name as y, same length as X and y + * should have entries 0 and 1 (float or int), + 0 = uncensored, 1 = (right) censored + + if None, all observations are assumed to be uncensored. + Can be passed to any probabilistic regressor, + but is ignored if ``capability:survival`` tag is ``False``. + + cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None = default = ``KFold(n_splits=3, shuffle=True)`` + - integer, number of folds in a ``KFold`` splitter, ``shuffle=True`` + - An iterable yielding (train, test) splits as arrays of indices. + + scoring : skpro metric object, default=CRPS + Strategy to evaluate the performance of the cross-validated model on test set. + An ``skpro`` probabilistic regression or survival prediction metric. + + error_score : "raise" or numeric, default=np.nan + Value to assign to the score if an exception occurs in estimator fitting. If set + to "raise", the exception is raised. If a numeric value is given, + FitFailedWarning is raised. + + backend : string, by default "None". + Parallelization backend to use for runs. + Runs parallel evaluate if specified and ``strategy="refit"``.
+ + - "None": executes loop sequentially, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + - "dask_lazy": same as "dask", + but changes the return to (lazy) ``dask.dataframe.DataFrame``. + - "ray": uses ``ray``, requires ``ray`` package in environment + + Recommendation: Use "dask" or "loky" for parallel evaluate. + "threading" is unlikely to see speed ups due to the GIL and the serialization + backend (``cloudpickle``) for "dask" and "loky" is generally more robust + than the standard ``pickle`` library used in "multiprocessing". + + backend_params : dict, optional + additional parameters passed to the backend as config. + Directly passed to ``utils.parallel.parallelize``. + Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. 
+ - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + >>> import pandas as pd + >>> from hyperactive.experiment.integrations import SkproProbaRegExperiment + >>> from sklearn.datasets import load_diabetes + >>> from sklearn.model_selection import KFold + >>> from skpro.metrics import CRPS + >>> from skpro.regression.dummy import DummyProbaRegressor + >>> + >>> X, y = load_diabetes(return_X_y=True, as_frame=True) + >>> y = pd.DataFrame(y) # skpro assumes y is pd.DataFrame + >>> + >>> skpro_exp = SkproProbaRegExperiment( + ... estimator=DummyProbaRegressor(), + ... scoring=CRPS(), + ... cv=KFold(n_splits=2), + ... X=X, + ... y=y, + ... ) + >>> params = {"strategy": "normal"} + >>> score, add_info = skpro_exp.score(params) + + For default choices of ``scoring`` and ``cv``: + >>> skpro_exp = SkproProbaRegExperiment( + ... estimator=DummyProbaRegressor(), + ... X=X, + ... y=y, + ... ) + >>> params = {"strategy": "empirical"} + >>> score, add_info = skpro_exp.score(params) + + Quick call without metadata return or dictionary: + >>> score = skpro_exp({"strategy": "normal"}) + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "skpro", # python dependencies + } + + def __init__( + self, + estimator, + X, + y, + C=None, + cv=None, + scoring=None, + error_score=np.nan, + backend=None, + backend_params=None, + ): + self.estimator = estimator + self.X = X + self.y = y + self.C = C + self.scoring = scoring + self.cv = cv + self.error_score = error_score + self.backend = backend + self.backend_params = backend_params + + super().__init__() + + self._cv = _coerce_cv(cv) + + if scoring is None: + from skpro.metrics import CRPS + + self._scoring = CRPS() + else: + self._scoring = scoring + + # Set the optimization direction from the metric's ``lower_is_better`` tag + if self._scoring.get_tag("lower_is_better"): + higher_is_better
= "lower" + else: + higher_is_better = "higher" + self.set_tags(**{"property:higher_or_lower_is_better": higher_is_better}) + + def _paramnames(self): + """Return the parameter names of the search. + + Returns + ------- + list of str + The parameter names of the search parameters. + """ + return list(self.estimator.get_params().keys()) + + def _evaluate(self, params): + """Evaluate the parameters. + + Parameters + ---------- + params : dict with string keys + Parameters to evaluate. + + Returns + ------- + float + The value of the parameters as per evaluation. + dict + Additional metadata about the search. + """ + from skpro.benchmarking.evaluate import evaluate + + estimator = self.estimator.clone().set_params(**params) + + results = evaluate( + estimator, + cv=self._cv, + X=self.X, + y=self.y, + C=self.C, + scoring=self._scoring, + error_score=self.error_score, + backend=self.backend, + backend_params=self.backend_params, + ) + + result_name = f"test_{self._scoring.name}" + + res_float = results[result_name].mean() + + return res_float, {"results": results} + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the skbase object. + + ``get_test_params`` is a unified interface point to store + parameter settings for testing purposes. This function is also + used in ``create_test_instance`` and ``create_test_instances_and_names`` + to construct test instances. + + ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``. + + Each ``dict`` is a parameter configuration for testing, + and can be used to construct an "interesting" test instance. + A call to ``cls(**params)`` should + be valid for all dictionaries ``params`` in the return of ``get_test_params``. + + The ``get_test_params`` need not return fixed lists of dictionaries, + it can also return dynamic or stochastic parameter settings. 
+ + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + import pandas as pd + from sklearn.datasets import load_diabetes + from sklearn.model_selection import KFold + from skpro.metrics import ConcordanceHarrell + from skpro.regression.residual import ResidualDouble + from skpro.survival.compose import ConditionUncensored + + X, y = load_diabetes(return_X_y=True, as_frame=True) + y = pd.DataFrame(y) # skpro assumes y is pd.DataFrame + params0 = { + "estimator": ResidualDouble.create_test_instance(), + "X": X, + "y": y, + } + + params1 = { + "estimator": ConditionUncensored.create_test_instance(), + "cv": KFold(n_splits=2), + "X": X, + "y": y, + "scoring": ConcordanceHarrell(), + } + + return [params0, params1] + + @classmethod + def _get_score_params(self): + """Return settings for testing score/evaluate functions. Used in tests only. + + Returns a list, the i-th element should be valid arguments for + self.evaluate and self.score, of an instance constructed with + self.get_test_params()[i]. + + Returns + ------- + list of dict + The parameters to be used for scoring. 
+ """ + from skpro.regression.dummy import DummyProbaRegressor + + val0 = {} + val1 = {"estimator": DummyProbaRegressor()} + return [val0, val1] diff --git a/src/hyperactive/integrations/skpro/__init__.py b/src/hyperactive/integrations/skpro/__init__.py new file mode 100644 index 00000000..121fd14b --- /dev/null +++ b/src/hyperactive/integrations/skpro/__init__.py @@ -0,0 +1,5 @@ +"""Integrations for skpro with Hyperactive.""" + +from hyperactive.integrations.skpro._probareg import ProbaRegOptCV + +__all__ = ["ProbaRegOptCV"] diff --git a/src/hyperactive/integrations/skpro/_probareg.py b/src/hyperactive/integrations/skpro/_probareg.py new file mode 100644 index 00000000..9ddb63d9 --- /dev/null +++ b/src/hyperactive/integrations/skpro/_probareg.py @@ -0,0 +1,330 @@ +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import numpy as np +from skbase.utils.dependencies import _check_soft_dependencies + +if _check_soft_dependencies("skpro", severity="none"): + from skpro.regression.base import _DelegatedProbaRegressor +else: + from skbase.base import BaseEstimator as _DelegatedProbaRegressor + +from hyperactive.experiment.integrations.skpro_probareg import SkproProbaRegExperiment + + +class ProbaRegOptCV(_DelegatedProbaRegressor): + """Tune an skpro regressor via any optimizer in the hyperactive toolbox. + + ``ProbaRegOptCV`` uses any available tuning engine from ``hyperactive`` + to tune a probabilistic regressor. + + It passes backtesting results as scores to the tuning engine, + which identifies the best hyperparameters. 
+ + Any available tuning engine from hyperactive can be used, for example: + + * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch``, + this results in the same algorithm as ``GridSearchCV`` in ``skpro`` + * hill climbing - ``from hyperactive.opt import HillClimbing`` + * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer`` + + Configuration of the tuning engine is as per the respective documentation. + + Formally, ``ProbaRegOptCV`` does the following: + + In ``fit``: + + * wraps the ``estimator``, ``scoring``, and other parameters + into a ``SkproProbaRegExperiment`` instance, which is passed to the + optimizer ``optimizer`` as the ``experiment`` argument. + * Optimal parameters are then obtained from ``optimizer.solve``, and set + as ``best_params_`` and ``best_estimator_`` attributes. + * If ``refit=True``, ``best_estimator_`` is fitted to the entire ``y`` and ``X``. + + In ``predict`` and ``predict``-like methods, calls the respective method + of the ``best_estimator_`` if ``refit=True``. + + Parameters + ---------- + estimator : skpro regressor, BaseProbaRegressor instance or interface compatible + The regressor to tune, must implement the skpro regressor interface. + + optimizer : hyperactive BaseOptimizer + The optimizer to be used for hyperparameter search. + + cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None = default = ``KFold(n_splits=3, shuffle=True)`` + - integer, number of folds in a ``KFold`` splitter, ``shuffle=True`` + - An iterable yielding (train, test) splits as arrays of indices. + + scoring : str, callable, default=None + Strategy to evaluate the performance of the cross-validated model on + the test set.
Can be: + + - a single string resolvable to an sklearn scorer + - a callable that returns a single value; + - ``None`` = default = ``CRPS`` + + refit : bool, optional (default=True) + True = refit the regressor with the best parameters on the entire data in fit + False = no refitting takes place. The regressor cannot be used to predict. + This is to be used to tune the hyperparameters, and then use the estimator + as a parameter estimator, e.g., via get_fitted_params. + + error_score : "raise" or numeric, default=np.nan + Value to assign to the score if an exception occurs in estimator fitting. If set + to "raise", the exception is raised. If a numeric value is given, + FitFailedWarning is raised. + + backend : string, by default "None". + Parallelization backend to use for runs. + Runs parallel evaluate if specified and ``strategy="refit"``. + + - "None": executes loop sequentially, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + - "dask_lazy": same as "dask", + but changes the return to (lazy) ``dask.dataframe.DataFrame``. + - "ray": uses ``ray``, requires ``ray`` package in environment + + Recommendation: Use "dask" or "loky" for parallel evaluate. + "threading" is unlikely to see speed ups due to the GIL and the serialization + backend (``cloudpickle``) for "dask" and "loky" is generally more robust + than the standard ``pickle`` library used in "multiprocessing". + + backend_params : dict, optional + additional parameters passed to the backend as config. + Directly passed to ``utils.parallel.parallelize``.
+ Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + Any available tuning engine from hyperactive can be used, for example: + + * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch`` + * hill climbing - ``from hyperactive.opt import HillClimbing`` + * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer`` + + For illustration, we use grid search, this can be replaced by any other optimizer. + + 1. 
defining the tuned estimator: + >>> from skpro.regression.dummy import DummyProbaRegressor + >>> from skpro.metrics import CRPS + >>> from sklearn.model_selection import KFold + >>> from hyperactive.integrations.skpro import ProbaRegOptCV + >>> from hyperactive.opt import GridSearchSk as GridSearch + >>> + >>> param_grid = {"strategy": ["empirical", "normal"]} + >>> tuned_naive = ProbaRegOptCV( + ... DummyProbaRegressor(), + ... GridSearch(param_grid), + ... cv=KFold(n_splits=2, shuffle=False), + ... scoring=CRPS(), + ... ) + + 2. fitting the tuned estimator: + >>> import pandas as pd + >>> from sklearn.datasets import load_diabetes + >>> from sklearn.model_selection import train_test_split + >>> X, y = load_diabetes(return_X_y=True, as_frame=True) + >>> y = pd.DataFrame(y) # skpro assumes y is pd.DataFrame + >>> X_train, X_test, y_train, y_test = train_test_split(X, y) + >>> + >>> tuned_naive.fit(X_train, y_train) + ProbaRegOptCV(...) + >>> y_pred = tuned_naive.predict(X_test) + + 3. obtaining best parameters and best estimator + >>> best_params = tuned_naive.best_params_ + >>> best_regressor = tuned_naive.best_estimator_ + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "skpro", + } + + # attribute for _DelegatedProbaRegressor, which then delegates + # all non-overridden methods to getattr(self, _delegate_name) + # see further details in _DelegatedProbaRegressor docstring + _delegate_name = "best_estimator_" + + def __init__( + self, + estimator, + optimizer, + cv=None, + scoring=None, + refit=True, + error_score=np.nan, + backend=None, + backend_params=None, + ): + self.estimator = estimator + self.optimizer = optimizer + self.cv = cv + self.scoring = scoring + self.refit = refit + self.error_score = error_score + self.backend = backend + self.backend_params = backend_params + super().__init__() + + def _fit(self, X, y, C=None): + """Fit regressor to training data.
+ + Writes to self: + Sets fitted model attributes ending in "_". + + Changes state to "fitted" = sets is_fitted flag to True + + Parameters + ---------- + X : pandas DataFrame + feature instances to fit regressor to + + y : pd.DataFrame, must be same length as X + labels to fit regressor to + + C : pd.DataFrame, optional (default=None) + censoring information for survival analysis, + + * should have same column name as y, same length as X and y + * should have entries 0 and 1 (float or int), + 0 = uncensored, 1 = (right) censored + + if None, all observations are assumed to be uncensored. + + Returns + ------- + self : Reference to self. + """ + estimator = self.estimator.clone() + + experiment = SkproProbaRegExperiment( + estimator=estimator, + scoring=self.scoring, + cv=self.cv, + X=X, + y=y, + C=C, + error_score=self.error_score, + backend=self.backend, + backend_params=self.backend_params, + ) + + optimizer = self.optimizer.clone() + optimizer.set_params(experiment=experiment) + best_params = optimizer.solve() + + self.best_params_ = best_params + self.best_estimator_ = estimator.set_params(**best_params) + + # Refit model with best parameters. + if self.refit: + self.best_estimator_.fit(X=X, y=y, C=C) + + return self + + def _predict(self, X): + """Predict labels for data from features. + + State required: + Requires state to be "fitted" = self.is_fitted=True + + Accesses in self: + Fitted model attributes ending in "_" + + Parameters + ---------- + X : pandas DataFrame, must have same columns as X in `fit` + data to predict labels for + + Returns + ------- + y : pandas DataFrame, same length as `X`, same columns as `y` in `fit` + labels predicted for `X` + """ + if not self.refit: + raise RuntimeError( + f"In {self.__class__.__name__}, refit must be True to make predictions," + f" but found refit=False. If refit=False, {self.__class__.__name__} can" + " be used only to tune hyper-parameters, as a parameter estimator." 
+ ) + return super()._predict(X=X) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return ``"default"`` set. + + Returns + ------- + params : dict or list of dict + """ + from sklearn.model_selection import KFold + from skpro.metrics import CRPS, ConcordanceHarrell + from skpro.regression.bootstrap import BootstrapRegressor + from skpro.regression.residual import ResidualDouble + from skpro.survival.compose import ConditionUncensored + + from hyperactive.opt.gfo import HillClimbing + from hyperactive.opt.gridsearch import GridSearchSk + from hyperactive.opt.random_search import RandomSearchSk + + params_gridsearch = { + "estimator": ResidualDouble.create_test_instance(), + "optimizer": GridSearchSk(param_grid={"distr_type": ["Normal", "Laplace"]}), + } + param_randomsearch = { + "estimator": ResidualDouble.create_test_instance(), + "optimizer": RandomSearchSk( + param_distributions={"distr_type": ["Normal", "Laplace"]}, + n_iter=2, + ), + "cv": 2, + "scoring": CRPS(), + } + params_hillclimb = { + "estimator": ConditionUncensored(BootstrapRegressor.create_test_instance()), + "cv": KFold(n_splits=2), + "optimizer": HillClimbing( + search_space={"estimator__n_bootstrap_samples": [3, 7, 12]}, + n_iter=10, + n_neighbours=5, + ), + "scoring": ConcordanceHarrell(), + } + + return [params_gridsearch, param_randomsearch, params_hillclimb] diff --git a/src/hyperactive/integrations/skpro/tests/__init__.py b/src/hyperactive/integrations/skpro/tests/__init__.py new file mode 100644 index 00000000..f610fcb1 --- /dev/null +++ b/src/hyperactive/integrations/skpro/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for integrations for skpro.""" diff --git a/src/hyperactive/integrations/skpro/tests/test_skpro_estimators.py 
b/src/hyperactive/integrations/skpro/tests/test_skpro_estimators.py new file mode 100644 index 00000000..8032dee3 --- /dev/null +++ b/src/hyperactive/integrations/skpro/tests/test_skpro_estimators.py @@ -0,0 +1,22 @@ +"""Integration tests for skpro tuners.""" +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import pytest +from skbase.utils.dependencies import _check_soft_dependencies + +if _check_soft_dependencies("skpro", severity="none"): + from hyperactive.integrations.skpro import ProbaRegOptCV + + EST_TO_TEST = [ProbaRegOptCV] +else: + EST_TO_TEST = [] + + +@pytest.mark.parametrize("estimator", EST_TO_TEST) +def test_sktime_estimator(estimator): + """Test skpro estimator via check_estimator.""" + from skpro.utils.estimator_checks import check_estimator + + check_estimator(estimator, raise_exceptions=True) + # The above line collects all API conformance tests in skpro and runs them. + # It will raise an error if the estimator is not API conformant.