diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index bacf905e7..b8b150498 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -29,7 +29,7 @@ jobs: - name: Run tests run: | if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml'; fi - python -m pytest --durations=20 --timeout=300 --timeout-method=thread -v $codecov test + python -m pytest --durations=20 --timeout=500 --timeout-method=thread -v $codecov test - name: Check for files left behind by test if: ${{ always() }} run: | diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 12bd96fd6..329cc4dd8 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -73,7 +73,8 @@ def send_warnings_to_log( with warnings.catch_warnings(): warnings.showwarning = send_warnings_to_log if task in REGRESSION_TASKS: - prediction = pipeline.predict(X_, batch_size=batch_size) + # Voting regressor does not support batch size + prediction = pipeline.predict(X_) else: # Voting classifier predict proba does not support batch size prediction = pipeline.predict_proba(X_) @@ -161,7 +162,7 @@ def __init__( delete_tmp_folder_after_terminate=delete_tmp_folder_after_terminate, delete_output_folder_after_terminate=delete_output_folder_after_terminate, ) - self.task_type = task_type + self.task_type = task_type or "" self._stopwatch = StopWatch() self.pipeline_options = replace_string_bool_to_bool(json.load(open( @@ -789,7 +790,7 @@ def _search( max_models_on_disc=self.max_models_on_disc, seed=self.seed, max_iterations=None, - read_at_most=np.inf, + read_at_most=sys.maxsize, ensemble_memory_limit=self._memory_limit, random_state=self.seed, precision=precision, @@ -1050,7 +1051,7 @@ def predict( all_predictions = joblib.Parallel(n_jobs=n_jobs)( joblib.delayed(_pipeline_predict)( - models[identifier], X_test, batch_size, self._logger, self.task_type + models[identifier], X_test, batch_size, self._logger, 
STRING_TO_TASK_TYPES[self.task_type] ) for identifier in self.ensemble_.get_selected_model_identifiers() ) @@ -1064,17 +1065,6 @@ def predict( predictions = self.ensemble_.predict(all_predictions) - if self.task_type in REGRESSION_TASKS: - # Make sure prediction probabilities - # are within a valid range - # Individual models are checked in _pipeline_predict - if ( - (predictions >= 0).all() and (predictions <= 1).all() - ): - raise ValueError("For ensemble {}, prediction probability not within [0, 1]!".format( - self.ensemble_) - ) - self._clean_logger() return predictions diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py new file mode 100644 index 000000000..394a7230f --- /dev/null +++ b/autoPyTorch/api/tabular_regression.py @@ -0,0 +1,249 @@ +import os +import uuid +from typing import Any, Callable, Dict, List, Optional, Union + +import numpy as np + +import pandas as pd + +from autoPyTorch.api.base_task import BaseTask +from autoPyTorch.constants import ( + TABULAR_REGRESSION, + TASK_TYPES_TO_STRING +) +from autoPyTorch.data.tabular_validator import TabularInputValidator +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + HoldoutValTypes, +) +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline +from autoPyTorch.utils.backend import Backend +from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates + + +class TabularRegressionTask(BaseTask): + """ + Tabular Regression API to the pipelines. + Args: + seed (int): seed to be used for reproducibility. + n_jobs (int), (default=1): number of consecutive processes to spawn. 
+ logging_config (Optional[Dict]): specifies configuration + for logging, if None, it is loaded from the logging.yaml + ensemble_size (int), (default=50): Number of models added to the ensemble built by + Ensemble selection from libraries of models. + Models are drawn with replacement. + ensemble_nbest (int), (default=50): only consider the ensemble_nbest + models to build the ensemble + max_models_on_disc (int), (default=50): maximum number of models saved to disc. + Also, controls the size of the ensemble as any additional models will be deleted. + Must be greater than or equal to 1. + temporary_directory (str): folder to store configuration output and log file + output_directory (str): folder to store predictions for optional test set + delete_tmp_folder_after_terminate (bool): determines whether to delete the temporary directory, + when finished + include_components (Optional[Dict]): If None, all possible components are used. + Otherwise specifies set of components to use. + exclude_components (Optional[Dict]): If None, all possible components are used. + Otherwise specifies set of components not to use. 
Incompatible with include + components + """ + + def __init__( + self, + seed: int = 1, + n_jobs: int = 1, + logging_config: Optional[Dict] = None, + ensemble_size: int = 50, + ensemble_nbest: int = 50, + max_models_on_disc: int = 50, + temporary_directory: Optional[str] = None, + output_directory: Optional[str] = None, + delete_tmp_folder_after_terminate: bool = True, + delete_output_folder_after_terminate: bool = True, + include_components: Optional[Dict] = None, + exclude_components: Optional[Dict] = None, + resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + backend: Optional[Backend] = None, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + ): + super().__init__( + seed=seed, + n_jobs=n_jobs, + logging_config=logging_config, + ensemble_size=ensemble_size, + ensemble_nbest=ensemble_nbest, + max_models_on_disc=max_models_on_disc, + temporary_directory=temporary_directory, + output_directory=output_directory, + delete_tmp_folder_after_terminate=delete_tmp_folder_after_terminate, + delete_output_folder_after_terminate=delete_output_folder_after_terminate, + include_components=include_components, + exclude_components=exclude_components, + backend=backend, + resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, + search_space_updates=search_space_updates, + task_type=TASK_TYPES_TO_STRING[TABULAR_REGRESSION], + ) + + def _get_required_dataset_properties(self, dataset: BaseDataset) -> Dict[str, Any]: + if not isinstance(dataset, TabularDataset): + raise ValueError("Dataset is incompatible for the given task,: {}".format( + type(dataset) + )) + return {'task_type': dataset.task_type, + 'output_type': dataset.output_type, + 'issparse': dataset.issparse, + 'numerical_columns': dataset.numerical_columns, + 'categorical_columns': dataset.categorical_columns} + + def build_pipeline(self, 
dataset_properties: Dict[str, Any]) -> TabularRegressionPipeline: + return TabularRegressionPipeline(dataset_properties=dataset_properties) + + def search(self, + optimize_metric: str, + X_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, + dataset_name: Optional[str] = None, + budget_type: Optional[str] = None, + budget: Optional[float] = None, + total_walltime_limit: int = 100, + func_eval_time_limit: int = 60, + traditional_per_total_budget: float = 0.1, + memory_limit: Optional[int] = 4096, + smac_scenario_args: Optional[Dict[str, Any]] = None, + get_smac_object_callback: Optional[Callable] = None, + all_supported_metrics: bool = True, + precision: int = 32, + disable_file_output: List = [], + load_models: bool = True, + ) -> 'BaseTask': + """ + Search for the best pipeline configuration for the given dataset. + + Fit both optimizes the machine learning models and builds an ensemble out of them. + To disable ensembling, set ensemble_size==0. + using the optimizer. + Args: + X_train, y_train, X_test, y_test: Union[np.ndarray, List, pd.DataFrame] + A pair of features (X_train) and targets (y_train) used to fit a + pipeline. Additionally, a holdout of this pairs (X_test, y_test) can + be provided to track the generalization performance of each stage. + optimize_metric (str): name of the metric that is used to + evaluate a pipeline. + budget_type (Optional[str]): + Type of budget to be used when fitting the pipeline. + Either 'epochs' or 'runtime'. If not provided, uses + the default in the pipeline config ('epochs') + budget (Optional[float]): + Budget to fit a single run of the pipeline. If not + provided, uses the default in the pipeline config + total_walltime_limit (int), (default=100): Time limit + in seconds for the search of appropriate models. 
+ By increasing this value, autopytorch has a higher + chance of finding better models. + func_eval_time_limit (int), (default=60): Time limit + for a single call to the machine learning model. + Model fitting will be terminated if the machine + learning algorithm runs over the time limit. Set + this value high enough so that typical machine + learning algorithms can be fit on the training + data. + traditional_per_total_budget (float), (default=0.1): + Percent of total walltime to be allocated for + running traditional classifiers. + memory_limit (Optional[int]), (default=4096): Memory + limit in MB for the machine learning algorithm. autopytorch + will stop fitting the machine learning algorithm if it tries + to allocate more than memory_limit MB. If None is provided, + no memory limit is set. In case of multi-processing, memory_limit + will be per job. This memory limit also applies to the ensemble + creation process. + smac_scenario_args (Optional[Dict]): Additional arguments inserted + into the scenario of SMAC. See the + [SMAC documentation] (https://automl.github.io/SMAC3/master/options.html?highlight=scenario#scenario) + get_smac_object_callback (Optional[Callable]): Callback function + to create an object of class + [smac.optimizer.smbo.SMBO](https://automl.github.io/SMAC3/master/apidoc/smac.optimizer.smbo.html). + The function must accept the arguments scenario_dict, + instances, num_params, runhistory, seed and ta. This is + an advanced feature. Use only if you are familiar with + [SMAC](https://automl.github.io/SMAC3/master/index.html). + all_supported_metrics (bool), (default=True): if True, all + metrics supporting current task will be calculated + for each pipeline and results will be available via cv_results + precision (int), (default=32): Numeric precision used when loading + ensemble data. Can be either '16', '32' or '64'. 
+ disable_file_output (Union[bool, List]): + load_models (bool), (default=True): Whether to load the + models after fitting AutoPyTorch. + + Returns: + self + + """ + if dataset_name is None: + dataset_name = str(uuid.uuid1(clock_seq=os.getpid())) + + # we have to create a logger for at this point for the validator + self._logger = self._get_logger(dataset_name) + + # Create a validator object to make sure that the data provided by + # the user matches the autopytorch requirements + self.InputValidator = TabularInputValidator( + is_classification=False, + logger_port=self._logger_port, + ) + + # Fit a input validator to check the provided data + # Also, an encoder is fit to both train and test data, + # to prevent unseen categories during inference + self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test) + + self.dataset = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test, + validator=self.InputValidator, + resampling_strategy=self.resampling_strategy, + resampling_strategy_args=self.resampling_strategy_args, + ) + + return self._search( + dataset=self.dataset, + optimize_metric=optimize_metric, + budget_type=budget_type, + budget=budget, + total_walltime_limit=total_walltime_limit, + func_eval_time_limit=func_eval_time_limit, + traditional_per_total_budget=traditional_per_total_budget, + memory_limit=memory_limit, + smac_scenario_args=smac_scenario_args, + get_smac_object_callback=get_smac_object_callback, + all_supported_metrics=all_supported_metrics, + precision=precision, + disable_file_output=disable_file_output, + load_models=load_models, + ) + + def predict( + self, + X_test: np.ndarray, + batch_size: Optional[int] = None, + n_jobs: int = 1 + ) -> np.ndarray: + if self.InputValidator is None or not self.InputValidator._is_fitted: + raise ValueError("predict() is only supported after calling search. 
Kindly call first " + "the estimator fit() method.") + + X_test = self.InputValidator.feature_validator.transform(X_test) + predicted_values = super().predict(X_test, batch_size=batch_size, + n_jobs=n_jobs) + + # Allow to predict in the original domain -- that is, the user is not interested + # in our encoded values + return self.InputValidator.target_validator.inverse_transform(predicted_values) diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py index 51a7a8e38..d65f245e9 100644 --- a/autoPyTorch/datasets/base_dataset.py +++ b/autoPyTorch/datasets/base_dataset.py @@ -11,6 +11,7 @@ import torchvision +from autoPyTorch.constants import CLASSIFICATION_OUTPUTS, STRING_TO_OUTPUT_TYPES from autoPyTorch.datasets.resampling_strategy import ( CROSS_VAL_FN, CrossValTypes, @@ -113,11 +114,15 @@ def __init__( self.resampling_strategy_args = resampling_strategy_args self.task_type: Optional[str] = None self.issparse: bool = issparse(self.train_tensors[0]) - self.input_shape: Tuple[int] = train_tensors[0].shape[1:] - self.num_classes: Optional[int] = None - if len(train_tensors) == 2 and train_tensors[1] is not None: + self.input_shape: Tuple[int] = self.train_tensors[0].shape[1:] + + if len(self.train_tensors) == 2 and self.train_tensors[1] is not None: self.output_type: str = type_of_target(self.train_tensors[1]) - self.output_shape: int = train_tensors[1].shape[1] if train_tensors[1].shape == 2 else 1 + + if STRING_TO_OUTPUT_TYPES[self.output_type] in CLASSIFICATION_OUTPUTS: + self.output_shape = len(np.unique(self.train_tensors[1])) + else: + self.output_shape = self.train_tensors[1].shape[-1] if self.train_tensors[1].ndim > 1 else 1 # TODO: Look for a criteria to define small enough to preprocess self.is_small_preprocess = True @@ -368,8 +373,7 @@ def get_dataset_properties(self, dataset_requirements: List[FitRequirement]) -> 'output_type': self.output_type, 'issparse': self.issparse, 'input_shape': self.input_shape, - 'output_shape': 
self.output_shape, - 'num_classes': self.num_classes, + 'output_shape': self.output_shape }) return dataset_properties diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index aeb89464d..db5caf72b 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -12,7 +12,7 @@ from sklearn.base import BaseEstimator from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.ensemble import VotingClassifier, VotingRegressor +from sklearn.ensemble import VotingClassifier from smac.tae import StatusType @@ -32,6 +32,7 @@ from autoPyTorch.datasets.base_dataset import BaseDataset from autoPyTorch.datasets.tabular_dataset import TabularDataset from autoPyTorch.evaluation.utils import ( + VotingRegressorWrapper, convert_multioutput_multiclass_to_multilabel ) from autoPyTorch.pipeline.base_pipeline import BasePipeline @@ -513,7 +514,7 @@ def file_output( if self.task_type in CLASSIFICATION_TASKS: pipelines = VotingClassifier(estimators=None, voting='soft', ) else: - pipelines = VotingRegressor(estimators=None) + pipelines = VotingRegressorWrapper(estimators=None) pipelines.estimators_ = self.pipelines else: pipelines = None diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py index 3d3887ee5..0945ff9d6 100644 --- a/autoPyTorch/evaluation/train_evaluator.py +++ b/autoPyTorch/evaluation/train_evaluator.py @@ -297,6 +297,7 @@ def _predict(self, pipeline: BaseEstimator, self.y_valid) else: valid_pred = None + if self.X_test is not None: test_pred = self.predict_function(self.X_test, pipeline, self.y_train[train_indices]) diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py index f7cefd100..a094ecbc1 100644 --- a/autoPyTorch/evaluation/utils.py +++ b/autoPyTorch/evaluation/utils.py @@ -4,13 +4,16 @@ import numpy as np +from sklearn.ensemble import VotingRegressor + from smac.runhistory.runhistory 
import RunValue __all__ = [ 'read_queue', 'convert_multioutput_multiclass_to_multilabel', 'extract_learning_curve', - 'empty_queue' + 'empty_queue', + 'VotingRegressorWrapper' ] @@ -78,3 +81,24 @@ def convert_multioutput_multiclass_to_multilabel(probas: Union[List, np.ndarray] multioutput_probas[:, i] = 0 probas = multioutput_probas return probas + + +class VotingRegressorWrapper(VotingRegressor): + """ + Wrapper around the sklearn voting regressor that properly handles + predictions with shape (B, 1) + """ + + def _predict(self, X: np.array) -> np.array: + # overriding the _predict function should be enough + predictions = [] + for est in self.estimators_: + pred = est.predict(X) + + if pred.ndim > 1 and pred.shape[1] > 1: + raise ValueError("Multi-output regression not yet supported with VotingRegressor. " + "Issue is addressed here: https://github.com/scikit-learn/scikit-learn/issues/18289") + + predictions.append(pred.ravel()) + + return np.asarray(predictions).T diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 83484da1d..4461ff502 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -164,8 +164,7 @@ def configuration_fully_fitted(self) -> bool: def get_current_iter(self) -> int: return self._final_estimator.get_current_iter() - def predict(self, X: np.ndarray, batch_size: Optional[int] = None - ) -> np.ndarray: + def predict(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: """Predict the output using the selected model. 
Args: diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py index 24491af44..e90f35ed1 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py @@ -90,4 +90,9 @@ def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torc if self.preprocessor is None: raise ValueError("cant call {} without fitting the column transformer first." .format(self.__class__.__name__)) + + if len(X.shape) == 1: + # expand batch dimension when called on a single record + X = X[np.newaxis, ...] + return self.preprocessor.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network/base_network.py b/autoPyTorch/pipeline/components/setup/network/base_network.py index b40c7e774..0a80f526e 100644 --- a/autoPyTorch/pipeline/components/setup/network/base_network.py +++ b/autoPyTorch/pipeline/components/setup/network/base_network.py @@ -9,7 +9,7 @@ from autoPyTorch.constants import CLASSIFICATION_TASKS, STRING_TO_TASK_TYPES from autoPyTorch.pipeline.components.training.base_training import autoPyTorchTrainingComponent -from autoPyTorch.utils.common import FitRequirement +from autoPyTorch.utils.common import FitRequirement, get_device_from_fit_dictionary class NetworkComponent(autoPyTorchTrainingComponent): @@ -21,14 +21,11 @@ class NetworkComponent(autoPyTorchTrainingComponent): def __init__( self, network: Optional[torch.nn.Module] = None, - random_state: Optional[np.random.RandomState] = None, - device: Optional[torch.device] = None + random_state: Optional[np.random.RandomState] = None ) -> None: super(NetworkComponent, self).__init__() - self.network = network self.random_state = random_state - self.device = torch.device( - "cuda" if torch.cuda.is_available() 
else "cpu") if device is None else device + self.device = None self.add_fit_requirements([ FitRequirement("network_head", (torch.nn.Module,), user_defined=False, dataset_property=False), FitRequirement("network_backbone", (torch.nn.Module,), user_defined=False, dataset_property=False), @@ -53,6 +50,9 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchTrainingComponent: self.network = torch.nn.Sequential(X['network_backbone'], X['network_head']) # Properly set the network training device + if self.device is None: + self.device = get_device_from_fit_dictionary(X) + self.to(self.device) if STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']] in CLASSIFICATION_TASKS: @@ -113,12 +113,14 @@ def predict(self, loader: torch.utils.data.DataLoader) -> torch.Tensor: for i, (X_batch, Y_batch) in enumerate(loader): # Predict on batch - X_batch = torch.autograd.Variable(X_batch).float().to(self.device) + X_batch = X_batch.float().to(self.device) + + with torch.no_grad(): + Y_batch_pred = self.network(X_batch) + if self.final_activation is not None: + Y_batch_pred = self.final_activation(Y_batch_pred) - Y_batch_pred = self.network(X_batch).detach().cpu() - if self.final_activation is not None: - Y_batch_pred = self.final_activation(Y_batch_pred) - Y_batch_preds.append(Y_batch_pred) + Y_batch_preds.append(Y_batch_pred.cpu()) return torch.cat(Y_batch_preds, 0).cpu().numpy() diff --git a/autoPyTorch/pipeline/components/setup/network_head/base_network_head.py b/autoPyTorch/pipeline/components/setup/network_head/base_network_head.py index ced7630fa..0011accaa 100644 --- a/autoPyTorch/pipeline/components/setup/network_head/base_network_head.py +++ b/autoPyTorch/pipeline/components/setup/network_head/base_network_head.py @@ -3,7 +3,6 @@ import torch.nn as nn -from autoPyTorch.constants import CLASSIFICATION_TASKS, STRING_TO_TASK_TYPES from autoPyTorch.pipeline.components.base_component import BaseEstimator, autoPyTorchComponent from 
autoPyTorch.pipeline.components.setup.network_backbone.utils import get_output_shape from autoPyTorch.utils.common import FitRequirement @@ -20,7 +19,6 @@ def __init__(self, super().__init__() self.add_fit_requirements([ FitRequirement('input_shape', (Iterable,), user_defined=True, dataset_property=True), - FitRequirement('num_classes', (int,), user_defined=True, dataset_property=True), FitRequirement('task_type', (str,), user_defined=True, dataset_property=True), FitRequirement('output_shape', (Iterable, int), user_defined=True, dataset_property=True), ]) @@ -38,9 +36,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: Self """ input_shape = X['dataset_properties']['input_shape'] - output_shape = (X['dataset_properties']['num_classes'],) if \ - STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']] in \ - CLASSIFICATION_TASKS else X['dataset_properties']['output_shape'] + output_shape = X['dataset_properties']['output_shape'] self.head = self.build_head( input_shape=get_output_shape(X['network_backbone'], input_shape=input_shape), diff --git a/autoPyTorch/pipeline/components/training/metrics/utils.py b/autoPyTorch/pipeline/components/training/metrics/utils.py index f245d5f39..d386ce47e 100644 --- a/autoPyTorch/pipeline/components/training/metrics/utils.py +++ b/autoPyTorch/pipeline/components/training/metrics/utils.py @@ -20,8 +20,12 @@ def sanitize_array(array: np.ndarray) -> np.ndarray: :return: """ a = np.ravel(array) - maxi = np.nanmax(a[np.isfinite(a)]) - mini = np.nanmin(a[np.isfinite(a)]) + finite = np.isfinite(a) + if np.any(finite): + maxi = np.nanmax(a[finite]) + mini = np.nanmin(a[finite]) + else: + maxi = mini = 0 array[array == float('inf')] = maxi array[array == float('-inf')] = mini mid = (maxi + mini) / 2 diff --git a/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py b/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py index 5bcf0f861..ef31c27c5 100644 --- 
a/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py @@ -71,11 +71,13 @@ def get_hyperparameter_search_space(dataset_properties: typing.Optional[typing.D ) -> ConfigurationSpace: alpha = UniformFloatHyperparameter( "alpha", alpha[0][0], alpha[0][1], default_value=alpha[1]) - weighted_loss = CategoricalHyperparameter("weighted_loss", choices=weighted_loss[0], - default_value=weighted_loss[1]) + cs = ConfigurationSpace() cs.add_hyperparameters([alpha]) if dataset_properties is not None: - if STRING_TO_TASK_TYPES[dataset_properties['task_type']] not in CLASSIFICATION_TASKS: - cs.add_hyperparameters([weighted_loss]) + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: + weighted_loss = CategoricalHyperparameter("weighted_loss", + choices=weighted_loss[0], + default_value=weighted_loss[1]) + cs.add_hyperparameter(weighted_loss) return cs diff --git a/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py b/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py index dbd190c59..6acfb2982 100644 --- a/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py @@ -56,11 +56,12 @@ def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.A def get_hyperparameter_search_space(dataset_properties: typing.Optional[typing.Dict] = None, weighted_loss: typing.Tuple[typing.Tuple, bool] = ((True, False), True) ) -> ConfigurationSpace: - weighted_loss = CategoricalHyperparameter("weighted_loss", choices=weighted_loss[0], - default_value=weighted_loss[1]) cs = ConfigurationSpace() if dataset_properties is not None: - if STRING_TO_TASK_TYPES[dataset_properties['task_type']] not in CLASSIFICATION_TASKS: - cs.add_hyperparameters([weighted_loss]) + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: + weighted_loss = 
CategoricalHyperparameter("weighted_loss", + choices=weighted_loss[0], + default_value=weighted_loss[1]) + cs.add_hyperparameter(weighted_loss) return cs diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py index 69665007e..26109fca6 100644 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -6,12 +6,11 @@ import pandas as pd import torch -from torch.autograd import Variable from torch.optim import Optimizer from torch.optim.lr_scheduler import _LRScheduler from torch.utils.tensorboard.writer import SummaryWriter -from autoPyTorch.constants import BINARY +from autoPyTorch.constants import REGRESSION_TASKS from autoPyTorch.pipeline.components.training.base_training import autoPyTorchTrainingComponent from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score from autoPyTorch.utils.implementations import get_loss_weight_strategy @@ -119,9 +118,8 @@ def add_performance(self, def get_best_epoch(self, loss_type: str = 'val_loss') -> int: return np.argmin( - [self.performance_tracker[loss_type][e] for e in range(1, len( - self.performance_tracker[loss_type]) + 1 - )] + [self.performance_tracker[loss_type][e] + for e in range(1, len(self.performance_tracker[loss_type]) + 1)] ) + 1 # Epochs start at 1 def get_last_epoch(self) -> int: @@ -194,18 +192,17 @@ def prepare( # Weights for the loss function weights = None - kwargs = {} - if self.weighted_loss: - weights = self.get_class_weights(output_type, labels) - if output_type == BINARY: - kwargs['pos_weight'] = weights - else: - kwargs['weight'] = weights - - criterion = criterion(**kwargs) if weights is not None else criterion() + kwargs: Dict[str, Any] = {} + # if self.weighted_loss: + # weights = self.get_class_weights(output_type, labels) + # if output_type == BINARY: + # kwargs['pos_weight'] = weights + # pass + # 
else: + # kwargs['weight'] = weights # Setup the loss function - self.criterion = criterion + self.criterion = criterion(**kwargs) if weights is not None else criterion() # setup the model self.model = model.to(device) @@ -271,8 +268,8 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, loss, outputs = self.train_step(data, targets) # save for metric evaluation - outputs_data.append(outputs.detach()) - targets_data.append(targets.detach()) + outputs_data.append(outputs.detach().cpu()) + targets_data.append(targets.detach().cpu()) batch_size = data.size(0) loss_sum += loss * batch_size @@ -290,6 +287,16 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, else: return loss_sum / N, {} + def cast_targets(self, targets: torch.Tensor) -> torch.Tensor: + if self.task_type in REGRESSION_TASKS: + targets = targets.float().to(self.device) + # make sure that targets will have same shape as outputs (really important for mse loss for example) + if targets.ndim == 1: + targets = targets.unsqueeze(1) + else: + targets = targets.long().to(self.device) + return targets + def train_step(self, data: np.ndarray, targets: np.ndarray) -> Tuple[float, torch.Tensor]: """ Allows to train 1 step of gradient descent, given a batch of train/labels @@ -304,10 +311,9 @@ def train_step(self, data: np.ndarray, targets: np.ndarray) -> Tuple[float, torc """ # prepare data = data.float().to(self.device) - targets = targets.long().to(self.device) + targets = self.cast_targets(targets) data, criterion_kwargs = self.data_preparation(data, targets) - data = Variable(data) # training self.optimizer.zero_grad() @@ -347,17 +353,19 @@ def evaluate(self, test_loader: torch.utils.data.DataLoader, epoch: int, with torch.no_grad(): for step, (data, targets) in enumerate(test_loader): - batch_size = data.shape[0] + data = data.float().to(self.device) - targets = targets.long().to(self.device) + targets = self.cast_targets(targets) + outputs = 
self.model(data) + loss = self.criterion(outputs, targets) loss_sum += loss.item() * batch_size N += batch_size - outputs_data.append(outputs.detach()) - targets_data.append(targets.detach()) + outputs_data.append(outputs.detach().cpu()) + targets_data.append(targets.detach().cpu()) if writer: writer.add_scalar( @@ -372,8 +380,8 @@ def evaluate(self, test_loader: torch.utils.data.DataLoader, epoch: int, def compute_metrics(self, outputs_data: np.ndarray, targets_data: np.ndarray ) -> Dict[str, float]: # TODO: change once Ravin Provides the PR - outputs_data = torch.cat(outputs_data, dim=0) - targets_data = torch.cat(targets_data, dim=0) + outputs_data = torch.cat(outputs_data, dim=0).numpy() + targets_data = torch.cat(targets_data, dim=0).numpy() return calculate_score(targets_data, outputs_data, self.task_type, self.metrics) def get_class_weights(self, output_type: int, labels: Union[np.ndarray, torch.Tensor, pd.DataFrame] @@ -381,7 +389,7 @@ def get_class_weights(self, output_type: int, labels: Union[np.ndarray, torch.Te strategy = get_loss_weight_strategy(output_type) weights = strategy(y=labels) weights = torch.from_numpy(weights) - weights = weights.type(torch.FloatTensor).to(self.device) + weights = weights.float().to(self.device) return weights def data_preparation(self, X: np.ndarray, y: np.ndarray, diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py index e6a630fb6..667dd1ac6 100755 --- a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -33,7 +33,7 @@ BudgetTracker, RunSummary, ) -from autoPyTorch.utils.common import FitRequirement +from autoPyTorch.utils.common import FitRequirement, get_device_from_fit_dictionary from autoPyTorch.utils.logging_ import get_named_client_logger trainer_directory = os.path.split(__file__)[0] @@ -98,11 +98,11 @@ def 
get_components(self) -> Dict[str, autoPyTorchComponent]: return components def get_hyperparameter_search_space( - self, - dataset_properties: Optional[Dict[str, str]] = None, - default: Optional[str] = None, - include: Optional[List[str]] = None, - exclude: Optional[List[str]] = None, + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, ) -> ConfigurationSpace: """Returns the configuration space of the current chosen components @@ -192,8 +192,7 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom self.logger = get_named_client_logger( name=X['num_run'], # Log to a user provided port else to the default logging port - port=X['logger_port' - ] if 'logger_port' in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT, + port=X['logger_port'] if 'logger_port' in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT, ) fit_function = self._fit @@ -270,7 +269,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> torch.nn.Modu name=additional_losses), budget_tracker=budget_tracker, optimizer=X['optimizer'], - device=self.get_device(X), + device=get_device_from_fit_dictionary(X), metrics_during_training=X['metrics_during_training'], scheduler=X['lr_scheduler'], task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']], @@ -499,21 +498,6 @@ def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: if 'early_stopping' not in X: raise ValueError('To fit a Trainer, expected fit dictionary to have early_stopping') - def get_device(self, X: Dict[str, Any]) -> torch.device: - """ - Returns the device to do torch operations - - Args: - X (Dict[str, Any]): A fit dictionary to control how the pipeline - is fitted - - Returns: - torch.device: the device in which to compute operations. 
Cuda/cpu - """ - if not torch.cuda.is_available(): - return torch.device('cpu') - return torch.device(X['device']) - @staticmethod def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: """ diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py index ec80b4a5c..a02adb5ab 100644 --- a/autoPyTorch/pipeline/tabular_classification.py +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -60,17 +60,16 @@ class TabularClassificationPipeline(ClassifierMixin, BasePipeline): Examples """ - def __init__( - self, - config: Optional[Configuration] = None, - steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, - dataset_properties: Optional[Dict[str, Any]] = None, - include: Optional[Dict[str, Any]] = None, - exclude: Optional[Dict[str, Any]] = None, - random_state: Optional[np.random.RandomState] = None, - init_params: Optional[Dict[str, Any]] = None, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None - ): + def __init__(self, + config: Optional[Configuration] = None, + steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, + dataset_properties: Optional[Dict[str, Any]] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict[str, Any]] = None, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + ): super().__init__( config, steps, dataset_properties, include, exclude, random_state, init_params, search_space_updates) @@ -79,8 +78,8 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: # Pre-process X loader = self.named_steps['data_loader'].get_loader(X=X) pred = self.named_steps['network'].predict(loader) - if self.dataset_properties['output_shape'] == 1: - proba = pred[:, :self.dataset_properties['num_classes']] + if isinstance(self.dataset_properties['output_shape'], int): + proba = pred[:, 
:self.dataset_properties['output_shape']] normalizer = proba.sum(axis=1)[:, np.newaxis] normalizer[normalizer == 0.0] = 1.0 proba /= normalizer @@ -91,7 +90,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: all_proba = [] for k in range(self.dataset_properties['output_shape']): - proba_k = pred[:, k, :self.dataset_properties['num_classes'][k]] + proba_k = pred[:, k, :self.dataset_properties['output_shape'][k]] normalizer = proba_k.sum(axis=1)[:, np.newaxis] normalizer[normalizer == 0.0] = 1.0 proba_k /= normalizer @@ -141,12 +140,11 @@ def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.n return y - def _get_hyperparameter_search_space( - self, - dataset_properties: Dict[str, Any], - include: Optional[Dict[str, Any]] = None, - exclude: Optional[Dict[str, Any]] = None, - ) -> ConfigurationSpace: + def _get_hyperparameter_search_space(self, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + ) -> ConfigurationSpace: """Create the hyperparameter configuration space. 
For the given steps, and the Choices within that steps, @@ -218,7 +216,7 @@ def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], ("preprocessing", EarlyPreprocessing()), ("network_backbone", NetworkBackboneChoice(default_dataset_properties)), ("network_head", NetworkHeadChoice(default_dataset_properties)), - ("network", NetworkComponent(default_dataset_properties)), + ("network", NetworkComponent()), ("network_init", NetworkInitializerChoice(default_dataset_properties)), ("optimizer", OptimizerChoice(default_dataset_properties)), ("lr_scheduler", SchedulerChoice(default_dataset_properties)), diff --git a/autoPyTorch/pipeline/tabular_regression.py b/autoPyTorch/pipeline/tabular_regression.py index 40645223f..86b8fec84 100644 --- a/autoPyTorch/pipeline/tabular_regression.py +++ b/autoPyTorch/pipeline/tabular_regression.py @@ -35,7 +35,7 @@ class TabularRegressionPipeline(RegressorMixin, BasePipeline): - """This class is a proof of concept to integrate AutoSklearn Components + """This class is a proof of concept to integrate AutoPyTorch Components It implements a pipeline, which includes as steps: @@ -44,7 +44,7 @@ class TabularRegressionPipeline(RegressorMixin, BasePipeline): Contrary to the sklearn API it is not possible to enumerate the possible parameters in the __init__ function because we only know the - available classifiers at runtime. For this reason the user must + available regressors at runtime. For this reason the user must specifiy the parameters by passing an instance of ConfigSpace.configuration_space.Configuration. 
@@ -58,32 +58,31 @@ class TabularRegressionPipeline(RegressorMixin, BasePipeline): Examples """ - def __init__( - self, - config: Optional[Configuration] = None, - steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, - dataset_properties: Optional[Dict[str, Any]] = None, - include: Optional[Dict[str, Any]] = None, - exclude: Optional[Dict[str, Any]] = None, - random_state: Optional[np.random.RandomState] = None, - init_params: Optional[Dict[str, Any]] = None, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None - ): + def __init__(self, + config: Optional[Configuration] = None, + steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, + dataset_properties: Optional[Dict[str, Any]] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict[str, Any]] = None, + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + ): super().__init__( config, steps, dataset_properties, include, exclude, random_state, init_params, search_space_updates) def score(self, X: np.ndarray, y: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: - """score. - - Args: - X (np.ndarray): input to the pipeline, from which to guess targets - batch_size (Optional[int]): batch_size controls whether the pipeline - will be called on small chunks of the data. Useful when calling the - predict method on the whole array X results in a MemoryError. - Returns: - np.ndarray: coefficient of determination R^2 of the prediction - """ + """Scores the fitted estimator on (X, y) + + Args: + X (np.ndarray): input to the pipeline, from which to guess targets + batch_size (Optional[int]): batch_size controls whether the pipeline + will be called on small chunks of the data. Useful when calling the + predict method on the whole array X results in a MemoryError. 
+ Returns: + np.ndarray: coefficient of determination R^2 of the prediction + """ from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics, calculate_score metrics = get_metrics(self.dataset_properties, ['r2']) y_pred = self.predict(X, batch_size=batch_size) @@ -91,12 +90,11 @@ def score(self, X: np.ndarray, y: np.ndarray, batch_size: Optional[int] = None) metrics=metrics)['r2'] return r2 - def _get_hyperparameter_search_space( - self, - dataset_properties: Dict[str, Any], - include: Optional[Dict[str, Any]] = None, - exclude: Optional[Dict[str, Any]] = None, - ) -> ConfigurationSpace: + def _get_hyperparameter_search_space(self, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + ) -> ConfigurationSpace: """Create the hyperparameter configuration space. For the given steps, and the Choices within that steps, @@ -112,8 +110,7 @@ def _get_hyperparameter_search_space( of the dataset to guide the pipeline choices of components Returns: - cs (Configuration): The configuration space describing - the SimpleRegressionClassifier. + cs (Configuration): The configuration space describing the TabularRegressionPipeline. """ cs = ConfigurationSpace() @@ -126,12 +123,12 @@ def _get_hyperparameter_search_space( if 'target_type' not in dataset_properties: dataset_properties['target_type'] = 'tabular_regression' if dataset_properties['target_type'] != 'tabular_regression': - warnings.warn('Tabular classification is being used, however the target_type' + warnings.warn('Tabular regression is being used, however the target_type' 'is not given as "tabular_regression". Overriding it.') dataset_properties['target_type'] = 'tabular_regression' # get the base search space given this # dataset properties. 
Then overwrite with custom - # classification requirements + # regression requirements cs = self._get_base_search_space( cs=cs, dataset_properties=dataset_properties, exclude=exclude, include=include, pipeline=self.steps) @@ -143,8 +140,7 @@ def _get_hyperparameter_search_space( self.dataset_properties = dataset_properties return cs - def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], - ) -> List[Tuple[str, autoPyTorchChoice]]: + def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]]) -> List[Tuple[str, autoPyTorchChoice]]: """ Defines what steps a pipeline should follow. The step itself has choices given via autoPyTorchChoice. @@ -167,7 +163,7 @@ def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], ("preprocessing", EarlyPreprocessing()), ("network_backbone", NetworkBackboneChoice(default_dataset_properties)), ("network_head", NetworkHeadChoice(default_dataset_properties)), - ("network", NetworkComponent(default_dataset_properties)), + ("network", NetworkComponent()), ("network_init", NetworkInitializerChoice(default_dataset_properties)), ("optimizer", OptimizerChoice(default_dataset_properties)), ("lr_scheduler", SchedulerChoice(default_dataset_properties)), @@ -183,4 +179,4 @@ def _get_estimator_hyperparameter_name(self) -> str: Returns: str: name of the pipeline type """ - return "tabular_regresser" + return "tabular_regressor" diff --git a/autoPyTorch/utils/common.py b/autoPyTorch/utils/common.py index 88af11531..1fa1c0f8f 100644 --- a/autoPyTorch/utils/common.py +++ b/autoPyTorch/utils/common.py @@ -135,6 +135,23 @@ def hash_array_or_matrix(X: Union[np.ndarray, pd.DataFrame]) -> str: return hash +def get_device_from_fit_dictionary(X: Dict[str, Any]) -> torch.device: + """ + Get a torch device object by checking if the fit dictionary specifies a device. If not, or if no GPU is available + return a CPU device. 
+ + Args: + X (Dict[str, Any]): A fit dictionary to control how the pipeline is fitted + + Returns: + torch.device: Device to be used for training/inference + """ + if not torch.cuda.is_available(): + return torch.device("cpu") + + return torch.device(X.get("device", "cpu")) + + def subsampler(data: Union[np.ndarray, pd.DataFrame, scipy.sparse.csr_matrix], x: Union[np.ndarray, List[int]] ) -> Union[np.ndarray, pd.DataFrame, scipy.sparse.csr_matrix]: diff --git a/examples/example_tabular_regression.py b/examples/example_tabular_regression.py new file mode 100644 index 000000000..43c901827 --- /dev/null +++ b/examples/example_tabular_regression.py @@ -0,0 +1,116 @@ +""" +====================== +Tabular Regression +====================== + +The following example shows how to fit a sample classification model +with AutoPyTorch +""" +import os +import tempfile as tmp +import typing +import warnings + +from sklearn.datasets import make_regression + +from autoPyTorch.data.tabular_feature_validator import TabularFeatureValidator + +os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() +os.environ['OMP_NUM_THREADS'] = '1' +os.environ['OPENBLAS_NUM_THREADS'] = '1' +os.environ['MKL_NUM_THREADS'] = '1' + +warnings.simplefilter(action='ignore', category=UserWarning) +warnings.simplefilter(action='ignore', category=FutureWarning) + +from sklearn import model_selection, preprocessing + +from autoPyTorch.api.tabular_regression import TabularRegressionTask +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates + + +def get_search_space_updates(): + """ + Search space updates to the task can be added using HyperparameterSearchSpaceUpdates + Returns: + HyperparameterSearchSpaceUpdates + """ + updates = HyperparameterSearchSpaceUpdates() + updates.append(node_name="data_loader", + hyperparameter="batch_size", + value_range=[16, 512], + default_value=32) + 
updates.append(node_name="lr_scheduler", + hyperparameter="CosineAnnealingLR:T_max", + value_range=[50, 60], + default_value=55) + updates.append(node_name='network_backbone', + hyperparameter='ResNetBackbone:dropout', + value_range=[0, 0.5], + default_value=0.2) + return updates + + +if __name__ == '__main__': + ############################################################################ + # Data Loading + # ============ + + # Get the training data for tabular regression + # X, y = datasets.fetch_openml(name="cholesterol", return_X_y=True) + + # Use dummy data for now since there are problems with categorical columns + X, y = make_regression( + n_samples=5000, + n_features=4, + n_informative=3, + n_targets=1, + shuffle=True, + random_state=0 + ) + + X_train, X_test, y_train, y_test = model_selection.train_test_split( + X, + y, + random_state=1, + ) + + # Scale the regression targets to have zero mean and unit variance. + # This is important for Neural Networks since predicting large target values would require very large weights. 
+ # One can later rescale the network predictions like this: y_pred = y_pred_scaled * y_train_std + y_train_mean + y_train_mean = y_train.mean() + y_train_std = y_train.std() + + y_train_scaled = (y_train - y_train_mean) / y_train_std + y_test_scaled = (y_test - y_train_mean) / y_train_std + + ############################################################################ + # Build and fit a regressor + # ========================== + api = TabularRegressionTask( + delete_tmp_folder_after_terminate=False, + search_space_updates=get_search_space_updates() + ) + api.search( + X_train=X_train, + y_train=y_train_scaled, + X_test=X_test.copy(), + y_test=y_test_scaled.copy(), + optimize_metric='r2', + total_walltime_limit=500, + func_eval_time_limit=50, + traditional_per_total_budget=0 + ) + + ############################################################################ + # Print the final ensemble performance + # ==================================== + print(api.run_history, api.trajectory) + y_pred_scaled = api.predict(X_test) + + # Rescale the Neural Network predictions into the original target range + y_pred = y_pred_scaled * y_train_std + y_train_mean + score = api.score(y_pred, y_test) + + print(score) diff --git a/test/conftest.py b/test/conftest.py index 5b63f33ad..a5d0fe0af 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -8,9 +8,11 @@ import numpy as np +import pandas as pd + import pytest -from sklearn.datasets import fetch_openml, make_classification +from sklearn.datasets import fetch_openml, make_classification, make_regression from autoPyTorch.data.tabular_validator import TabularInputValidator from autoPyTorch.datasets.tabular_dataset import TabularDataset @@ -145,29 +147,86 @@ def session_run_at_end(): return client -# Dataset fixture to test different scenarios on a scalable way -# Please refer to https://docs.pytest.org/en/stable/fixture.html for details -# on what fixtures are -@pytest.fixture -def fit_dictionary(request): - return 
request.getfixturevalue(request.param) - - -@pytest.fixture -def fit_dictionary_numerical_only(backend): - X, y = make_classification( - n_samples=200, - n_features=10, - n_informative=6, - n_redundant=4, - n_repeated=0, - n_classes=2, - n_clusters_per_class=2, - shuffle=True, - random_state=0 - ) - X = X.astype('float64') - validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) +def get_tabular_data(task): + if task == "classification_numerical_only": + X, y = make_classification( + n_samples=200, + n_features=4, + n_informative=3, + n_redundant=1, + n_repeated=0, + n_classes=2, + n_clusters_per_class=2, + shuffle=True, + random_state=0 + ) + validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) + + elif task == "classification_categorical_only": + X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] + X = X[categorical_columns] + X = X.iloc[0:200] + y = y.iloc[0:200] + validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) + + elif task == "classification_numerical_and_categorical": + X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X = X.iloc[0:200] + y = y.iloc[0:200] + validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) + + elif task == "regression_numerical_only": + X, y = make_regression(n_samples=200, + n_features=4, + n_informative=3, + n_targets=1, + shuffle=True, + random_state=0) + y = (y - y.mean()) / y.std() + validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) + + elif task == "regression_categorical_only": + X, y = fetch_openml("cholesterol", return_X_y=True, as_frame=True) + categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] + X = X[categorical_columns] + + # fill nan values for now since they are not handled properly yet + for column in 
X.columns: + if X[column].dtype.name == "category": + X[column] = pd.Categorical(X[column], + categories=list(X[column].cat.categories) + ["missing"]).fillna("missing") + else: + X[column] = X[column].fillna(0) + + X = X.iloc[0:200] + y = y.iloc[0:200] + y = (y - y.mean()) / y.std() + validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) + + elif task == "regression_numerical_and_categorical": + X, y = fetch_openml("cholesterol", return_X_y=True, as_frame=True) + + # fill nan values for now since they are not handled properly yet + for column in X.columns: + if X[column].dtype.name == "category": + X[column] = pd.Categorical(X[column], + categories=list(X[column].cat.categories) + ["missing"]).fillna("missing") + else: + X[column] = X[column].fillna(0) + + X = X.iloc[0:200] + y = y.iloc[0:200] + y = (y - y.mean()) / y.std() + validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) + + else: + raise ValueError("Unsupported task {}".format(task)) + + return X, y, validator + + +def get_fit_dictionary(X, y, validator, backend): datamanager = TabularDataset( X=X, Y=y, validator=validator, @@ -177,45 +236,7 @@ def fit_dictionary_numerical_only(backend): info = datamanager.get_required_dataset_info() dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info)) - fit_dictionary = { - 'X_train': datamanager.train_tensors[0], - 'y_train': datamanager.train_tensors[1], - 'train_indices': datamanager.splits[0][0], - 'val_indices': datamanager.splits[0][1], - 'dataset_properties': dataset_properties, - 'num_run': np.random.randint(50), - 'device': 'cpu', - 'budget_type': 'epochs', - 'epochs': 1, - 'torch_num_threads': 1, - 'early_stopping': 20, - 'working_dir': '/tmp', - 'use_tensorboard_logger': True, - 'use_pynisher': False, - 'metrics_during_training': True, - 'split_id': 0, - 'backend': backend, - } - backend.save_datamanager(datamanager) - return fit_dictionary - - -@pytest.fixture -def 
fit_dictionary_categorical_only(backend): - X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] - X = X[categorical_columns] - X = X.iloc[0:200] - y = y.iloc[0:200] - validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) - datamanager = TabularDataset( - X=X, Y=y, - validator=validator, - X_test=X, Y_test=y, - ) - info = datamanager.get_required_dataset_info() - dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info)) fit_dictionary = { 'X_train': datamanager.train_tensors[0], 'y_train': datamanager.train_tensors[1], @@ -225,9 +246,9 @@ def fit_dictionary_categorical_only(backend): 'num_run': np.random.randint(50), 'device': 'cpu', 'budget_type': 'epochs', - 'epochs': 1, + 'epochs': 100, 'torch_num_threads': 1, - 'early_stopping': 20, + 'early_stopping': 10, 'working_dir': '/tmp', 'use_tensorboard_logger': True, 'use_pynisher': False, @@ -240,41 +261,20 @@ def fit_dictionary_categorical_only(backend): @pytest.fixture -def fit_dictionary_num_and_categorical(backend): - X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X = X.iloc[0:200] - y = y.iloc[0:200] - validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) - datamanager = TabularDataset( - X=X, Y=y, - validator=validator, - X_test=X, Y_test=y, - ) - info = datamanager.get_required_dataset_info() +def fit_dictionary_tabular_dummy(request, backend): + if request.param == "classification": + X, y, validator = get_tabular_data("classification_numerical_only") + elif request.param == "regression": + X, y, validator = get_tabular_data("regression_numerical_only") + else: + raise ValueError("Unsupported indirect fixture {}".format(request.param)) + return get_fit_dictionary(X, y, validator, backend) - dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info)) - fit_dictionary = 
{ - 'X_train': datamanager.train_tensors[0], - 'y_train': datamanager.train_tensors[1], - 'train_indices': datamanager.splits[0][0], - 'val_indices': datamanager.splits[0][1], - 'dataset_properties': dataset_properties, - 'num_run': np.random.randint(50), - 'device': 'cpu', - 'budget_type': 'epochs', - 'epochs': 1, - 'torch_num_threads': 1, - 'early_stopping': 20, - 'working_dir': '/tmp', - 'use_tensorboard_logger': True, - 'use_pynisher': False, - 'metrics_during_training': True, - 'split_id': 0, - 'backend': backend, - } - backend.save_datamanager(datamanager) - return fit_dictionary +@pytest.fixture +def fit_dictionary_tabular(request, backend): + X, y, validator = get_tabular_data(request.param) + return get_fit_dictionary(X, y, validator, backend) @pytest.fixture diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index cdd22882d..ea7cccd72 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -4,16 +4,19 @@ import numpy as np +import pandas as pd + import pytest import sklearn import sklearn.datasets -from sklearn.ensemble import VotingClassifier +from sklearn.ensemble import VotingClassifier, VotingRegressor import torch from autoPyTorch.api.tabular_classification import TabularClassificationTask +from autoPyTorch.api.tabular_regression import TabularRegressionTask from autoPyTorch.datasets.resampling_strategy import ( CrossValTypes, HoldoutValTypes, @@ -30,7 +33,7 @@ @pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, CrossValTypes.k_fold_cross_validation, )) -def test_classification(openml_id, resampling_strategy, backend): +def test_tabular_classification(openml_id, resampling_strategy, backend): # Get the data and check that contents of data-manager make sense X, y = sklearn.datasets.fetch_openml( @@ -173,3 +176,164 @@ def test_classification(openml_id, resampling_strategy, backend): with open(dump_file, 'rb') as f: restored_estimator = pickle.load(f) restored_estimator.predict(X_test) + 
+ +@pytest.mark.parametrize('openml_name', ("cholesterol", )) +@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, + CrossValTypes.k_fold_cross_validation, + )) +def test_tabular_regression(openml_name, resampling_strategy, backend): + + # Get the data and check that contents of data-manager make sense + X, y = sklearn.datasets.fetch_openml( + openml_name, + return_X_y=True, + as_frame=True + ) + # normalize values + y = (y - y.mean()) / y.std() + + # fill NAs for now since they are not yet properly handled + for column in X.columns: + if X[column].dtype.name == "category": + X[column] = pd.Categorical(X[column], + categories=list(X[column].cat.categories) + ["missing"]).fillna("missing") + else: + X[column] = X[column].fillna(0) + + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1) + + # Search for a good configuration + estimator = TabularRegressionTask( + backend=backend, + resampling_strategy=resampling_strategy, + ) + + estimator.search( + X_train=X_train, y_train=y_train, + X_test=X_test, y_test=y_test, + optimize_metric='r2', + total_walltime_limit=50, + func_eval_time_limit=10, + traditional_per_total_budget=0 + ) + + # Internal dataset has expected settings + assert estimator.dataset.task_type == 'tabular_regression' + expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 3 + assert estimator.resampling_strategy == resampling_strategy + assert estimator.dataset.resampling_strategy == resampling_strategy + assert len(estimator.dataset.splits) == expected_num_splits + + # TODO: check for budget + + # Check for the created files + tmp_dir = estimator._backend.temporary_directory + loaded_datamanager = estimator._backend.load_datamanager() + assert len(loaded_datamanager.train_tensors) == len(estimator.dataset.train_tensors) + + expected_files = [ + 'smac3-output/run_1/configspace.json', + 'smac3-output/run_1/runhistory.json', + 
'smac3-output/run_1/scenario.txt', + 'smac3-output/run_1/stats.json', + 'smac3-output/run_1/train_insts.txt', + 'smac3-output/run_1/trajectory.json', + '.autoPyTorch/datamanager.pkl', + '.autoPyTorch/ensemble_read_preds.pkl', + '.autoPyTorch/start_time_1', + '.autoPyTorch/ensemble_history.json', + '.autoPyTorch/ensemble_read_scores.pkl', + '.autoPyTorch/true_targets_ensemble.npy', + ] + for expected_file in expected_files: + assert os.path.exists(os.path.join(tmp_dir, expected_file)), expected_file + + # Check that smac was able to find proper models + succesful_runs = [run_value.status for run_value in estimator.run_history.data.values( + ) if 'SUCCESS' in str(run_value.status)] + assert len(succesful_runs) > 1, [(k, v) for k, v in estimator.run_history.data.items()] + + # Search for an existing run key in disc. A individual model might have + # a timeout and hence was not written to disc + for i, (run_key, value) in enumerate(estimator.run_history.data.items()): + if i == 0: + # Ignore dummy run + continue + if 'SUCCESS' not in str(value.status): + continue + + run_key_model_run_dir = estimator._backend.get_numrun_directory( + estimator.seed, run_key.config_id, run_key.budget) + if os.path.exists(run_key_model_run_dir): + break + + if resampling_strategy == HoldoutValTypes.holdout_validation: + model_file = os.path.join(run_key_model_run_dir, + f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.model") + assert os.path.exists(model_file), model_file + model = estimator._backend.load_model_by_seed_and_id_and_budget( + estimator.seed, run_key.config_id, run_key.budget) + assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) + elif resampling_strategy == CrossValTypes.k_fold_cross_validation: + model_file = os.path.join( + run_key_model_run_dir, + f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.cv_model" + ) + assert os.path.exists(model_file), model_file + model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( + 
estimator.seed, run_key.config_id, run_key.budget) + assert isinstance(model, VotingRegressor) + assert len(model.estimators_) == 3 + assert isinstance(model.estimators_[0].named_steps['network'].get_network(), + torch.nn.Module) + else: + pytest.fail(resampling_strategy) + + # Make sure that predictions on the test data are printed and make sense + test_prediction = os.path.join(run_key_model_run_dir, + estimator._backend.get_prediction_filename( + 'test', estimator.seed, run_key.config_id, + run_key.budget)) + assert os.path.exists(test_prediction), test_prediction + assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0] + + # Also, for ensemble builder, the OOF predictions should be there and match + # the Ground truth that is also physically printed to disk + ensemble_prediction = os.path.join(run_key_model_run_dir, + estimator._backend.get_prediction_filename( + 'ensemble', + estimator.seed, run_key.config_id, + run_key.budget)) + assert os.path.exists(ensemble_prediction), ensemble_prediction + assert np.shape(np.load(ensemble_prediction, allow_pickle=True))[0] == np.shape( + estimator._backend.load_targets_ensemble() + )[0] + + # Ensemble Builder produced an ensemble + estimator.ensemble_ is not None + + # There should be a weight for each element of the ensemble + assert len(estimator.ensemble_.identifiers_) == len(estimator.ensemble_.weights_) + + y_pred = estimator.predict(X_test) + + assert np.shape(y_pred)[0] == np.shape(X_test)[0] + + score = estimator.score(y_pred, y_test) + assert 'r2' in score + + # Check that we can pickle + # Test pickle + # This can happen on python greater than 3.6 + # as older python do not control the state of the logger + if sys.version_info >= (3, 7): + dump_file = os.path.join(estimator._backend.temporary_directory, 'dump.pkl') + + with open(dump_file, 'wb') as f: + pickle.dump(estimator, f) + + with open(dump_file, 'rb') as f: + restored_estimator = pickle.load(f) + 
restored_estimator.predict(X_test) diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index 94a0c8ea4..afa2b43e1 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -1,5 +1,4 @@ import copy -import random import numpy as np @@ -518,7 +517,6 @@ def test_featurevalidator_new_data_after_fit(openml_id, validator.dtypes = old_dtypes if test_data_type == 'pandas': columns = X_test.columns.tolist() - random.shuffle(columns) - X_test = X_test[columns] + X_test = X_test[reversed(columns)] with pytest.raises(ValueError, match=r"Changing the column order of the features"): transformed_X = validator.transform(X_test) diff --git a/test/test_datasets/test_tabular_dataset.py b/test/test_datasets/test_tabular_dataset.py index b96942902..ab0d09b9b 100644 --- a/test/test_datasets/test_tabular_dataset.py +++ b/test/test_datasets/test_tabular_dataset.py @@ -3,11 +3,10 @@ from autoPyTorch.utils.pipeline import get_dataset_requirements -@pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only', - 'fit_dictionary_categorical_only', - 'fit_dictionary_num_and_categorical'], indirect=True) -def test_get_dataset_properties(backend, fit_dictionary): - +@pytest.mark.parametrize("fit_dictionary_tabular", ['classification_numerical_only', + 'classification_categorical_only', + 'classification_numerical_and_categorical'], indirect=True) +def test_get_dataset_properties(backend, fit_dictionary_tabular): # The fixture creates a datamanager by itself datamanager = backend.load_datamanager() @@ -27,8 +26,7 @@ def test_get_dataset_properties(backend, fit_dictionary): 'task_type', 'output_type', 'input_shape', - 'output_shape', - 'num_classes', + 'output_shape' ]: assert expected in dataset_properties @@ -37,6 +35,6 @@ def test_get_dataset_properties(backend, fit_dictionary): assert dataset_requirement.name in dataset_properties.keys() assert 
isinstance(dataset_properties[dataset_requirement.name], dataset_requirement.supported_types) - assert datamanager.train_tensors[0].shape == fit_dictionary['X_train'].shape - assert datamanager.train_tensors[1].shape == fit_dictionary['y_train'].shape + assert datamanager.train_tensors[0].shape == fit_dictionary_tabular['X_train'].shape + assert datamanager.train_tensors[1].shape == fit_dictionary_tabular['y_train'].shape assert datamanager.task_type == 'tabular_classification' diff --git a/test/test_pipeline/components/base.py b/test/test_pipeline/components/base.py index 6ad3ad824..8adbbd48a 100644 --- a/test/test_pipeline/components/base.py +++ b/test/test_pipeline/components/base.py @@ -2,11 +2,12 @@ import unittest from typing import Any, Dict, List, Optional, Tuple -from sklearn.datasets import make_classification +from sklearn.datasets import make_classification, make_regression import torch -from autoPyTorch.constants import STRING_TO_OUTPUT_TYPES, STRING_TO_TASK_TYPES +from autoPyTorch.constants import BINARY, CLASSIFICATION_TASKS, CONTINUOUS, OUTPUT_TYPES_TO_STRING, REGRESSION_TASKS, \ + TASK_TYPES_TO_STRING from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import \ TabularColumnTransformer @@ -15,71 +16,112 @@ from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics -from autoPyTorch.pipeline.components.training.trainer.base_trainer import BudgetTracker +from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent, BudgetTracker from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline class 
BaseTraining(unittest.TestCase): - def setUp(self): - # Data - self.X, self.y = make_classification( - n_samples=5000, - n_features=4, - n_informative=3, - n_redundant=1, - n_repeated=0, - n_classes=2, - n_clusters_per_class=2, - shuffle=True, - random_state=0 - ) - self.X = torch.FloatTensor(self.X) - self.y = torch.LongTensor(self.y) - self.dataset = torch.utils.data.TensorDataset(self.X, self.y) - self.loader = torch.utils.data.DataLoader(self.dataset, batch_size=20) - self.dataset_properties = { - 'task_type': 'tabular_classification', - 'output_type': 'binary' + def prepare_trainer(self, + trainer: BaseTrainerComponent, + task_type: int): + if task_type in CLASSIFICATION_TASKS: + X, y = make_classification( + n_samples=5000, + n_features=4, + n_informative=3, + n_redundant=1, + n_repeated=0, + n_classes=2, + n_clusters_per_class=2, + shuffle=True, + random_state=0 + ) + X = torch.tensor(X, dtype=torch.float) + y = torch.tensor(y, dtype=torch.long) + output_type = BINARY + num_outputs = 2 + criterion = torch.nn.CrossEntropyLoss + + elif task_type in REGRESSION_TASKS: + X, y = make_regression( + n_samples=5000, + n_features=4, + n_informative=3, + n_targets=1, + shuffle=True, + random_state=0 + ) + X = torch.tensor(X, dtype=torch.float) + y = torch.tensor(y, dtype=torch.float) + # normalize targets for regression since NNs are better when predicting small outputs + y = ((y - y.mean()) / y.std()).unsqueeze(1) + output_type = CONTINUOUS + num_outputs = 1 + criterion = torch.nn.MSELoss + + else: + raise ValueError(f"task type {task_type} not supported for standard trainer test") + + dataset = torch.utils.data.TensorDataset(X, y) + loader = torch.utils.data.DataLoader(dataset, batch_size=20) + dataset_properties = { + 'task_type': TASK_TYPES_TO_STRING[task_type], + 'output_type': OUTPUT_TYPES_TO_STRING[output_type] } # training requirements - layers = [] - layers.append(torch.nn.Linear(4, 4)) - layers.append(torch.nn.Sigmoid()) - layers.append(torch.nn.Linear(4, 2)) 
- self.model = torch.nn.Sequential(*layers) - self.criterion = torch.nn.CrossEntropyLoss - self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01) - self.device = torch.device('cpu') - self.logger = logging.getLogger('test') - self.metrics = get_metrics(self.dataset_properties) - self.epochs = 20 - self.budget_tracker = BudgetTracker( + model = torch.nn.Sequential( + torch.nn.Linear(4, 4), + torch.nn.Sigmoid(), + torch.nn.Linear(4, num_outputs) + ) + + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + device = torch.device('cpu') + logger = logging.getLogger('StandardTrainer - test') + metrics = get_metrics(dataset_properties) + epochs = 1000 + budget_tracker = BudgetTracker( budget_type='epochs', - max_epochs=self.epochs, + max_epochs=epochs, + ) + + trainer.prepare( + scheduler=None, + model=model, + metrics=metrics, + criterion=criterion, + budget_tracker=budget_tracker, + optimizer=optimizer, + device=device, + metrics_during_training=True, + task_type=task_type, + output_type=output_type, + labels=y ) - self.task_type = STRING_TO_TASK_TYPES[self.dataset_properties['task_type']] - self.output_type = STRING_TO_OUTPUT_TYPES[self.dataset_properties['output_type']] - - def _overfit_model(self): - self.model.train() - # initialise the criterion as it is - # not being done in __init__ - self.criterion = self.criterion() - for epoch in range(self.epochs): - total_loss = 0 - for x, y in self.loader: - self.optimizer.zero_grad() + return trainer, model, optimizer, loader, criterion, epochs, logger + + def train_model(self, + model: torch.nn.Module, + optimizer: torch.optim.Optimizer, + loader: torch.utils.data.DataLoader, + criterion: torch.nn.Module, + epochs: int): + model.train() + + criterion = criterion() if not isinstance(criterion, torch.nn.Module) else criterion + for epoch in range(epochs): + for X, y in loader: + optimizer.zero_grad() # Forward pass - y_pred = self.model(self.X) + y_pred = model(X) # Compute Loss - loss = 
self.criterion(y_pred.squeeze(), self.y) - total_loss += loss + loss = criterion(y_pred, y) # Backward pass loss.backward() - self.optimizer.step() + optimizer.step() class TabularPipeline(TabularClassificationPipeline): diff --git a/test/test_pipeline/components/test_feature_preprocessor.py b/test/test_pipeline/components/test_feature_preprocessor.py index a812929e9..225193217 100644 --- a/test/test_pipeline/components/test_feature_preprocessor.py +++ b/test/test_pipeline/components/test_feature_preprocessor.py @@ -7,7 +7,7 @@ from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \ NoFeaturePreprocessor import NoFeaturePreprocessor -from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.\ +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing. \ base_feature_preprocessor_choice import FeatureProprocessorChoice from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline @@ -18,20 +18,20 @@ def preprocessor(request): return request.param -@pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only', - 'fit_dictionary_num_and_categorical'], indirect=True) +@pytest.mark.parametrize("fit_dictionary_tabular", ['classification_numerical_only', + 'classification_numerical_and_categorical'], indirect=True) class TestFeaturePreprocessors: - def test_feature_preprocessor(self, fit_dictionary, preprocessor): + def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor): preprocessor = FeatureProprocessorChoice( - dataset_properties=fit_dictionary['dataset_properties'] + dataset_properties=fit_dictionary_tabular['dataset_properties'] ).get_components()[preprocessor]() - configuration = preprocessor.\ - get_hyperparameter_search_space(dataset_properties=fit_dictionary["dataset_properties"]) \ + configuration = preprocessor. 
\ + get_hyperparameter_search_space(dataset_properties=fit_dictionary_tabular["dataset_properties"]) \ .get_default_configuration().get_dictionary() preprocessor = preprocessor.set_params(**configuration) - preprocessor.fit(fit_dictionary) - X = preprocessor.transform(fit_dictionary) + preprocessor.fit(fit_dictionary_tabular) + X = preprocessor.transform(fit_dictionary_tabular) sklearn_preprocessor = X['feature_preprocessor']['numerical'] # check if the fit dictionary X is modified as expected @@ -51,22 +51,22 @@ def test_feature_preprocessor(self, fit_dictionary, preprocessor): transformed = column_transformer.transform(X['X_train']) assert isinstance(transformed, np.ndarray) - def test_pipeline_fit_include(self, fit_dictionary, preprocessor): + def test_pipeline_fit_include(self, fit_dictionary_tabular, preprocessor): """ This test ensures that a tabular classification pipeline can be fit with all preprocessors in the include """ - fit_dictionary['epochs'] = 1 + fit_dictionary_tabular['epochs'] = 1 pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties'], + dataset_properties=fit_dictionary_tabular['dataset_properties'], include={'feature_preprocessor': [preprocessor]}) cs = pipeline.get_hyperparameter_search_space() config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary) + pipeline.fit(fit_dictionary_tabular) # To make sure we fitted the model, there should be a # run summary object with accuracy diff --git a/test/test_pipeline/components/test_setup_networks.py b/test/test_pipeline/components/test_setup_networks.py index 46debb0c5..be8af94c5 100644 --- a/test/test_pipeline/components/test_setup_networks.py +++ b/test/test_pipeline/components/test_setup_networks.py @@ -18,16 +18,16 @@ def head(request): @flaky.flaky(max_runs=3) -@pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only', - 'fit_dictionary_categorical_only', - 
'fit_dictionary_num_and_categorical'], indirect=True) +@pytest.mark.parametrize("fit_dictionary_tabular", ['classification_numerical_only', + 'classification_categorical_only', + 'classification_numerical_and_categorical'], indirect=True) class TestNetworks: - def test_pipeline_fit(self, fit_dictionary, backbone, head): + def test_pipeline_fit(self, fit_dictionary_tabular, backbone, head): """This test makes sure that the pipeline is able to fit given random combinations of hyperparameters across the pipeline""" pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties'], + dataset_properties=fit_dictionary_tabular['dataset_properties'], include={'network_backbone': [backbone], 'network_head': [head]}) cs = pipeline.get_hyperparameter_search_space() config = cs.get_default_configuration() @@ -35,13 +35,12 @@ def test_pipeline_fit(self, fit_dictionary, backbone, head): assert backbone == config.get('network_backbone:__choice__', None) assert head == config.get('network_head:__choice__', None) pipeline.set_hyperparameters(config) - # Need more epochs to make sure validation performance is met - fit_dictionary['epochs'] = 100 + fit_dictionary_tabular['epochs'] = 100 # Early stop to the best configuration seen - fit_dictionary['early_stopping'] = 50 + fit_dictionary_tabular['early_stopping'] = 50 - pipeline.fit(fit_dictionary) + pipeline.fit(fit_dictionary_tabular) # To make sure we fitted the model, there should be a # run summary object with accuracy @@ -64,16 +63,16 @@ def test_pipeline_fit(self, fit_dictionary, backbone, head): # Check that early stopping happened, if it did # We should not stop before patience - assert run_summary.get_last_epoch() >= fit_dictionary['early_stopping'] + assert run_summary.get_last_epoch() >= fit_dictionary_tabular['early_stopping'] # we should not be greater than max allowed epoch - assert run_summary.get_last_epoch() <= fit_dictionary['epochs'] + assert run_summary.get_last_epoch() <= 
fit_dictionary_tabular['epochs'] # every trained epoch has a val metric assert run_summary.get_last_epoch() == max(list(run_summary.performance_tracker['train_metrics'].keys())) epochs_since_best = run_summary.get_last_epoch() - run_summary.get_best_epoch() - if epochs_since_best >= fit_dictionary['early_stopping']: + if epochs_since_best >= fit_dictionary_tabular['early_stopping']: assert run_summary.get_best_epoch() == epoch_where_best # Make sure a network was fit diff --git a/test/test_pipeline/components/test_setup_preprocessing_node.py b/test/test_pipeline/components/test_setup_preprocessing_node.py index 1794ee96f..30c4843cd 100644 --- a/test/test_pipeline/components/test_setup_preprocessing_node.py +++ b/test/test_pipeline/components/test_setup_preprocessing_node.py @@ -36,7 +36,7 @@ def test_tabular_preprocess(self): 'output_type': OUTPUT_TYPES_TO_STRING[MULTICLASS], 'is_small_preprocess': True, 'input_shape': (15,), - 'num_classes': 2, + 'output_shape': 2, 'categories': [], 'issparse': False } @@ -72,7 +72,7 @@ def test_tabular_no_preprocess(self): 'output_type': OUTPUT_TYPES_TO_STRING[MULTICLASS], 'is_small_preprocess': False, 'input_shape': (15,), - 'num_classes': 2, + 'output_shape': 2, 'categories': [], 'issparse': False } diff --git a/test/test_pipeline/components/test_tabular_column_transformer.py b/test/test_pipeline/components/test_tabular_column_transformer.py index 5eae26f69..ef113c5eb 100644 --- a/test/test_pipeline/components/test_tabular_column_transformer.py +++ b/test/test_pipeline/components/test_tabular_column_transformer.py @@ -13,15 +13,14 @@ ) -@pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only', - 'fit_dictionary_categorical_only', - 'fit_dictionary_num_and_categorical'], indirect=True) +@pytest.mark.parametrize("fit_dictionary_tabular", ['classification_numerical_only', + 'classification_categorical_only', + 'classification_numerical_and_categorical'], indirect=True) class TestTabularTransformer: - def 
test_tabular_preprocess(self, fit_dictionary): - - pipeline = TabularPipeline(dataset_properties=fit_dictionary['dataset_properties']) - pipeline = pipeline.fit(fit_dictionary) - X = pipeline.transform(fit_dictionary) + def test_tabular_preprocess(self, fit_dictionary_tabular): + pipeline = TabularPipeline(dataset_properties=fit_dictionary_tabular['dataset_properties']) + pipeline = pipeline.fit(fit_dictionary_tabular) + X = pipeline.transform(fit_dictionary_tabular) column_transformer = X['tabular_transformer'] # check if transformer was added to fit dictionary @@ -34,7 +33,7 @@ def test_tabular_preprocess(self, fit_dictionary): data = column_transformer.preprocessor.fit_transform(X['X_train']) assert isinstance(data, np.ndarray) - def test_sparse_data(self, fit_dictionary): + def test_sparse_data(self, fit_dictionary_tabular): X = np.random.binomial(1, 0.1, (100, 2000)) sparse_X = csr_matrix(X) numerical_columns = list(range(2000)) diff --git a/test/test_pipeline/components/test_training.py b/test/test_pipeline/components/test_training.py index 4e8e9b0ca..081c6bcaa 100644 --- a/test/test_pipeline/components/test_training.py +++ b/test/test_pipeline/components/test_training.py @@ -10,6 +10,7 @@ import torch +from autoPyTorch import constants from autoPyTorch.pipeline.components.training.data_loader.base_data_loader import ( BaseDataLoaderComponent, ) @@ -20,13 +21,11 @@ StandardTrainer ) from autoPyTorch.pipeline.components.training.trainer.base_trainer import ( - BaseTrainerComponent, -) + BaseTrainerComponent, ) from autoPyTorch.pipeline.components.training.trainer.base_trainer_choice import ( TrainerChoice, ) - sys.path.append(os.path.dirname(__file__)) from base import BaseTraining # noqa (E402: module level import not at top of file) @@ -64,8 +63,8 @@ def test_check_requirements(self): # No input in fit dictionary with self.assertRaisesRegex( - ValueError, - 'To fit a data loader, expected fit dictionary to have split_id.' 
+ ValueError, + 'To fit a data loader, expected fit dictionary to have split_id.' ): loader.fit(fit_dictionary) @@ -129,101 +128,99 @@ def test_evaluate(self): Makes sure we properly evaluate data, returning a proper loss and metric """ - trainer = BaseTrainerComponent() - trainer.prepare( - model=self.model, - metrics=self.metrics, - criterion=self.criterion, - budget_tracker=self.budget_tracker, - optimizer=self.optimizer, - device=self.device, - metrics_during_training=True, - scheduler=None, - task_type=self.task_type, - output_type=self.output_type, - labels=self.y - ) - - prev_loss, prev_metrics = trainer.evaluate(self.loader, epoch=1, writer=None) + (trainer, + model, + optimizer, + loader, + criterion, + epochs, + logger) = self.prepare_trainer(BaseTrainerComponent(), + constants.TABULAR_CLASSIFICATION) + + prev_loss, prev_metrics = trainer.evaluate(loader, epoch=1, writer=None) self.assertIn('accuracy', prev_metrics) # Fit the model - self._overfit_model() + self.train_model(model, + optimizer, + loader, + criterion, + epochs) # Loss and metrics should have improved after fit # And the prediction should be better than random - loss, metrics = trainer.evaluate(self.loader, epoch=1, writer=None) + loss, metrics = trainer.evaluate(loader, epoch=1, writer=None) self.assertGreater(prev_loss, loss) self.assertGreater(metrics['accuracy'], prev_metrics['accuracy']) self.assertGreater(metrics['accuracy'], 0.5) -class StandartTrainerTest(BaseTraining, unittest.TestCase): +class StandardTrainerTest(BaseTraining, unittest.TestCase): - def test_epoch_training(self): - """ - Makes sure we are able to train a model and produce good - training performance - """ - trainer = StandardTrainer() - trainer.prepare( - scheduler=None, - model=self.model, - metrics=self.metrics, - criterion=self.criterion, - budget_tracker=self.budget_tracker, - optimizer=self.optimizer, - device=self.device, - metrics_during_training=True, - task_type=self.task_type, - 
output_type=self.output_type, - labels=self.y - ) + def test_regression_epoch_training(self): + (trainer, + _, + _, + loader, + _, + epochs, + logger) = self.prepare_trainer(StandardTrainer(), + constants.TABULAR_REGRESSION) + + # Train the model + counter = 0 + r2 = 0 + while r2 < 0.7: + loss, metrics = trainer.train_epoch(loader, epoch=1, logger=logger, writer=None) + counter += 1 + r2 = metrics['r2'] + + if counter > epochs: + self.fail(f"Could not overfit a dummy regression under {epochs} epochs") + + def test_classification_epoch_training(self): + (trainer, + _, + _, + loader, + _, + epochs, + logger) = self.prepare_trainer(StandardTrainer(), + constants.TABULAR_CLASSIFICATION) # Train the model counter = 0 accuracy = 0 while accuracy < 0.7: - loss, metrics = trainer.train_epoch(self.loader, epoch=1, logger=self.logger, writer=None) + loss, metrics = trainer.train_epoch(loader, epoch=1, logger=logger, writer=None) counter += 1 accuracy = metrics['accuracy'] - if counter > 1000: - self.fail("Could not overfit a dummy binary classification under 1000 epochs") + if counter > epochs: + self.fail(f"Could not overfit a dummy classification under {epochs} epochs") class MixUpTrainerTest(BaseTraining, unittest.TestCase): - - def test_epoch_training(self): - """ - Makes sure we are able to train a model and produce good - training performance - """ - trainer = MixUpTrainer(alpha=0.5) - trainer.prepare( - scheduler=None, - model=self.model, - metrics=self.metrics, - criterion=self.criterion, - budget_tracker=self.budget_tracker, - optimizer=self.optimizer, - device=self.device, - metrics_during_training=True, - task_type=self.task_type, - output_type=self.output_type, - labels=self.y - ) + def test_classification_epoch_training(self): + (trainer, + _, + _, + loader, + _, + epochs, + logger) = self.prepare_trainer(MixUpTrainer(alpha=0.5), + constants.TABULAR_CLASSIFICATION) # Train the model counter = 0 accuracy = 0 while accuracy < 0.7: - loss, metrics = 
trainer.train_epoch(self.loader, epoch=1, logger=self.logger, writer=None) + loss, metrics = trainer.train_epoch(loader, epoch=1, logger=logger, writer=None) counter += 1 accuracy = metrics['accuracy'] - if counter > 1000: - self.fail("Could not overfit a dummy binary classification under 1000 epochs") + if counter > epochs: + self.fail(f"Could not overfit a dummy classification under {epochs} epochs") class TrainerTest(unittest.TestCase): diff --git a/test/test_pipeline/test_losses.py b/test/test_pipeline/test_losses.py index ca3438d58..6cc669161 100644 --- a/test/test_pipeline/test_losses.py +++ b/test/test_pipeline/test_losses.py @@ -44,7 +44,7 @@ def test_losses(weighted): list_predictions = [pred_cross_entropy, torch.empty(4).random_(2), torch.randn(4)] list_names = [None, 'BCEWithLogitsLoss', None] list_targets = [torch.empty(4, dtype=torch.long).random_(4), torch.empty(4).random_(2), torch.randn(4)] - labels = [torch.empty(20, dtype=torch.long).random_(4), torch.empty(12, dtype=torch.long).random_(2), None] + labels = [torch.empty(100, dtype=torch.long).random_(4), torch.empty(100, dtype=torch.long).random_(2), None] for dataset_properties, pred, target, name, label in zip(list_properties, list_predictions, list_targets, list_names, labels): loss = get_loss_instance(dataset_properties=dataset_properties, name=name) diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 8f87d62ca..260587adb 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -13,6 +13,7 @@ import torch +from autoPyTorch import metrics from autoPyTorch.pipeline.components.setup.early_preprocessor.utils import get_preprocess_transforms from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline from autoPyTorch.utils.common import FitRequirement @@ -20,9 +21,9 @@ parse_hyperparameter_search_space_updates 
-@pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only', - 'fit_dictionary_categorical_only', - 'fit_dictionary_num_and_categorical'], indirect=True) +@pytest.mark.parametrize("fit_dictionary_tabular", ['classification_categorical_only', + 'classification_numerical_only', + 'classification_numerical_and_categorical'], indirect=True) class TestTabularClassification: def _assert_pipeline_search_space(self, pipeline, search_space_updates): config_space = pipeline.get_hyperparameter_search_space() @@ -44,16 +45,16 @@ def _assert_pipeline_search_space(self, pipeline, search_space_updates): elif isinstance(hyperparameter, CategoricalHyperparameter): assert update.value_range == hyperparameter.choices - def test_pipeline_fit(self, fit_dictionary): + def test_pipeline_fit(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to fit given random combinations of hyperparameters across the pipeline""" pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties']) + dataset_properties=fit_dictionary_tabular['dataset_properties']) cs = pipeline.get_hyperparameter_search_space() config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary) + pipeline.fit(fit_dictionary_tabular) # To make sure we fitted the model, there should be a # run summary object with accuracy @@ -68,43 +69,79 @@ def test_pipeline_fit(self, fit_dictionary): # Make sure a network was fit assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) - def test_pipeline_predict(self, fit_dictionary): - """This test makes sure that the pipeline is able to fit - given random combinations of hyperparameters across the pipeline""" - X = fit_dictionary['X_train'].copy() + @pytest.mark.parametrize("fit_dictionary_tabular_dummy", ["classification"], indirect=True) + def test_pipeline_score(self, fit_dictionary_tabular_dummy, fit_dictionary_tabular): + """This test makes 
sure that the pipeline is able to achieve a decent score on dummy data + given the default configuration""" + X = fit_dictionary_tabular_dummy['X_train'].copy() + y = fit_dictionary_tabular_dummy['y_train'].copy() pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties']) + dataset_properties=fit_dictionary_tabular_dummy['dataset_properties']) + + cs = pipeline.get_hyperparameter_search_space() + config = cs.get_default_configuration() + pipeline.set_hyperparameters(config) + + pipeline.fit(fit_dictionary_tabular_dummy) + + # we expect the output to have the same batch size as the test input, + # and number of outputs per batch sample equal to the number of classes ("output_shape" in dataset_properties) + expected_output_shape = (X.shape[0], + fit_dictionary_tabular_dummy["dataset_properties"]["output_shape"]) + + prediction = pipeline.predict(X) + assert isinstance(prediction, np.ndarray) + assert prediction.shape == expected_output_shape + + # we should be able to get a decent score on this dummy data + accuracy = metrics.accuracy(y, prediction.squeeze()) + assert accuracy >= 0.8 + + def test_pipeline_predict(self, fit_dictionary_tabular): + """This test makes sure that the pipeline is able to predict + given a random configuration""" + X = fit_dictionary_tabular['X_train'].copy() + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary_tabular['dataset_properties']) cs = pipeline.get_hyperparameter_search_space() config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary) + pipeline.fit(fit_dictionary_tabular) + + # we expect the output to have the same batch size as the test input, + # and number of outputs per batch sample equal to the number of outputs + expected_output_shape = (X.shape[0], fit_dictionary_tabular["dataset_properties"]["output_shape"]) prediction = pipeline.predict(X) assert isinstance(prediction, np.ndarray) - assert prediction.shape ==
(200, 2) + assert prediction.shape == expected_output_shape - def test_pipeline_predict_proba(self, fit_dictionary): + def test_pipeline_predict_proba(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to fit given random combinations of hyperparameters across the pipeline And then predict using predict probability """ - X = fit_dictionary['X_train'].copy() + X = fit_dictionary_tabular['X_train'].copy() pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties']) + dataset_properties=fit_dictionary_tabular['dataset_properties']) cs = pipeline.get_hyperparameter_search_space() config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary) + pipeline.fit(fit_dictionary_tabular) + + # we expect the output to have the same batch size as the test input, + # and number of outputs per batch sample equal to the number of classes ("output_shape" in dataset_properties) + expected_output_shape = (X.shape[0], fit_dictionary_tabular["dataset_properties"]["output_shape"]) prediction = pipeline.predict_proba(X) assert isinstance(prediction, np.ndarray) - assert prediction.shape == (200, 2) + assert prediction.shape == expected_output_shape - def test_pipeline_transform(self, fit_dictionary): + def test_pipeline_transform(self, fit_dictionary_tabular): """ In the context of autopytorch, transform expands a fit dictionary with components that where previously fit.
We can use this as a nice way to make sure @@ -113,69 +150,70 @@ def test_pipeline_transform(self, fit_dictionary): """ pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties']) + dataset_properties=fit_dictionary_tabular['dataset_properties']) cs = pipeline.get_hyperparameter_search_space() config = cs.sample_configuration() pipeline.set_hyperparameters(config) # We do not want to make the same early preprocessing operation to the fit dictionary - pipeline.fit(fit_dictionary.copy()) + pipeline.fit(fit_dictionary_tabular.copy()) - transformed_fit_dictionary = pipeline.transform(fit_dictionary) + transformed_fit_dictionary_tabular = pipeline.transform(fit_dictionary_tabular) # First, we do not lose anyone! (We use a fancy subset containment check) - assert fit_dictionary.items() <= transformed_fit_dictionary.items() + assert fit_dictionary_tabular.items() <= transformed_fit_dictionary_tabular.items() # Then the pipeline should have added the following keys expected_keys = {'imputer', 'encoder', 'scaler', 'tabular_transformer', 'preprocess_transforms', 'network', 'optimizer', 'lr_scheduler', 'train_data_loader', 'val_data_loader', 'run_summary'} - assert expected_keys.issubset(set(transformed_fit_dictionary.keys())) + assert expected_keys.issubset(set(transformed_fit_dictionary_tabular.keys())) # Then we need to have transformations being created. 
- assert len(get_preprocess_transforms(transformed_fit_dictionary)) > 0 + assert len(get_preprocess_transforms(transformed_fit_dictionary_tabular)) > 0 # We expect the transformations to be in the pipeline at anytime for inference - assert 'preprocess_transforms' in transformed_fit_dictionary.keys() + assert 'preprocess_transforms' in transformed_fit_dictionary_tabular.keys() @pytest.mark.parametrize("is_small_preprocess", [True, False]) - def test_default_configuration(self, fit_dictionary, is_small_preprocess): + def test_default_configuration(self, fit_dictionary_tabular, is_small_preprocess): """Makes sure that when no config is set, we can trust the default configuration from the space""" - fit_dictionary['dataset_properties']['is_small_preprocess'] = is_small_preprocess + fit_dictionary_tabular['is_small_preprocess'] = is_small_preprocess + pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties']) - pipeline.fit(fit_dictionary) + dataset_properties=fit_dictionary_tabular['dataset_properties']) + + pipeline.fit(fit_dictionary_tabular) - def test_remove_key_check_requirements(self, fit_dictionary): + def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties']) - for key in ['num_run', 'device', 'split_id', 'use_pynisher', 'torch_num_threads', - 'dataset_properties', ]: - fit_dictionary_copy = fit_dictionary.copy() - fit_dictionary_copy.pop(key) + dataset_properties=fit_dictionary_tabular['dataset_properties']) + for key in ['num_run', 'device', 'split_id', 'use_pynisher', 'torch_num_threads', 'dataset_properties']: + fit_dictionary_tabular_copy = fit_dictionary_tabular.copy() + fit_dictionary_tabular_copy.pop(key) with pytest.raises(ValueError, match=r"To fit .+?, expected fit dictionary to have"): - pipeline.fit(fit_dictionary_copy) 
+ pipeline.fit(fit_dictionary_tabular_copy) - def test_network_optimizer_lr_handshake(self, fit_dictionary): + def test_network_optimizer_lr_handshake(self, fit_dictionary_tabular): """Fitting a network should put the network in the X""" # Create the pipeline to check. A random config should be sufficient pipeline = TabularClassificationPipeline( - dataset_properties=fit_dictionary['dataset_properties']) + dataset_properties=fit_dictionary_tabular['dataset_properties']) cs = pipeline.get_hyperparameter_search_space() config = cs.sample_configuration() pipeline.set_hyperparameters(config) # Make sure that fitting a network adds a "network" to X assert 'network' in pipeline.named_steps.keys() - fit_dictionary['network_backbone'] = torch.nn.Linear(3, 4) - fit_dictionary['network_head'] = torch.nn.Linear(4, 1) + fit_dictionary_tabular['network_backbone'] = torch.nn.Linear(3, 4) + fit_dictionary_tabular['network_head'] = torch.nn.Linear(4, 1) X = pipeline.named_steps['network'].fit( - fit_dictionary, + fit_dictionary_tabular, None - ).transform(fit_dictionary) + ).transform(fit_dictionary_tabular) assert 'network' in X # Then fitting a optimizer should fail if no network: @@ -202,7 +240,7 @@ def test_network_optimizer_lr_handshake(self, fit_dictionary): X = pipeline.named_steps['lr_scheduler'].fit(X, None).transform(X) assert 'optimizer' in X - def test_get_fit_requirements(self, fit_dictionary): + def test_get_fit_requirements(self, fit_dictionary_tabular): dataset_properties = {'numerical_columns': [], 'categorical_columns': [], 'task_type': 'tabular_classification'} pipeline = TabularClassificationPipeline(dataset_properties=dataset_properties) @@ -213,14 +251,14 @@ def test_get_fit_requirements(self, fit_dictionary): for requirement in fit_requirements: assert isinstance(requirement, FitRequirement) - def test_apply_search_space_updates(self, fit_dictionary, search_space_updates): + def test_apply_search_space_updates(self, fit_dictionary_tabular, 
search_space_updates): dataset_properties = {'numerical_columns': [1], 'categorical_columns': [2], 'task_type': 'tabular_classification'} pipeline = TabularClassificationPipeline(dataset_properties=dataset_properties, search_space_updates=search_space_updates) self._assert_pipeline_search_space(pipeline, search_space_updates) - def test_read_and_update_search_space(self, fit_dictionary, search_space_updates): + def test_read_and_update_search_space(self, fit_dictionary_tabular, search_space_updates): import tempfile path = tempfile.gettempdir() path = os.path.join(path, 'updates.txt') @@ -237,7 +275,7 @@ def test_read_and_update_search_space(self, fit_dictionary, search_space_updates search_space_updates=file_search_space_updates) assert file_search_space_updates == pipeline.search_space_updates - def test_error_search_space_updates(self, fit_dictionary, error_search_space_updates): + def test_error_search_space_updates(self, fit_dictionary_tabular, error_search_space_updates): dataset_properties = {'numerical_columns': [1], 'categorical_columns': [2], 'task_type': 'tabular_classification'} try: @@ -248,7 +286,7 @@ def test_error_search_space_updates(self, fit_dictionary, error_search_space_upd assert re.match(r'Unknown hyperparameter for component .*?\. Expected update ' r'hyperparameter to be in \[.*?\] got .+', e.args[0]) - def test_set_range_search_space_updates(self, fit_dictionary): + def test_set_range_search_space_updates(self, fit_dictionary_tabular): dataset_properties = {'numerical_columns': [1], 'categorical_columns': [2], 'task_type': 'tabular_classification'} config_dict = TabularClassificationPipeline(dataset_properties=dataset_properties). 
\ diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py new file mode 100644 index 000000000..15b8351f9 --- /dev/null +++ b/test/test_pipeline/test_tabular_regression.py @@ -0,0 +1,310 @@ +import os +import re + +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) + +import numpy as np + +import pytest + +import torch + +from autoPyTorch import metrics +from autoPyTorch.pipeline.components.setup.early_preprocessor.utils import get_preprocess_transforms +from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline +from autoPyTorch.utils.common import FitRequirement +from autoPyTorch.utils.hyperparameter_search_space_update import ( + HyperparameterSearchSpaceUpdate, + HyperparameterSearchSpaceUpdates, + parse_hyperparameter_search_space_updates +) + + +@pytest.mark.parametrize("fit_dictionary_tabular", ["regression_numerical_only", + "regression_categorical_only", + "regression_numerical_and_categorical" + ], indirect=True) +class TestTabularRegression: + def _assert_pipeline_search_space(self, pipeline, search_space_updates): + config_space = pipeline.get_hyperparameter_search_space() + for update in search_space_updates.updates: + try: + assert update.node_name + ':' + update.hyperparameter in config_space + hyperparameter = config_space.get_hyperparameter(update.node_name + ':' + update.hyperparameter) + except AssertionError: + assert any(update.node_name + ':' + update.hyperparameter in name + for name in config_space.get_hyperparameter_names()), \ + "Can't find hyperparameter: {}".format(update.hyperparameter) + hyperparameter = config_space.get_hyperparameter(update.node_name + ':' + update.hyperparameter + '_1') + assert update.default_value == hyperparameter.default_value + if isinstance(hyperparameter, (UniformIntegerHyperparameter, UniformFloatHyperparameter)): + assert update.value_range[0] == 
hyperparameter.lower + assert update.value_range[1] == hyperparameter.upper + if hasattr(update, 'log'): + assert update.log == hyperparameter.log + elif isinstance(hyperparameter, CategoricalHyperparameter): + assert update.value_range == hyperparameter.choices + + def test_pipeline_fit(self, fit_dictionary_tabular): + """This test makes sure that the pipeline is able to fit + given random combinations of hyperparameters across the pipeline""" + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular['dataset_properties']) + cs = pipeline.get_hyperparameter_search_space() + + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + pipeline.fit(fit_dictionary_tabular) + + # To make sure we fitted the model, there should be a + # run summary object with r2 + run_summary = pipeline.named_steps['trainer'].run_summary + assert run_summary is not None + + # Make sure that performance was properly captured + assert run_summary.performance_tracker['train_loss'][1] > 0 + assert run_summary.total_parameter_count > 0 + assert 'r2' in run_summary.performance_tracker['train_metrics'][1] + + # Make sure a network was fit + assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) + + @pytest.mark.parametrize("fit_dictionary_tabular_dummy", ["regression"], indirect=True) + def test_pipeline_score(self, fit_dictionary_tabular_dummy, fit_dictionary_tabular): + """This test makes sure that the pipeline is able to achieve a decent score on dummy data + given the default configuration""" + X = fit_dictionary_tabular_dummy['X_train'].copy() + y = fit_dictionary_tabular_dummy['y_train'].copy() + + # lower the learning rate of the optimizer until seeding properly works + # with the default learning rate of 0.01 regression sometimes does not converge + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'], + search_space_updates=HyperparameterSearchSpaceUpdates([ 
+ HyperparameterSearchSpaceUpdate("optimizer", + "AdamOptimizer:lr", + value_range=[0.0001, 0.001], + default_value=0.001) + ]) + ) + + cs = pipeline.get_hyperparameter_search_space() + config = cs.get_default_configuration() + pipeline.set_hyperparameters(config) + + pipeline.fit(fit_dictionary_tabular_dummy) + + # we expect the output to have the same batch size as the test input, + # and number of outputs per batch sample equal to the number of targets ("output_shape" in dataset_properties) + expected_output_shape = (X.shape[0], + fit_dictionary_tabular_dummy["dataset_properties"]["output_shape"]) + + prediction = pipeline.predict(X) + assert isinstance(prediction, np.ndarray) + assert prediction.shape == expected_output_shape + + # we should be able to get a decent score on this dummy data + r2_score = metrics.r2(y, prediction) + assert r2_score >= 0.5 + + def test_pipeline_predict(self, fit_dictionary_tabular): + """This test makes sure that the pipeline is able to predict + given a random configuration""" + X = fit_dictionary_tabular['X_train'].copy() + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular['dataset_properties']) + + cs = pipeline.get_hyperparameter_search_space() + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + + pipeline.fit(fit_dictionary_tabular) + + # we expect the output to have the same batch size as the test input, + # and number of outputs per batch sample equal to the number of targets ("output_shape" in dataset_properties) + expected_output_shape = (X.shape[0], fit_dictionary_tabular["dataset_properties"]["output_shape"]) + + prediction = pipeline.predict(X) + assert isinstance(prediction, np.ndarray) + assert prediction.shape == expected_output_shape + + def test_pipeline_transform(self, fit_dictionary_tabular): + """ + In the context of autopytorch, transform expands a fit dictionary with + components that where previously fit. 
We can use this as a nice way to make sure + that fit properly work. + This code is added in light of components not properly added to the fit dictionary + """ + + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular['dataset_properties']) + cs = pipeline.get_hyperparameter_search_space() + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + + # We do not want to make the same early preprocessing operation to the fit dictionary + pipeline.fit(fit_dictionary_tabular.copy()) + + transformed_fit_dictionary_tabular = pipeline.transform(fit_dictionary_tabular) + + # First, we do not lose anyone! (We use a fancy subset containment check) + assert fit_dictionary_tabular.items() <= transformed_fit_dictionary_tabular.items() + + # Then the pipeline should have added the following keys + expected_keys = {'imputer', 'encoder', 'scaler', 'tabular_transformer', + 'preprocess_transforms', 'network', 'optimizer', 'lr_scheduler', + 'train_data_loader', 'val_data_loader', 'run_summary'} + assert expected_keys.issubset(set(transformed_fit_dictionary_tabular.keys())) + + # Then we need to have transformations being created. 
+ assert len(get_preprocess_transforms(transformed_fit_dictionary_tabular)) > 0 + + # We expect the transformations to be in the pipeline at anytime for inference + assert 'preprocess_transforms' in transformed_fit_dictionary_tabular.keys() + + @pytest.mark.parametrize("is_small_preprocess", [True, False]) + def test_default_configuration(self, fit_dictionary_tabular, is_small_preprocess): + """Makes sure that when no config is set, we can trust the + default configuration from the space""" + + fit_dictionary_tabular['is_small_preprocess'] = is_small_preprocess + + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular['dataset_properties']) + + pipeline.fit(fit_dictionary_tabular) + + def test_remove_key_check_requirements(self, fit_dictionary_tabular): + """Makes sure that when a key is removed from X, correct error is outputted""" + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular['dataset_properties']) + for key in ['num_run', 'device', 'split_id', 'use_pynisher', 'torch_num_threads', 'dataset_properties']: + fit_dictionary_tabular_copy = fit_dictionary_tabular.copy() + fit_dictionary_tabular_copy.pop(key) + with pytest.raises(ValueError, match=r"To fit .+?, expected fit dictionary to have"): + pipeline.fit(fit_dictionary_tabular_copy) + + def test_network_optimizer_lr_handshake(self, fit_dictionary_tabular): + """Fitting a network should put the network in the X""" + # Create the pipeline to check. 
A random config should be sufficient + pipeline = TabularRegressionPipeline( + dataset_properties=fit_dictionary_tabular['dataset_properties']) + cs = pipeline.get_hyperparameter_search_space() + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + + # Make sure that fitting a network adds a "network" to X + assert 'network' in pipeline.named_steps.keys() + fit_dictionary_tabular['network_backbone'] = torch.nn.Linear(3, 4) + fit_dictionary_tabular['network_head'] = torch.nn.Linear(4, 1) + X = pipeline.named_steps['network'].fit( + fit_dictionary_tabular, + None + ).transform(fit_dictionary_tabular) + assert 'network' in X + + # Then fitting a optimizer should fail if no network: + assert 'optimizer' in pipeline.named_steps.keys() + with pytest.raises( + ValueError, + match=r"To fit .+?, expected fit dictionary to have 'network' but got .*" + ): + pipeline.named_steps['optimizer'].fit({'dataset_properties': {}}, None) + + # No error when network is passed + X = pipeline.named_steps['optimizer'].fit(X, None).transform(X) + assert 'optimizer' in X + + # Then fitting a optimizer should fail if no network: + assert 'lr_scheduler' in pipeline.named_steps.keys() + with pytest.raises( + ValueError, + match=r"To fit .+?, expected fit dictionary to have 'optimizer' but got .*" + ): + pipeline.named_steps['lr_scheduler'].fit({'dataset_properties': {}}, None) + + # No error when network is passed + X = pipeline.named_steps['lr_scheduler'].fit(X, None).transform(X) + assert 'optimizer' in X + + def test_get_fit_requirements(self, fit_dictionary_tabular): + dataset_properties = {'numerical_columns': [], 'categorical_columns': [], + 'task_type': 'tabular_regression'} + pipeline = TabularRegressionPipeline(dataset_properties=dataset_properties) + fit_requirements = pipeline.get_fit_requirements() + + # check if fit requirements is a list of FitRequirement named tuples + assert isinstance(fit_requirements, list) + for requirement in fit_requirements: + assert 
isinstance(requirement, FitRequirement) + + def test_apply_search_space_updates(self, fit_dictionary_tabular, search_space_updates): + dataset_properties = {'numerical_columns': [1], 'categorical_columns': [2], + 'task_type': 'tabular_regression'} + pipeline = TabularRegressionPipeline(dataset_properties=dataset_properties, + search_space_updates=search_space_updates) + self._assert_pipeline_search_space(pipeline, search_space_updates) + + def test_read_and_update_search_space(self, fit_dictionary_tabular, search_space_updates): + import tempfile + path = tempfile.gettempdir() + path = os.path.join(path, 'updates.txt') + # Write to disk + search_space_updates.save_as_file(path=path) + assert os.path.exists(path=path) + + # Read from disk + file_search_space_updates = parse_hyperparameter_search_space_updates(updates_file=path) + assert isinstance(file_search_space_updates, HyperparameterSearchSpaceUpdates) + dataset_properties = {'numerical_columns': [1], 'categorical_columns': [2], + 'task_type': 'tabular_regression'} + pipeline = TabularRegressionPipeline(dataset_properties=dataset_properties, + search_space_updates=file_search_space_updates) + assert file_search_space_updates == pipeline.search_space_updates + + def test_error_search_space_updates(self, fit_dictionary_tabular, error_search_space_updates): + dataset_properties = {'numerical_columns': [1], 'categorical_columns': [2], + 'task_type': 'tabular_regression'} + try: + _ = TabularRegressionPipeline(dataset_properties=dataset_properties, + search_space_updates=error_search_space_updates) + except Exception as e: + assert isinstance(e, ValueError) + assert re.match(r'Unknown hyperparameter for component .*?\. 
Expected update ' + r'hyperparameter to be in \[.*?\] got .+', e.args[0]) + + def test_set_range_search_space_updates(self, fit_dictionary_tabular): + dataset_properties = {'numerical_columns': [1], 'categorical_columns': [2], + 'task_type': 'tabular_regression'} + config_dict = TabularRegressionPipeline(dataset_properties=dataset_properties). \ + get_hyperparameter_search_space()._hyperparameters + updates = HyperparameterSearchSpaceUpdates() + for i, (name, hyperparameter) in enumerate(config_dict.items()): + if '__choice__' in name: + continue + name = name.split(':') + hyperparameter_name = ':'.join(name[1:]) + if '_' in hyperparameter_name: + if any(l_.isnumeric() for l_ in hyperparameter_name.split('_')[-1]) and 'network' in name[0]: + hyperparameter_name = '_'.join(hyperparameter_name.split('_')[:-1]) + if isinstance(hyperparameter, CategoricalHyperparameter): + value_range = (hyperparameter.choices[0],) + default_value = hyperparameter.choices[0] + else: + value_range = (0, 1) + default_value = 1 + updates.append(node_name=name[0], hyperparameter=hyperparameter_name, + value_range=value_range, default_value=default_value) + pipeline = TabularRegressionPipeline(dataset_properties=dataset_properties, + search_space_updates=updates) + + try: + self._assert_pipeline_search_space(pipeline, updates) + except AssertionError as e: + # As we are setting num_layers to 1 for fully connected + # head, units_layer does not exist in the configspace + assert 'fully_connected:units_layer' in e.args[0]