From f5cf0287d23794b15187696cc2f48b174bb5f135 Mon Sep 17 00:00:00 2001 From: flefebv <101119442+flefebv@users.noreply.github.com> Date: Tue, 28 Oct 2025 10:02:54 +0100 Subject: [PATCH 1/6] [MRG] Gradual domain adaptation (#354) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * gradual da * gradual da * implement gradual domain adaptation * fix docstring * fix docstrings * add more comprehensive tests * Improves clarity of variable names (`X_t` -> `X_step`), and improves testing of estimator-fit condition --------- Co-authored-by: Julie Alberge Co-authored-by: Théo Gnassounou <66993815+tgnassou@users.noreply.github.com> --- README.md | 2 + examples/methods/plot_gradual_da.py | 209 +++++++++++++++++++ skada/_gradual_da.py | 309 ++++++++++++++++++++++++++++ skada/tests/test_gradual_da.py | 90 ++++++++ 4 files changed, 610 insertions(+) create mode 100644 examples/methods/plot_gradual_da.py create mode 100644 skada/_gradual_da.py create mode 100644 skada/tests/test_gradual_da.py diff --git a/README.md b/README.md index c8cc4dff..085ed9ad 100644 --- a/README.md +++ b/README.md @@ -249,3 +249,5 @@ The library is distributed under the 3-Clause BSD license. [36] Xiao, Zhiqing, Wang, Haobo, Jin, Ying, Feng, Lei, Chen, Gang, Huang, Fei, Zhao, Junbo.[SPA: A Graph Spectral Alignment Perspective for Domain Adaptation](https://arxiv.org/pdf/2310.17594). In Neurips, 2023. [37] Xie, Renchunzi, Odonnat, Ambroise, Feofanov, Vasilii, Deng, Weijian, Zhang, Jianfeng and An, Bo. [MaNo: Exploiting Matrix Norm for Unsupervised Accuracy Estimation Under Distribution Shifts](https://arxiv.org/pdf/2405.18979). In NeurIPS, 2024. + +[38] Y. He, H. Wang, B. Li, H. Zhao. [Gradual Domain Adaptation: Theory and Algorithms](https://arxiv.org/pdf/2310.13852). In Journal of Machine Learning Research, 2024. 
\ No newline at end of file diff --git a/examples/methods/plot_gradual_da.py b/examples/methods/plot_gradual_da.py new file mode 100644 index 00000000..289bb4d1 --- /dev/null +++ b/examples/methods/plot_gradual_da.py @@ -0,0 +1,209 @@ +# %% +""" +Gradual Domain Adaptation Using Optimal Transport +================================================= + +This example illustrates the GOAT method from [38] on a simple classification task. +However, the CNN is replaced with a MLP. + +.. [38] Y. He, H. Wang, B. Li, H. Zhao + Gradual Domain Adaptation: Theory and Algorithms in + Journal of Machine Learning Research, 2024. + +""" + +# Authors: Félix Lefebvre and Julie Alberge +# +# License: BSD 3-Clause + +# %% Imports +import matplotlib.pyplot as plt +from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.neural_network import MLPClassifier + +from skada import source_target_split +from skada._gradual_da import GradualEstimator +from skada.datasets import make_shifted_datasets + +# %% +# Generate conditional shift dataset +# ---------------------------------- + +n, m = 20, 25 # number of source and target samples +X, y, sample_domain = make_shifted_datasets( + n_samples_source=n, + n_samples_target=m, + shift="conditional_shift", + noise=0.1, + random_state=42, +) + +# %% +# Plot source and target datasets +# ------------------------------- + +X_source, X_target, y_source, y_target = source_target_split( + X, y, sample_domain=sample_domain +) +lims = (min(X[:, 0]) - 0.5, max(X[:, 0]) + 0.5, min(X[:, 1]) - 0.5, max(X[:, 1]) + 0.5) + +n_tot_source = X_source.shape[0] +n_tot_target = X_target.shape[0] + +plt.figure(1, figsize=(8, 3.5)) +plt.subplot(121) + +plt.scatter(X_source[:, 0], X_source[:, 1], c=y_source, vmax=9, cmap="tab10", alpha=0.7) +plt.title("Source domain") +plt.axis(lims) + +plt.subplot(122) +plt.scatter(X_target[:, 0], X_target[:, 1], c=y_target, vmax=9, cmap="tab10", alpha=0.7) +plt.title("Target domain") +plt.axis(lims) + +# %% +# Fit Gradual 
Domain Adaptation +# ----------------------------- +# +# We use a MLP classifier as the base estimator (default parameters). + +base_estimator = MLPClassifier(hidden_layer_sizes=(50, 50)) + +gradual_adapter = GradualEstimator( + n_steps=40, # number of adaptation steps + base_estimator=base_estimator, + advanced_ot_plan_sampling=True, + save_estimators=True, + save_intermediate_data=True, +) + +gradual_adapter.fit( + X, + y, + sample_domain=sample_domain, +) + +# %% +# Check results +# ------------- +# Compute accuracy on source and target with the initial +# estimator and the final estimator. + + +clfs = gradual_adapter.get_intermediate_estimators() + +ACC_source_init = clfs[0].score(X_source, y_source) +ACC_target_init = clfs[0].score(X_target, y_target) + +print(f"Initial accuracy on source domain: {ACC_source_init:.3f}") +print(f"Initial accuracy on target domain: {ACC_target_init:.3f}") +print("") + +ACC_source = gradual_adapter.score(X_source, y_source) +ACC_target = gradual_adapter.score(X_target, y_target) + +print(f"Final accuracy on source domain: {ACC_source:.3f}") +print(f"Final accuracy on target domain: {ACC_target:.3f}") + + +# %% +# Inspect intermediate states +# --------------------------- +# +# We can plot the intermediate datasets and decision boundaries. 
+ +intermediate_data = gradual_adapter.intermediate_data_ + +fig, axes = plt.subplots(2, 4, figsize=(12, 6)) +axes = axes.ravel() + +# Define which steps to plot +steps_to_plot = [5, 10, 15, 20, 25, 30, 35, 40] + +for i, step in enumerate(steps_to_plot): + ax = axes[i] + X_step, y_step = intermediate_data[step - 1] + clf = clfs[step - 1] + + ax.scatter(X_step[:, 0], X_step[:, 1], c=y_step, vmax=9, cmap="tab10", alpha=0.7) + DecisionBoundaryDisplay.from_estimator( + clf, + X, + response_method="predict", + cmap="gray_r", + alpha=0.15, + ax=ax, + grid_resolution=200, + ) + ax.set_title(f"t = {step}") + ax.axis(lims) + +plt.tight_layout() + + +# %% +# Plot decision boundaries on source and target datasets +# ------------------------------------------------------ +# +# Now we can see how this gradual domain adaptation has changed +# the decision boundary between the source and target domains. + +figure, axis = plt.subplots(1, 2, figsize=(9, 4)) +cm = "gray_r" +DecisionBoundaryDisplay.from_estimator( + clfs[0], + X, + response_method="predict", + cmap=cm, + alpha=0.15, + ax=axis[0], + grid_resolution=200, +) +axis[0].scatter( + X_source[:, 0], + X_source[:, 1], + c=y_source, + vmax=9, + cmap="tab10", + alpha=0.7, +) +axis[0].set_title("Source domain") +DecisionBoundaryDisplay.from_estimator( + clfs[-1], + X, + response_method="predict", + cmap=cm, + alpha=0.15, + ax=axis[1], + grid_resolution=200, +) +axis[1].scatter( + X_target[:, 0], + X_target[:, 1], + c=y_target, + vmax=9, + cmap="tab10", + alpha=0.7, +) +axis[1].set_title("Target domain") + +axis[0].text( + 0.05, + 0.1, + f"Accuracy: {clfs[0].score(X_source, y_source):.1%}", + transform=axis[0].transAxes, + ha="left", + bbox={"boxstyle": "round", "facecolor": "white", "alpha": 0.5}, +) +axis[1].text( + 0.05, + 0.1, + f"Accuracy: {gradual_adapter.score(X_target, y_target):.1%}", + transform=axis[1].transAxes, + ha="left", + bbox={"boxstyle": "round", "facecolor": "white", "alpha": 0.5}, +) + +plt.show() +# %% diff 
--git a/skada/_gradual_da.py b/skada/_gradual_da.py new file mode 100644 index 00000000..81f19bec --- /dev/null +++ b/skada/_gradual_da.py @@ -0,0 +1,309 @@ +# Authors: Julie Alberge +# Felix Lefebvre +# +# License: BSD 3-Clause + +from copy import deepcopy + +import numpy as np +from ot import da +from sklearn.neural_network import MLPClassifier +from sklearn.utils.validation import check_is_fitted + +from skada.base import DAEstimator + + +class GradualEstimator(DAEstimator): + """Implementation of the GOAT algorithm [38]. + Gradually adapt a classifier from a source domain to a target domain using + Optimal Transport. + + Parameters + ---------- + alpha : float, default=1 + Parameter to control the number of samples generated at each step. + More precisely, at each step, we generate alpha * (n + m) samples, + where n and m are the number of source and target samples. + Only used when `advanced_ot_plan_sampling=False`. + n_steps : int, default=10 + The number of adaptation steps. + ot_method : ot.da.BaseTransport, default=SinkhornTransport + The Optimal Transport method to use. + base_estimator : BaseEstimator, default=None + The classifier to use. If None, a MLPClassifier with default parameters is + used. Note that the GOAT algorithm is designed for neural-networks methods. + advanced_ot_plan_sampling : bool, default=False + Whether to use the advanced OT plan sampling strategy. + This strategy consists in sampling at least one point per column and row + of the OT plan. This ensures a better coverage of the target domain. + This strategy is not described in the original paper. + save_estimators : bool, default=False + Whether to store the intermediate estimators. + save_intermediate_data : bool, default=False + Whether to store the intermediate generated data. + + References + ---------- + .. [38] Y. He, H. Wang, B. Li, H. Zhao + Gradual Domain Adaptation: Theory and Algorithms in + Journal of Machine Learning Research, 2024. 
+ """ + + __metadata_request__fit = {"sample_domain": True} + __metadata_request__predict = {"sample_domain": False, "allow_source": False} + __metadata_request__predict_proba = {"sample_domain": False, "allow_source": False} + __metadata_request__predict_log_proba = { + "sample_domain": False, + "allow_source": False, + } + __metadata_request__score = {"sample_domain": False, "allow_source": False} + __metadata_request__decision_function = { + "sample_domain": False, + "allow_source": False, + } + + def __init__( + self, + alpha=1, + n_steps=10, + ot_method=da.SinkhornTransport( + reg_e=1.0, + metric="sqeuclidean", + norm=None, + max_iter=1000, + tol=1e-8, + ), + base_estimator=None, + advanced_ot_plan_sampling=False, + save_estimators=False, + save_intermediate_data=False, + ): + self.alpha = alpha + self.n_steps = n_steps + self.ot_method = ot_method + self.base_estimator = base_estimator + self.advanced_ot_plan_sampling = advanced_ot_plan_sampling + self.save_estimators = save_estimators + self.save_intermediate_data = save_intermediate_data + + def fit(self, X, y=None, *, sample_domain=None): + """Fit gradual adaptation parameters. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The data. + y : array-like, shape (n_samples,) + The labels. + sample_domain : array-like, shape (n_samples,) + The domain labels. + + Returns + ------- + self : object + Returns self. 
+ """ + self.estimators_ = [] + source = sample_domain >= 0 + target = sample_domain < 0 + + # Compute the OT mapping between source and target + self.ot_method.fit(Xs=X[source], ys=y[source], Xt=X[target], yt=y[target]) + gamma = self.ot_method.coupling_ + if self.advanced_ot_plan_sampling: + mask_gamma = self._advanced_cut_off_gamma(gamma) + else: + mask_gamma = self._cut_off_gamma(gamma) + source = sample_domain >= 0 + if not self.base_estimator: + default_params = { + "hidden_layer_sizes": (100,), + "activation": "relu", + "solver": "adam", + "batch_size": "auto", + "learning_rate": "constant", + "learning_rate_init": 0.001, + "max_iter": 100, + } + self.base_estimator = MLPClassifier(**default_params) + self.base_estimator.fit(X[source], y[source]) + if self.save_estimators: + self.estimators_.append(deepcopy(self.base_estimator)) + if self.save_intermediate_data: + self.intermediate_data_ = [] + for step in range(1, self.n_steps + 1): + X_step, y_step = self.generate_data_at_step( + X, sample_domain, mask_gamma, step + ) + self.base_estimator.max_iter += 100 + self.base_estimator.fit(X_step, y_step) + if self.save_estimators: + self.estimators_.append(deepcopy(self.base_estimator)) + if self.save_intermediate_data: + self.intermediate_data_.append((X_step, y_step)) + return self + + def _cut_off_gamma(self, gamma): + """Cut off the OT mapping to keep only the largest values.""" + n, m = gamma.shape + # Get the self.alpha * (n + m) largest coefficients of gamma + self.max_index = int(min(self.alpha * (n + m), gamma.size - 1)) + + threshold = np.sort(gamma.flatten())[-self.max_index - 1] + return gamma >= threshold + + def _advanced_cut_off_gamma(self, gamma): + """Cut off the OT mapping to keep at least one value per row and column.""" + n, m = gamma.shape + # Keep only the largest element of each row and column + row_max = np.zeros((n, m), dtype=bool) + col_max = np.zeros((n, m), dtype=bool) + for i in range(n): + row_max[i, np.argmax(gamma[i])] = True + for 
j in range(m): + col_max[np.argmax(gamma[:, j]), j] = True + combined_mask = row_max | col_max + self.max_index = int(combined_mask.sum()) - 1 + return combined_mask + + def generate_data_at_step(self, X, sample_domain, mask_gamma, step): + """Generate data at a given time step. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The data. + sample_domain : array-like, shape (n_samples,) + The domain labels. + mask_gamma : array-like, shape (n_samples, n_samples) + The pruned OT mapping. + step : int + The current step. + + Returns + ------- + X_step : array-like, shape (n_intermediate_samples, n_features) + The generated data at this step. `n_intermediate_samples` is + the number of non-zero entries in `mask_gamma`. + y_step : array-like, shape (n_intermediate_samples,) + The labels for the generated data. + """ + source = sample_domain >= 0 + target = sample_domain < 0 + X_source = X[source] + X_target = X[target] + X_step = np.zeros((self.max_index + 1, X.shape[1])) + for idx, (i, j) in enumerate(np.argwhere(mask_gamma)): + X_step[idx] = (self.n_steps - step) * X_source[ + i + ] / self.n_steps + step * X_target[j] / self.n_steps + + y_step = self.base_estimator.predict( + X_step + ) # Use the classifier to predict labels + return X_step, y_step + + def get_intermediate_estimators(self): + """Return the intermediate estimators. + + Returns + ------- + estimators_ : list + The list of intermediate estimators. + """ + return self.estimators_ + + def predict(self, X, **kwargs): + """Predict labels for the input data. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The input data. + + Returns + ------- + y_pred : array-like, shape (n_samples,) + The predicted labels. 
+ """ + if self.base_estimator is None: + raise ValueError("The model has not been fitted yet.") + + else: + check_is_fitted(self.base_estimator) + + # Use the last classifier to predict + return self.base_estimator.predict(X, **kwargs) + + def predict_proba(self, X, **kwargs): + """Predict class probabilities for the input data. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The input data. + + Returns + ------- + y_proba : array-like, shape (n_samples, n_classes) + The predicted class probabilities. + """ + if self.base_estimator is None: + raise ValueError("The model has not been fitted yet.") + + else: + check_is_fitted(self.base_estimator) + + if not hasattr(self.base_estimator, "predict_proba"): + raise ValueError("The underlying estimator does not support predict_proba.") + + return self.base_estimator.predict_proba(X, **kwargs) + + def predict_log_proba(self, X, **kwargs): + """Predict class log-probabilities for the input data. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The input data. + + Returns + ------- + y_log_proba : array-like, shape (n_samples, n_classes) + The predicted class log-probabilities. + """ + if self.base_estimator is None: + raise ValueError("The model has not been fitted yet.") + + else: + check_is_fitted(self.base_estimator) + + if not hasattr(self.base_estimator, "predict_log_proba"): + raise ValueError( + "The underlying estimator does not support predict_log_proba." + ) + + return self.base_estimator.predict_log_proba(X, **kwargs) + + def score(self, X, y): + """Compute the accuracy of the model on the given data. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The input data. + y : array-like, shape (n_samples,) + The true labels. + + Returns + ------- + score : float + The accuracy of the model. 
+ """ + if self.base_estimator is None: + raise ValueError("The model has not been fitted yet.") + + else: + check_is_fitted(self.base_estimator) + + return (self.base_estimator.predict(X) == y).sum() / len(y) diff --git a/skada/tests/test_gradual_da.py b/skada/tests/test_gradual_da.py new file mode 100644 index 00000000..a00369d8 --- /dev/null +++ b/skada/tests/test_gradual_da.py @@ -0,0 +1,90 @@ +# Authors: Julie Alberge and Félix Lefebvre +# +# License: BSD 3-Clause + +import pytest +from sklearn.preprocessing import StandardScaler + +from skada._gradual_da import GradualEstimator +from skada._pipeline import make_da_pipeline +from skada.datasets import make_shifted_datasets +from skada.utils import check_X_y_domain + + +@pytest.mark.parametrize( + "label, n, m, advanced_ot_plan_sampling", + [ + ("binary", 15, 5, False), + ("binary", 10, 10, False), + ("binary", 5, 15, False), + ("binary", 15, 5, True), + ("binary", 10, 10, True), + ("binary", 5, 15, True), + ], +) +def test_gradual_estimator(label, n, m, advanced_ot_plan_sampling): + X, y, sample_domain = make_shifted_datasets( + n_samples_source=n, + n_samples_target=m, + shift="covariate_shift", + noise=None, + label=label, + ) + X, y, sample_domain = check_X_y_domain(X, y, sample_domain) + + clf_gradual = GradualEstimator( + n_steps=5, + advanced_ot_plan_sampling=advanced_ot_plan_sampling, + save_estimators=True, + ) + # Check that predict, predict_proba, predict_log_proba, and score + # return an error when the estimator is not fitted + with pytest.raises(ValueError): + clf_gradual.predict(X) + with pytest.raises(ValueError): + clf_gradual.predict_proba(X) + with pytest.raises(ValueError): + clf_gradual.predict_log_proba(X) + with pytest.raises(ValueError): + clf_gradual.score(X, y) + + # Fit the gradual estimator + clf_gradual.fit(X, y, sample_domain=sample_domain) + + assert ( + clf_gradual.predict(X).shape == y.shape + ), "Wrong shape of the predicted y-values (labels) when using `predict` method" + + if 
label == "binary": + assert clf_gradual.predict_proba(X).shape == ( + y.shape[0], + 2, + ), "Wrong shape of the output when using `predict_proba` method" + + assert clf_gradual.predict_log_proba(X).shape == ( + y.shape[0], + 2, + ), "Wrong shape of the output when using `predict_log_proba` method" + + assert clf_gradual.score(X, y) >= 0, "The score should be non-negative" + + # Test get_intermediate_estimators + intermediate_estimators = clf_gradual.get_intermediate_estimators() + assert isinstance(intermediate_estimators, list) + # n_steps=5, so there should be 5 intermediate estimators + the final one + assert len(intermediate_estimators) == 6 + + # The `GradualEstimator` should be usable with `make_da_pipeline` + manage_pipeline = False + try: + clf_gradual = make_da_pipeline( + StandardScaler(), GradualEstimator(n_steps=5) + ).fit(X, y, sample_domain=sample_domain) + manage_pipeline = True + finally: + assert manage_pipeline, "Couldn't use make_da_pipeline with GradualEstimator" + + # The `GradualEstimator` should accept sample_domain as an argument + if manage_pipeline: + clf_gradual.fit(X, y, sample_domain=sample_domain) + clf_gradual.predict(X, sample_domain=sample_domain) From c6bb43e85b12cbcd7579c390ab0b32df0837850c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 5 Dec 2025 17:44:48 +0100 Subject: [PATCH 2/6] Bump actions/checkout from 5 to 6 in the actions group (#355) Bumps the actions group with 1 update: [actions/checkout](https://github.com/actions/checkout). 
Updates `actions/checkout` from 5 to 6 - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/testing.yml | 6 +++--- .github/workflows/testing_dataset.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 02191be9..95cc238b 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -25,7 +25,7 @@ jobs: # Install skada - name: Checking Out Repository # yamllint disable-line rule:indentation - uses: actions/checkout@v5 + uses: actions/checkout@v6 # Cache 'skada' datasets - name: Create/Restore Skada Data Cache id: cache-skada_datasets @@ -63,7 +63,7 @@ jobs: # Install skada - name: Checking Out Repository # yamllint disable-line rule:indentation - uses: actions/checkout@v5 + uses: actions/checkout@v6 # Install Python & Packages - uses: actions/setup-python@v6 with: @@ -96,7 +96,7 @@ jobs: # Install skada - name: Checking Out Repository # yamllint disable-line rule:indentation - uses: actions/checkout@v5 + uses: actions/checkout@v6 # Install Python & Packages - uses: actions/setup-python@v6 with: diff --git a/.github/workflows/testing_dataset.yml b/.github/workflows/testing_dataset.yml index f5ed13fb..4f945843 100644 --- a/.github/workflows/testing_dataset.yml +++ b/.github/workflows/testing_dataset.yml @@ -29,7 +29,7 @@ jobs: # Install skada - name: Checking Out Repository # yamllint disable-line rule:indentation - uses: actions/checkout@v5 + uses: actions/checkout@v6 # Install 
Python & Packages - uses: actions/setup-python@v6 with: From 966861a4a2787833f127cb05c1e25cfab1879a1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Gnassounou?= <66993815+tgnassou@users.noreply.github.com> Date: Thu, 15 Jan 2026 14:08:02 +0100 Subject: [PATCH 3/6] [MRG] Fixing test error coming from sklearn 1.8 release (#358) * start fixing problem * continue fixing * continue fixing * fix scorer * fix spherical kmeans * add the hook class to be pickable * fix torch import * add the hook class to be pickable * use cross_validate instead of cross_val_score * add name of parameters * fix scorer functions * fix last test * specify the parameters sample domain in deep scorer test * remove comments --- skada/_ot.py | 6 +- skada/_subspace.py | 11 +-- skada/deep/base.py | 4 +- skada/deep/tests/test_deep_scorer.py | 14 ++-- skada/deep/utils.py | 20 +++--- skada/metrics.py | 100 +++++++++++++++------------ skada/tests/test_ot.py | 2 +- skada/tests/test_scorer.py | 71 ++++++++++++++----- 8 files changed, 141 insertions(+), 87 deletions(-) diff --git a/skada/_ot.py b/skada/_ot.py index 2d35127c..cec768d6 100644 --- a/skada/_ot.py +++ b/skada/_ot.py @@ -358,7 +358,7 @@ def solve_jdot_classification( This estimator assumes that the loss function optimized by the base estimator is compatible with the given metric. For instance, if the metric is 'multinomial', the base estimator should optimize a - cross-entropy loss (e.g. LogisticRegression with multi_class='multinomial') + cross-entropy loss (e.g. LogisticRegression) or a hinge loss (e.g. SVC with kernel='linear' and one versus rest) if the metric is 'hinge'. While any estimator providing the necessary prediction functions can be used, the convergence of the fixed point is not guaranteed @@ -620,7 +620,7 @@ class JDOTClassifier(DAEstimator): This estimator assumes that the loss function optimized by the base estimator is compatible with the given metric. 
For instance, if the metric is 'multinomial', the base estimator should optimize a - cross-entropy loss (e.g. LogisticRegression with multi_class='multinomial') + cross-entropy loss (e.g. LogisticRegression) or a hinge loss (e.g. SVC with kernel='linear' and one versus rest) if the metric is 'hinge'. While any estimator providing the necessary prediction functions can be used, the convergence of the fixed point is not guaranteed @@ -681,7 +681,7 @@ def __init__( **kwargs, ): if base_estimator is None: - base_estimator = LogisticRegression(multi_class="multinomial") + base_estimator = LogisticRegression() else: if not hasattr(base_estimator, "fit") or not hasattr( base_estimator, "predict" diff --git a/skada/_subspace.py b/skada/_subspace.py index 7de79e6c..751195f1 100644 --- a/skada/_subspace.py +++ b/skada/_subspace.py @@ -772,7 +772,8 @@ def __init__( def _torch_cov(self, X): """Compute the covariance matrix of X using torch.""" - torch = self.torch + if self.torch: + import torch reg = self.reg n_samples, d = X.shape @@ -784,7 +785,8 @@ def _torch_cov(self, X): def _D(self, W, X_source, X_target): """Divergence objective function""" - torch = self.torch + if self.torch: + import torch Z_source = X_source @ W Z_target = X_target @ W @@ -806,7 +808,8 @@ def _D(self, W, X_source, X_target): def _F(self, W, X_source, y_source): """Subspace learning objective function""" - torch = self.torch + if self.torch: + import torch base_method = self.base_method if base_method == "pca": @@ -851,7 +854,7 @@ def fit(self, X, y=None, sample_domain=None, **kwargs): try: import torch - self.torch = torch + self.torch = True except ImportError: raise ImportError( "TransferSubspaceLearningAdapter requires pytorch to be installed." 
diff --git a/skada/deep/base.py b/skada/deep/base.py index 4c51c46e..2162556f 100644 --- a/skada/deep/base.py +++ b/skada/deep/base.py @@ -409,7 +409,7 @@ def __init__(self, base_module, layer_name, domain_classifier=None): self.layer_name = layer_name self.intermediate_layers = {} self._setup_hooks() - + def _setup_hooks(self): _register_forwards_hook( self.base_module_, self.intermediate_layers, [self.layer_name] @@ -1065,7 +1065,7 @@ def _initialize(self, X: torch.Tensor, y, sample_domain, sample_weight): allow_nd=True, ensure_min_samples=0, ensure_min_features=0, - force_all_finite=not self.allow_label_masks, + ensure_all_finite=not self.allow_label_masks, ) y = to_tensor(y, self.device) has_y = y != _NO_LABEL_ diff --git a/skada/deep/tests/test_deep_scorer.py b/skada/deep/tests/test_deep_scorer.py index 2bc99916..046b18da 100644 --- a/skada/deep/tests/test_deep_scorer.py +++ b/skada/deep/tests/test_deep_scorer.py @@ -62,7 +62,7 @@ def test_generic_scorer_on_deepmodel(scorer, da_dataset): estimator.predict(X_test, sample_domain=sample_domain_test, allow_source=True) estimator.predict_proba(X, sample_domain=sample_domain, allow_source=True) - scores = scorer(estimator, X, y, sample_domain) + scores = scorer(estimator, X, y, sample_domain=sample_domain) assert ~np.isnan(scores), "The score is computed" @@ -180,7 +180,9 @@ def test_dev_scorer_on_target_only(da_dataset): # without dict estimator.fit(X, unmasked_y, sample_domain=sample_domain) - scores = DeepEmbeddedValidation()(estimator, X, unmasked_y, sample_domain) + scores = DeepEmbeddedValidation()( + estimator, X, unmasked_y, sample_domain=sample_domain + ) assert ~np.isnan(scores), "The score is computed" @@ -241,7 +243,7 @@ def test_exception_layer_name(scorer, da_dataset): estimator.fit(X, y, sample_domain=sample_domain) with pytest.raises(ValueError, match="The layer_name of the estimator is not set."): - scorer(estimator, X, y, sample_domain) + scorer(estimator, X, y, sample_domain=sample_domain) def 
test_mano_softmax(da_dataset): @@ -271,7 +273,7 @@ def test_mano_softmax(da_dataset): estimator.predict_proba(X, sample_domain=sample_domain, allow_source=True) scorer = MaNoScorer(threshold=-1) - scorer(estimator, X, y, sample_domain) + scorer(estimator, X, y, sample_domain=sample_domain) print(scorer.chosen_normalization.lower()) assert ( scorer.chosen_normalization.lower() == "softmax" @@ -305,7 +307,7 @@ def test_mano_taylor(da_dataset): estimator.predict_proba(X, sample_domain=sample_domain, allow_source=True) scorer = MaNoScorer(threshold=float("inf")) - scorer(estimator, X, y, sample_domain) + scorer(estimator, X, y, sample_domain=sample_domain) assert ( scorer.chosen_normalization.lower() == "taylor" ), "the wrong normalization was chosen" @@ -338,7 +340,7 @@ def test_mano_output_range(da_dataset): estimator.predict_proba(X, sample_domain=sample_domain, allow_source=True) scorer = MaNoScorer(threshold=float("inf")) - score = scorer(estimator, X, y, sample_domain) + score = scorer(estimator, X, y, sample_domain=sample_domain) assert (scorer._sign * score >= 0) and ( scorer._sign * score <= 1 ), "The output range should be [-1, 0] or [0, 1]." 
diff --git a/skada/deep/utils.py b/skada/deep/utils.py index 40fed402..bc39d663 100644 --- a/skada/deep/utils.py +++ b/skada/deep/utils.py @@ -6,6 +6,7 @@ import numbers from functools import partial +from sklearn.base import BaseEstimator from skorch.utils import _identity import torch from torch.nn import CrossEntropyLoss @@ -13,11 +14,13 @@ from sklearn.utils.validation import check_is_fitted -def _get_intermediate_layers(intermediate_layers, layer_name): - def hook(model, input, output): - intermediate_layers[layer_name] = output.flatten(start_dim=1) +class IntermediateLayerHook: + def __init__(self, intermediate_layers, layer_name): + self.intermediate_layers = intermediate_layers + self.layer_name = layer_name - return hook + def __call__(self, module, input, output): + self.intermediate_layers[self.layer_name] = output.flatten(start_dim=1) def _register_forwards_hook(module, intermediate_layers, layer_names): @@ -28,9 +31,8 @@ def _register_forwards_hook(module, intermediate_layers, layer_names): """ for layer_name, layer_module in module.named_modules(): if layer_name in layer_names: - layer_module.register_forward_hook( - _get_intermediate_layers(intermediate_layers, layer_name) - ) + hook = IntermediateLayerHook(intermediate_layers, layer_name) + layer_module.register_forward_hook(hook) def check_generator(seed): @@ -96,8 +98,7 @@ def _infer_predict_nonlinearity(net): return _identity - -class SphericalKMeans: +class SphericalKMeans(BaseEstimator): """Spherical K-Means clustering using PyTorch. 
This algorithm is similar to K-Means but uses cosine similarity @@ -150,6 +151,7 @@ class SphericalKMeans: def __init__(self, n_clusters=8, n_init=10, max_iter=300, tol=1e-4, initial_centroids=None, random_state=None, device='cpu'): + super().__init__() self.n_clusters = n_clusters self.n_init = n_init self.max_iter = max_iter diff --git a/skada/metrics.py b/skada/metrics.py index 64d9f058..186018a6 100644 --- a/skada/metrics.py +++ b/skada/metrics.py @@ -14,13 +14,14 @@ from sklearn.base import clone from sklearn.linear_model import LogisticRegression from sklearn.metrics import balanced_accuracy_score, check_scoring +from sklearn.metrics._scorer import _BaseScorer from sklearn.model_selection import train_test_split from sklearn.neighbors import KernelDensity from sklearn.pipeline import Pipeline from sklearn.preprocessing import LabelEncoder, Normalizer from sklearn.utils import check_random_state from sklearn.utils.extmath import softmax -from sklearn.utils.metadata_routing import _MetadataRequester, get_routing_for_object +from sklearn.utils.metadata_routing import get_routing_for_object from ._utils import ( _DEFAULT_MASKED_TARGET_CLASSIFICATION_LABEL, @@ -32,19 +33,23 @@ from .utils import check_X_y_domain, extract_source_indices, source_target_split -# xxx(okachaiev): maybe it would be easier to reuse _BaseScorer? 
-# xxx(okachaiev): add proper __repr__/__str__ -# xxx(okachaiev): support clone() -class _BaseDomainAwareScorer(_MetadataRequester): - __metadata_request__score = {"sample_domain": True} +def _estimator_score(estimator, X, y=None, **kwargs): + return estimator.score(X, y) + + +class _BaseDomainAwareScorer(_BaseScorer): + def __init__(self, score_func, sign, kwargs): + if kwargs is None: + kwargs = {} + if score_func is None: + score_func = _estimator_score + super().__init__(score_func, sign, kwargs) + self.set_score_request(sample_domain=True) @abstractmethod def _score(self, estimator, X, y, sample_domain=None, **params): pass - def __call__(self, estimator, X, y=None, sample_domain=None, **params): - return self._score(estimator, X, y, sample_domain=sample_domain, **params) - class SupervisedScorer(_BaseDomainAwareScorer): """Compute score on supervised dataset. @@ -62,22 +67,18 @@ class SupervisedScorer(_BaseDomainAwareScorer): scorer object will sign-flip the outcome of the `scorer`. 
""" - __metadata_request__score = {"target_labels": True} - - def __init__(self, scoring=None, greater_is_better=True): - super().__init__() - self.scoring = scoring - self._sign = 1 if greater_is_better else -1 + def __init__(self, scoring=None, greater_is_better=True, kwargs=None): + super().__init__( + score_func=scoring, sign=1 if greater_is_better else -1, kwargs=kwargs + ) + self.set_score_request(target_labels=True) def _score( - self, estimator, X, y=None, sample_domain=None, target_labels=None, **params + self, _, estimator, X, y=None, sample_domain=None, target_labels=None, **params ): - scorer = check_scoring(estimator, self.scoring) - X, y, sample_domain = check_X_y_domain(X, y, sample_domain, allow_nd=True) source_idx = extract_source_indices(sample_domain) - - return self._sign * scorer( + return self._sign * self._score_func( estimator, X[~source_idx], target_labels[~source_idx], @@ -127,11 +128,13 @@ def __init__( weight_estimator=None, scoring=None, greater_is_better=True, + kwargs=None, ): - super().__init__() + super().__init__( + score_func=scoring, sign=1 if greater_is_better else -1, kwargs=kwargs + ) self.weight_estimator = weight_estimator self.scoring = scoring - self._sign = 1 if greater_is_better else -1 def _fit(self, X_source, X_target): """Fit adaptation parameters. @@ -159,7 +162,7 @@ def _fit(self, X_source, X_target): self.weight_estimator_target_.fit(X_target) return self - def _score(self, estimator, X, y, sample_domain=None, **params): + def _score(self, _, estimator, X, y, sample_domain=None, **params): scorer = check_scoring(estimator, self.scoring) if "sample_weight" not in get_routing_for_object(estimator).consumes( "score", ["sample_weight"] @@ -236,9 +239,10 @@ class PredictionEntropyScorer(_BaseDomainAwareScorer): ICLR, 2018. 
""" - def __init__(self, greater_is_better=False, reduction="mean"): - super().__init__() - self._sign = 1 if greater_is_better else -1 + def __init__(self, greater_is_better=False, reduction="mean", kwargs=None): + super().__init__( + score_func=None, sign=1 if greater_is_better else -1, kwargs=kwargs + ) self.reduction = reduction if self.reduction not in ["none", "mean", "sum"]: @@ -247,7 +251,7 @@ def __init__(self, greater_is_better=False, reduction="mean"): "Valid options are: 'none', 'mean', 'sum'." ) - def _score(self, estimator, X, y, sample_domain=None, **params): + def _score(self, _, estimator, X, y, sample_domain=None, **params): if not hasattr(estimator, "predict_proba"): raise AttributeError( "The estimator passed should have a 'predict_proba' method. " @@ -302,12 +306,13 @@ class SoftNeighborhoodDensity(_BaseDomainAwareScorer): International Conference on Computer Vision, 2021. """ - def __init__(self, T=0.05, greater_is_better=True): - super().__init__() + def __init__(self, T=0.05, greater_is_better=True, kwargs=None): + super().__init__( + score_func=None, sign=1 if greater_is_better else -1, kwargs=kwargs + ) self.T = T - self._sign = 1 if greater_is_better else -1 - def _score(self, estimator, X, y, sample_domain=None, **params): + def _score(self, _, estimator, X, y, sample_domain=None, **params): if not hasattr(estimator, "predict_proba"): raise AttributeError( "The estimator passed should have a 'predict_proba' method. 
" @@ -364,14 +369,16 @@ def __init__( loss_func=None, random_state=None, greater_is_better=False, + kwargs=None, ): - super().__init__() + super().__init__( + score_func=None, sign=1 if greater_is_better else -1, kwargs=kwargs + ) self.domain_classifier = domain_classifier self._loss_func = ( loss_func if loss_func is not None else self._no_reduc_log_loss ) self.random_state = random_state - self._sign = 1 if greater_is_better else -1 def _no_reduc_log_loss(self, y, y_pred): return np.array( @@ -389,7 +396,7 @@ def _fit_adapt(self, features, features_target): ) return self - def _score(self, estimator, X, y, sample_domain=None, **kwargs): + def _score(self, _, estimator, X, y, sample_domain=None, **kwargs): if not hasattr(estimator, "predict_proba"): raise AttributeError( "The estimator passed should have a 'predict_proba' method. " @@ -536,8 +543,11 @@ def __init__( self, source_scorer=balanced_accuracy_score, greater_is_better=True, + kwargs=None, ): - super().__init__() + super().__init__( + score_func=None, sign=1 if greater_is_better else -1, kwargs=kwargs + ) if not callable(source_scorer): raise ValueError( "The source scorer should be a callable. " @@ -545,9 +555,8 @@ def __init__( ) self.source_scorer = source_scorer - self._sign = 1 if greater_is_better else -1 - def _score(self, estimator, X, y, sample_domain=None, **params): + def _score(self, _, estimator, X, y, sample_domain=None, **params): """ Compute the score based on a circular validation strategy. 
@@ -692,18 +701,20 @@ def __init__( scoring=None, greater_is_better=True, random_state=None, + kwargs=None, ): - super().__init__() + super().__init__( + score_func=scoring, sign=1 if greater_is_better else -1, kwargs=kwargs + ) self.alpha = alpha self.ice_type = ice_type self.scoring = scoring - self._sign = 1 if greater_is_better else -1 self.random_state = random_state if self.ice_type not in ["both", "intra", "inter"]: raise ValueError("ice_type must be 'both', 'intra', or 'inter'") - def _score(self, estimator, X, y=None, sample_domain=None, **params): + def _score(self, _, estimator, X, y=None, sample_domain=None, **params): """ Compute the Interpolation Consistency Evaluation (ICE) score. @@ -826,17 +837,18 @@ class MaNoScorer(_BaseDomainAwareScorer): In NeurIPS, 2024. """ - def __init__(self, p=4, threshold=5, greater_is_better=True): - super().__init__() + def __init__(self, p=4, threshold=5, greater_is_better=True, kwargs=None): + super().__init__( + score_func=None, sign=1 if greater_is_better else -1, kwargs=kwargs + ) self.p = p self.threshold = threshold - self._sign = 1 if greater_is_better else -1 self.chosen_normalization = None if self.p <= 0: raise ValueError("The order of the p-norm must be positive") - def _score(self, estimator, X, y, sample_domain=None, **params): + def _score(self, _, estimator, X, y, sample_domain=None, **params): if not hasattr(estimator, "predict_proba"): raise AttributeError( "The estimator passed should have a 'predict_proba' method. 
" diff --git a/skada/tests/test_ot.py b/skada/tests/test_ot.py index 46ecf7e7..ade0cc09 100644 --- a/skada/tests/test_ot.py +++ b/skada/tests/test_ot.py @@ -99,7 +99,7 @@ def test_JDOTClassifier(da_multiclass_dataset, da_binary_dataset): scorer = PredictionEntropyScorer() jdot = JDOTClassifier(base_estimator=SVC(probability=True)) jdot.fit(X, y, sample_domain=sample_domain) - scorer._score(jdot, X, y, sample_domain=sample_domain) + scorer(jdot, X, y, sample_domain=sample_domain) # test raise error with np.testing.assert_raises(ValueError): diff --git a/skada/tests/test_scorer.py b/skada/tests/test_scorer.py index caf5c7ba..4c6b5d2f 100644 --- a/skada/tests/test_scorer.py +++ b/skada/tests/test_scorer.py @@ -11,7 +11,7 @@ from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.metrics import mean_squared_error -from sklearn.model_selection import ShuffleSplit, cross_validate +from sklearn.model_selection import ShuffleSplit, cross_val_score, cross_validate from sklearn.svm import SVC from skada import ( @@ -43,7 +43,7 @@ MaNoScorer(), ], ) -def test_generic_scorer(scorer, da_dataset): +def test_generic_scorer_with_cross_validate(scorer, da_dataset): X, y, sample_domain = da_dataset.pack( as_sources=["s"], as_targets=["t"], mask_target_labels=True ) @@ -66,6 +66,40 @@ def test_generic_scorer(scorer, da_dataset): assert np.all(~np.isnan(scores)), "all scores are computed" +@pytest.mark.parametrize( + "scorer", + [ + ImportanceWeightedScorer(), + PredictionEntropyScorer(), + SoftNeighborhoodDensity(), + DeepEmbeddedValidation(), + CircularValidation(), + MaNoScorer(), + ], +) +def test_generic_scorer_with_cross_val_score(scorer, da_dataset): + X, y, sample_domain = da_dataset.pack( + as_sources=["s"], as_targets=["t"], mask_target_labels=True + ) + estimator = make_da_pipeline( + DensityReweightAdapter(), + LogisticRegression() + .set_fit_request(sample_weight=True) + 
.set_score_request(sample_weight=True), + ) + cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0) + scores = cross_val_score( + estimator, + X, + y, + cv=cv, + params={"sample_domain": sample_domain}, + scoring=scorer, + ) + assert scores.shape[0] == 3, "evaluate 3 splits" + assert np.all(~np.isnan(scores)), "all scores are computed" + + def test_supervised_scorer(da_dataset): """`SupervisedScorer` requires unmasked target label to be available.""" X, y, sample_domain = da_dataset.pack( @@ -89,6 +123,7 @@ def test_supervised_scorer(da_dataset): cv=cv, params={"sample_domain": sample_domain, "target_labels": target_labels}, scoring=scoring, + error_score="raise", )["test_score"] assert scores.shape[0] == 3, "evaluate 3 splits" assert np.all(~np.isnan(scores)), "all scores are computed" @@ -159,17 +194,17 @@ def test_prediction_entropy_scorer_reduction(da_dataset): estimator.fit(X, y, sample_domain=sample_domain) scorer = PredictionEntropyScorer(reduction="mean") - score_mean = scorer._score(estimator, X, y, sample_domain=sample_domain) + score_mean = scorer(estimator, X, y, sample_domain=sample_domain) assert isinstance(score_mean, float), "score_mean is not a float" scorer = PredictionEntropyScorer(reduction="sum") - score_sum = scorer._score(estimator, X, y, sample_domain=sample_domain) + score_sum = scorer(estimator, X, y, sample_domain=sample_domain) assert isinstance(score_sum, float), "score_sum is not a float" assert score_mean == pytest.approx(score_sum / X.shape[0], rel=1e-5) scorer = PredictionEntropyScorer(reduction="none") - score_none = scorer._score(estimator, X, y, sample_domain=sample_domain) + score_none = scorer(estimator, X, y, sample_domain=sample_domain) assert isinstance(score_none, np.ndarray), "score_none is not a numpy array" with pytest.raises(ValueError): @@ -179,7 +214,7 @@ def test_prediction_entropy_scorer_reduction(da_dataset): with pytest.raises(ValueError): scorer = PredictionEntropyScorer(reduction="none") scorer.reduction 
= "WRONG_REDUCTION" - scorer._score(estimator, X, y, sample_domain=sample_domain) + scorer(estimator, X, y, sample_domain=sample_domain) def test_circular_validation(da_dataset): @@ -199,9 +234,9 @@ def test_circular_validation(da_dataset): scorer = CircularValidation() with pytest.raises(ValueError): - scorer._score(estimator, X, unmasked_y, sample_domain=sample_domain) + scorer(estimator, X, unmasked_y, sample_domain=sample_domain) - score = scorer._score(estimator, X, y, sample_domain=sample_domain) + score = scorer(estimator, X, y, sample_domain=sample_domain) assert ~np.isnan(score), "the score is computed" # Test not callable source_scorer @@ -214,7 +249,7 @@ def test_circular_validation(da_dataset): ) estimator_dummy.fit(X, y, sample_domain=sample_domain) scorer = CircularValidation() - score = scorer._score(estimator_dummy, X, y, sample_domain=sample_domain) + score = scorer(estimator_dummy, X, y, sample_domain=sample_domain) assert ~np.isnan(score), "the score is computed" # Test regression task @@ -244,9 +279,9 @@ def test_circular_validation(da_dataset): ) with pytest.raises(ValueError): - scorer._score(estimator_regression, X, unmasked_y, sample_domain=sample_domain) + scorer(estimator_regression, X, unmasked_y, sample_domain=sample_domain) - score = scorer._score(estimator_regression, X, y, sample_domain=sample_domain) + score = scorer(estimator_regression, X, y, sample_domain=sample_domain) assert ~np.isnan(score), "the score is computed" @@ -345,7 +380,7 @@ def score(self, X, y, sample_domain=None): # Test intra-cluster case (ice_same should be NaN) scorer_intra = MixValScorer(alpha=0.55, random_state=42, ice_type="intra") - score_intra = scorer_intra._score(dummy_estimator, X, y, sample_domain) + score_intra = scorer_intra(dummy_estimator, X, y, sample_domain) assert np.isnan( score_intra ), "intra-cluster score should be NaN when all predictions are the same" @@ -354,14 +389,14 @@ def score(self, X, y, sample_domain=None): # Test inter-cluster case 
(ice_diff should be NaN) scorer_inter = MixValScorer(alpha=0.55, random_state=42, ice_type="inter") - score_inter = scorer_inter._score(dummy_estimator, X, y, sample_domain) + score_inter = scorer_inter(dummy_estimator, X, y, sample_domain) assert np.isnan( score_inter ), "inter-cluster score should be NaN when all predictions are the same" # Test both case with score_inter == Nan (result should be a number) scorer_both = MixValScorer(alpha=0.55, random_state=42, ice_type="both") - score_both = scorer_both._score(dummy_estimator, X, y, sample_domain) + score_both = scorer_both(dummy_estimator, X, y, sample_domain) assert not np.isnan( score_both ), "combined score should not be NaN when both intra and inter scores are NaN" @@ -391,17 +426,17 @@ def test_mano_scorer(da_dataset): estimator.fit(X, y, sample_domain=sample_domain) scorer = MaNoScorer() - score_mean = scorer._score(estimator, X, y, sample_domain=sample_domain) + score_mean = scorer(estimator, X, y, sample_domain=sample_domain) assert isinstance(score_mean, float), "score_mean is not a float" # Test softmax normalization scorer = MaNoScorer(threshold=-1) - score_mean = scorer._score(estimator, X, y, sample_domain=sample_domain) + score_mean = scorer(estimator, X, y, sample_domain=sample_domain) assert isinstance(score_mean, float), "score_mean is not a float" # Test softmax normalization scorer = MaNoScorer(threshold=float("inf")) - score_mean = scorer._score(estimator, X, y, sample_domain=sample_domain) + score_mean = scorer(estimator, X, y, sample_domain=sample_domain) assert isinstance(score_mean, float), "score_mean is not a float" # Test invalid p-norm order @@ -410,7 +445,7 @@ def test_mano_scorer(da_dataset): # Test correct output range scorer = MaNoScorer() - score_mean = scorer._score(estimator, X, y, sample_domain=sample_domain) + score_mean = scorer(estimator, X, y, sample_domain=sample_domain) assert (scorer._sign * score_mean >= 0) and ( scorer._sign * score_mean <= 1 ), "The output range 
should be [-1, 0] or [0, 1]." From db2b11c6424acfcff975f50ac6af31a9239e7bff Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 15 Jan 2026 14:27:10 +0100 Subject: [PATCH 4/6] MAINT fix metadata licence (#359) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * MAINT fix metadata licence * remove metadata from classifier --------- Co-authored-by: Théo Gnassounou <66993815+tgnassou@users.noreply.github.com> --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b6960294..c2d4c18c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,8 @@ description = "A Python package for domain adaptation compatible with scikit-lea authors = [ {name = "SKADA Team"}, ] -license = {file = "COPYING"} +license = "BSD-3-Clause" +license-files = ["COPYING"] readme = "README.md" requires-python = ">= 3.9" keywords = ["domain-adaptation", "scikit-learn", "pytorch", @@ -17,7 +18,6 @@ keywords = ["domain-adaptation", "scikit-learn", "pytorch", classifiers=[ "Intended Audience :: Science/Research", "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", "Programming Language :: C", "Programming Language :: Python", "Topic :: Software Development", From 040a84e7b7314cd422242b815f5c0fd5f7f59d1c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 16 Jan 2026 09:33:45 +0100 Subject: [PATCH 5/6] Bump actions/cache from 4 to 5 in the actions group (#356) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps the actions group with 1 update: [actions/cache](https://github.com/actions/cache). 
Updates `actions/cache` from 4 to 5 - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/cache dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Théo Gnassounou <66993815+tgnassou@users.noreply.github.com> --- .github/workflows/testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 95cc238b..aa810562 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -29,7 +29,7 @@ jobs: # Cache 'skada' datasets - name: Create/Restore Skada Data Cache id: cache-skada_datasets - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/skada_datasets key: ${{ runner.os }}-v3 From 46bc3dbca73a31b77b5a2dfd45d2f386135b77b3 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Fri, 16 Jan 2026 11:09:13 +0100 Subject: [PATCH 6/6] Update Contributing documentation (#342) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rémi Flamary Co-authored-by: Théo Gnassounou <66993815+tgnassou@users.noreply.github.com> --- CONTRIBUTING.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5d9f34e6..8425c6b9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -135,6 +135,9 @@ Bonus points for contributions that include a performance analysis with a benchmark script and profiling output (please report on the mailing list or on the GitHub issue). +Remark: Adding '\[dataset\]' to the name of your Pull Request will activate +the tests of the module Dataset. 
+ Filing bugs ----------- We use Github issues to track all bugs and feature requests; feel free to