diff --git a/esmvalcore/config/configurations/defaults/extra_facets_emac.yml b/esmvalcore/config/configurations/defaults/extra_facets_emac.yml
index 169a69860f..8ac3d9d05d 100644
--- a/esmvalcore/config/configurations/defaults/extra_facets_emac.yml
+++ b/esmvalcore/config/configurations/defaults/extra_facets_emac.yml
@@ -257,9 +257,12 @@ projects:
           ta:
             raw_name: [tm1_cav, tm1_ave, tm1]
             channel: Amon
+          # tro3:
+          #   raw_name: [O3_cav, O3_ave, O3]
+          #   channel: Amon
           tro3:
-            raw_name: [O3_cav, O3_ave, O3]
-            channel: Amon
+            raw_name: [O3_p39_cav, O3_cav, O3_ave, O3]
+            channel: AERmon
           ua:
             raw_name: [um1_cav, um1_ave, um1]
             channel: Amon
diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py
index 798edb36a8..520c67ddb0 100644
--- a/esmvalcore/preprocessor/__init__.py
+++ b/esmvalcore/preprocessor/__init__.py
@@ -31,7 +31,11 @@
     meridional_statistics,
     zonal_statistics,
 )
-from esmvalcore.preprocessor._compare_with_refs import bias, distance_metric
+from esmvalcore.preprocessor._compare_with_refs import (
+    bias,
+    distance_metric,
+    t_test,
+)
 from esmvalcore.preprocessor._concatenate import concatenate
 from esmvalcore.preprocessor._cycles import amplitude
 from esmvalcore.preprocessor._dask_progress import _compute_with_progress
@@ -217,6 +221,7 @@
     # Multi model statistics
     "multi_model_statistics",
     # Comparison with reference datasets
+    "t_test",
     "bias",
     "distance_metric",
     # Remove supplementary variables from cube
@@ -260,6 +265,7 @@
     "multi_model_statistics",
     "mask_multimodel",
     "mask_fillvalues",
+    "t_test",
 }
 
 
diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index 4e6fc6c180..3583a97961 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -8,13 +8,15 @@
 
 import dask
 import dask.array as da
+import dask.array.stats
 import iris.analysis
 import iris.analysis.stats
 import numpy as np
 from iris.common.metadata import CubeMetadata
-from iris.coords import CellMethod
+from iris.coords import AncillaryVariable, CellMethod
 from iris.cube import Cube, CubeList
-from scipy.stats import wasserstein_distance
+from iris.exceptions import CoordinateMultiDimError
+from scipy.stats import ttest_ind, wasserstein_distance
 
 from esmvalcore.iris_helpers import (
     ignore_iris_vague_metadata_warnings,
@@ -33,7 +35,7 @@
 if TYPE_CHECKING:
     from collections.abc import Iterable
 
-    from iris.coords import Coord
+    from iris.coords import AuxCoord, Coord, DimCoord
 
     from esmvalcore.preprocessor import PreprocessorFile
 
@@ -200,6 +202,16 @@ def _calculate_bias(cube: Cube, reference: Cube, bias_type: BiasType) -> Cube:
     """Calculate bias for a single cube relative to a reference cube."""
     cube_metadata = cube.metadata
 
+    # Save ancillary variables and cell measures as they get removed in the
+    # bias calculation
+    ancillary_variables_and_dims = [
+        (a, cube.ancillary_variable_dims(a))
+        for a in cube.ancillary_variables()
+    ]
+    cell_measures_and_dims = [
+        (c, cube.cell_measure_dims(c)) for c in cube.cell_measures()
+    ]
+
     if bias_type == "absolute":
         cube = cube - reference
         new_units = cube.units
@@ -216,6 +228,12 @@ def _calculate_bias(cube: Cube, reference: Cube, bias_type: BiasType) -> Cube:
     cube.metadata = cube_metadata
     cube.units = new_units
 
+    # Reattach ancillary variables and cell measures
+    for ancillary_variable, dims in ancillary_variables_and_dims:
+        cube.add_ancillary_variable(ancillary_variable, dims)
+    for cell_measure, dims in cell_measures_and_dims:
+        cube.add_cell_measure(cell_measure, dims)
+
     return cube
 
 
@@ -597,3 +615,172 @@ def _get_emd(
     if np.ma.is_masked(arr) or np.ma.is_masked(ref_arr):
         return np.ma.masked  # this is safe because PMFs will be masked arrays
     return wasserstein_distance(bin_centers, bin_centers, arr, ref_arr)
+
+
+def t_test(
+    products: set[PreprocessorFile] | Iterable[Cube],
+    reference: Cube | None = None,
+    coordinate: str | AuxCoord | DimCoord = "time",
+    **kwargs: Any,
+) -> set[PreprocessorFile] | CubeList:
+    """Perform t-test between dataset and reference dataset.
+
+    This is a test for the null hypothesis that 2 independent samples (dataset
+    and reference dataset) have identical average (expected) values.
+
+    All input datasets need to have the same shape. To ensure this, the
+    preprocessors :func:`esmvalcore.preprocessor.regrid`  might be helpful.
+
+    Notes
+    -----
+    The reference dataset can be specified with the ``reference`` argument. If
+    ``reference`` is ``None``, exactly one input dataset in the ``products``
+    set needs to have the facet ``reference_for_t_test: true`` defined in the
+    recipe. Please do **not** specify the option ``reference`` when using this
+    preprocessor function in a recipe.
+
+    Parameters
+    ----------
+    products:
+        Input datasets/cubes for which the t-test is performed relative to a
+        reference dataset/cube.
+    reference:
+        Cube which is used as reference for the t-test calculation. If
+        ``None``, `products` needs to be a :obj:`set` of
+        :class:`~esmvalcore.preprocessor.PreprocessorFile` objects and exactly
+        one dataset in ``products`` needs the facet ``reference_for_t_test:
+        true``. Do not specify this argument in a recipe.
+    coordinate:
+        Coordinate axis of the input along which to compute the statistic.
+    **kwargs:
+        Additional keyword arguments passed to :func:`scipy.stats.ttest_ind`
+        (not all input data are lazy) or :func:`dask.array.stats.ttest_ind`
+        (all input data are lazy).
+
+    Returns
+    -------
+    :
+        Output datasets/cubes. Will be a :obj:`set` of
+        :class:`~esmvalcore.preprocessor.PreprocessorFile` objects if
+        ``products`` is also one, a :class:`~iris.cube.CubeList` otherwise. The
+        cubes contain an attached ancillary variable which is the p-value
+        w.r.t. the chosen t-test.
+
+    Raises
+    ------
+    ValueError
+        Not exactly one input datasets contains the facet
+        ``reference_for_t_test: true`` if ``reference=None``;
+        ``reference=None`` and the input products are given as iterable of
+        :class:`~iris.cube.Cube` objects.
+    CoordinateMultiDimError
+        ``coordinate`` is not 1D.
+
+    """
+    ref_product = None
+    all_cubes_given = all(isinstance(p, Cube) for p in products)
+
+    # Get reference cube if not explicitly given
+    if reference is None:
+        if all_cubes_given:
+            msg = (
+                "A list of Cubes is given to this preprocessor; please "
+                "specify a `reference`"
+            )
+            raise ValueError(msg)
+        (reference, ref_product) = _get_ref(products, "reference_for_t_test")
+    else:
+        ref_product = None
+
+    # If input is an Iterable of Cube objects, calculate t-test for each element
+    if all_cubes_given:
+        cubes = [
+            _calculate_t_test(c, reference, coordinate, **kwargs)
+            for c in products
+        ]
+        return CubeList(cubes)
+
+    # Otherwise, iterate over all input products, calculate t-test and adapt
+    # metadata and provenance information accordingly
+    output_products = set()
+    for product in products:
+        if product == ref_product:
+            continue
+        cube = concatenate(product.cubes)
+
+        # Calculate t-test
+        cube = _calculate_t_test(cube, reference, coordinate, **kwargs)
+
+        # Adapt metadata and provenance information
+        if ref_product is not None:
+            product.wasderivedfrom(ref_product)
+
+        product.cubes = CubeList([cube])
+        output_products.add(product)
+
+    # Add reference dataset to output
+    if ref_product is not None:
+        output_products.add(ref_product)
+
+    return output_products
+
+
+def _calculate_t_test(
+    cube: Cube,
+    reference: Cube,
+    coordinate: str | AuxCoord | DimCoord,
+    **kwargs: Any,
+) -> Cube:
+    """Calculate the t-test and attach the p-value as ancillary variable to cube."""
+    cube = cube.copy()  # do not modify input cube
+
+    if cube.coord(coordinate).ndim > 1:
+        raise CoordinateMultiDimError(cube.coord(coordinate))
+    axis = cube.coord_dims(coordinate)[0]
+    remaining_axes = tuple(sorted(set(range(cube.ndim)) - {axis}))
+    target_shape = tuple(cube.shape[a] for a in remaining_axes)
+
+    if cube.has_lazy_data() and reference.has_lazy_data():
+        t_test = dask.array.stats.ttest_ind(
+            da.ma.filled(cube.core_data(), np.nan),
+            da.ma.filled(reference.core_data(), np.nan),
+            axis=axis,
+            **kwargs,
+        )
+        # For some reason, t_test.pvalue is not a da.array, but a dask.Delayed
+        p_value_arr = da.from_delayed(t_test.pvalue, target_shape, cube.dtype)
+        p_value_arr = da.ma.masked_invalid(p_value_arr)
+    else:
+        # Avoid realizing cube.data
+        cube_data = np.ma.filled(
+            cube.lazy_data().compute() if cube.has_lazy_data() else cube.data,
+            np.nan,
+        )
+        ref_data = np.ma.filled(
+            reference.lazy_data().compute()
+            if reference.has_lazy_data()
+            else reference.data,
+            np.nan,
+        )
+        t_test = ttest_ind(cube_data, ref_data, axis=axis, **kwargs)
+        p_value_arr = np.ma.masked_invalid(t_test.pvalue)
+
+    # Ensure that float dtype from parents is inherited
+    if (
+        np.issubdtype(cube.dtype, np.floating)
+        and np.issubdtype(reference.dtype, np.floating)
+        and cube.dtype == reference.dtype
+    ):
+        p_value_arr = p_value_arr.astype(cube.dtype)
+
+    cube.add_ancillary_variable(
+        AncillaryVariable(
+            p_value_arr,
+            long_name="p-value",
+            var_name="pvalue",
+            units="1",
+        ),
+        remaining_axes,
+    )
+
+    return cube
diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
index d9de9c7d4b..736058febc 100644
--- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
+++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
@@ -1,17 +1,25 @@
 """Unit tests for :mod:`esmvalcore.preprocessor._compare_with_refs`."""
 
+from __future__ import annotations
+
 import contextlib
+import re
+from typing import Any
 
 import dask.array as da
 import iris
 import numpy as np
 import pytest
 from cf_units import Unit
-from iris.coords import CellMeasure, CellMethod
+from iris.coords import AncillaryVariable, AuxCoord, CellMeasure, CellMethod
 from iris.cube import Cube, CubeList
-from iris.exceptions import CoordinateNotFoundError
+from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError
 
-from esmvalcore.preprocessor._compare_with_refs import bias, distance_metric
+from esmvalcore.preprocessor._compare_with_refs import (
+    bias,
+    distance_metric,
+    t_test,
+)
 from tests import PreprocessorFile
 
 
@@ -343,6 +351,41 @@ def test_bias_products_and_ref_cube(
     assert product_a.mock_ancestors == set()
 
 
+@pytest.mark.parametrize(("bias_type", "data", "units"), TEST_BIAS)
+def test_bias_cubes_preserve_metadata(
+    regular_cubes,
+    ref_cubes,
+    bias_type,
+    data,
+    units,
+):
+    """Test calculation of bias with cubes."""
+    cube = regular_cubes[0]
+    ancillary_var = AncillaryVariable([0, 1], var_name="a")
+    cell_measure = CellMeasure([0, 1], var_name="c")
+    cube.add_ancillary_variable(ancillary_var, 0)
+    cube.add_cell_measure(cell_measure, 1)
+    ref_cube = ref_cubes[0]
+
+    out_cubes = bias(regular_cubes, ref_cube, bias_type=bias_type)
+
+    assert isinstance(out_cubes, CubeList)
+    assert len(out_cubes) == 1
+    out_cube = out_cubes[0]
+
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, data)
+    assert out_cube.var_name == "tas"
+    assert out_cube.standard_name == "air_temperature"
+    assert out_cube.units == units
+    assert out_cube.dim_coords == cube.dim_coords
+    assert out_cube.aux_coords == cube.aux_coords
+    assert out_cube.ancillary_variables() == [ancillary_var]
+    assert out_cube.ancillary_variable_dims(ancillary_var) == (0,)
+    assert out_cube.cell_measures() == [cell_measure]
+    assert out_cube.cell_measure_dims(cell_measure) == (1,)
+
+
 def test_no_reference_for_bias(regular_cubes, ref_cubes):
     """Test fail when no reference_for_bias is given."""
     products = {
@@ -382,8 +425,8 @@ def test_invalid_bias_type(regular_cubes, ref_cubes):
         bias(products, bias_type="invalid_bias_type")
 
 
-def test_reference_none_cubes(regular_cubes):
-    """Test reference=None with with cubes."""
+def test_bias_reference_none_cubes(regular_cubes):
+    """Test bias with reference=None and cubes given."""
     msg = (
         "A list of Cubes is given to this preprocessor; please specify a "
         "`reference`"
@@ -932,3 +975,291 @@ def test_distance_metric_no_lon_for_area_weights(regular_cubes, metric, error):
             reference=ref_cube,
             coords=["time", "latitude"],
         )
+
+
+def assert_correct_p_value(
+    cube: Cube,
+    data: Any,  # noqa: ANN401
+    *,
+    is_lazy: bool = False,
+) -> None:
+    assert len(cube.ancillary_variables()) == 1
+    p_value = cube.ancillary_variables()[0]
+    assert p_value.standard_name is None
+    assert p_value.long_name == "p-value"
+    assert p_value.var_name == "pvalue"
+    assert p_value.units == "1"
+    assert p_value.attributes == {}
+    assert p_value.has_lazy_data() is is_lazy
+    assert p_value.dtype == np.float32
+    if np.ma.is_masked(data):
+        np.testing.assert_equal(p_value.data.mask, data.mask)
+    np.testing.assert_allclose(p_value.data, data)
+
+
+@pytest.mark.parametrize("use_reference_product", [True, False])
+def test_t_test_products(regular_cubes, ref_cubes, use_reference_product):  # noqa: PLR0915
+    """Test calculation of t_test with products."""
+    products = {
+        PreprocessorFile(regular_cubes, "A", {"dataset": "a"}),
+        PreprocessorFile(regular_cubes, "B", {"dataset": "b"}),
+    }
+
+    if use_reference_product:
+        ref_product = PreprocessorFile(
+            ref_cubes,
+            "REF",
+            {"reference_for_t_test": True},
+        )
+        products.add(ref_product)
+        expected_ancestors = {ref_product}
+        reference = None
+    else:
+        expected_ancestors = set()
+        reference = ref_cubes[0]
+
+    out_products = t_test(products, reference=reference)
+
+    assert isinstance(out_products, set)
+    out_dict = products_set_to_dict(out_products)
+    assert len(out_dict) == len(products)
+
+    product_a = out_dict["A"]
+    assert product_a.filename == "A"
+    assert product_a.attributes == {"dataset": "a"}
+    assert len(product_a.cubes) == 1
+    out_cube = product_a.cubes[0]
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, regular_cubes[0].data)
+    assert out_cube.var_name == "tas"
+    assert out_cube.standard_name == "air_temperature"
+    assert out_cube.units == "K"
+    assert out_cube.dim_coords == regular_cubes[0].dim_coords
+    assert out_cube.aux_coords == regular_cubes[0].aux_coords
+    assert_correct_p_value(
+        out_cube,
+        [[1.0, 0.6666667], [0.42264974, 0.46547753]],
+    )
+    assert product_a.wasderivedfrom.call_count == len(expected_ancestors)
+    assert product_a.mock_ancestors == expected_ancestors
+
+    product_b = out_dict["B"]
+    assert product_b.filename == "B"
+    assert product_b.attributes == {"dataset": "b"}
+    assert len(product_b.cubes) == 1
+    out_cube = product_b.cubes[0]
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, regular_cubes[0].data)
+    assert out_cube.var_name == "tas"
+    assert out_cube.standard_name == "air_temperature"
+    assert out_cube.units == "K"
+    assert out_cube.dim_coords == regular_cubes[0].dim_coords
+    assert out_cube.aux_coords == regular_cubes[0].aux_coords
+    assert_correct_p_value(
+        out_cube,
+        [[1.0, 0.6666667], [0.42264974, 0.46547753]],
+    )
+    assert product_b.wasderivedfrom.call_count == len(expected_ancestors)
+    assert product_b.mock_ancestors == expected_ancestors
+
+    if use_reference_product:
+        product_ref = out_dict["REF"]
+        assert product_ref.filename == "REF"
+        assert product_ref.attributes == {"reference_for_t_test": True}
+        assert len(product_ref.cubes) == 1
+        out_cube = product_ref.cubes[0]
+        assert out_cube.dtype == np.float32
+        assert_allclose(out_cube.data, ref_cubes[0].data)
+        assert out_cube.var_name == "tas"
+        assert out_cube.standard_name == "air_temperature"
+        assert out_cube.units == "K"
+        assert out_cube.dim_coords == ref_cubes[0].dim_coords
+        assert out_cube.aux_coords == ref_cubes[0].aux_coords
+        assert out_cube.ancillary_variables() == []
+        product_ref.wasderivedfrom.assert_not_called()
+        assert product_ref.mock_ancestors == set()
+
+
+@pytest.mark.parametrize("lazy", [True, False])
+def test_t_test_cubes(regular_cubes, ref_cubes, lazy):
+    """Test calculation of t_test with cubes."""
+    cube = regular_cubes[0]
+    ref_cube = ref_cubes[0]
+    if lazy:
+        cube.data = cube.lazy_data()
+        ref_cube.data = ref_cube.lazy_data()
+
+    out_cubes = t_test([cube], ref_cube)
+
+    assert cube.has_lazy_data() is lazy
+    assert ref_cube.has_lazy_data() is lazy
+
+    assert isinstance(out_cubes, CubeList)
+    assert len(out_cubes) == 1
+    out_cube = out_cubes[0]
+
+    assert out_cube.has_lazy_data() is lazy
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, cube.data)
+    assert out_cube.var_name == "tas"
+    assert out_cube.standard_name == "air_temperature"
+    assert out_cube.units == "K"
+    assert out_cube.dim_coords == regular_cubes[0].dim_coords
+    assert out_cube.aux_coords == regular_cubes[0].aux_coords
+    assert_correct_p_value(
+        out_cube,
+        [[1.0, 0.6666667], [0.42264974, 0.46547753]],
+        is_lazy=lazy,
+    )
+
+
+@pytest.mark.parametrize("lazy", [True, False])
+def test_t_test_cubes_1d(regular_cubes, ref_cubes, lazy):
+    """Test calculation of t_test with 1D cubes."""
+    cube = regular_cubes[0][0, :, 0]
+    ref_cube = ref_cubes[0][0, :, 0]
+    if lazy:
+        cube.data = cube.lazy_data()
+        ref_cube.data = ref_cube.lazy_data()
+
+    out_cubes = t_test(
+        [cube],
+        ref_cube,
+        coordinate="latitude",
+        equal_var=False,
+    )
+
+    assert cube.has_lazy_data() is lazy
+    assert ref_cube.has_lazy_data() is lazy
+
+    assert isinstance(out_cubes, CubeList)
+    assert len(out_cubes) == 1
+    out_cube = out_cubes[0]
+
+    assert out_cube.has_lazy_data() is lazy
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, cube.data)
+    assert out_cube.var_name == "tas"
+    assert out_cube.standard_name == "air_temperature"
+    assert out_cube.units == "K"
+    assert out_cube.dim_coords == cube.dim_coords
+    assert out_cube.aux_coords == cube.aux_coords
+    assert_correct_p_value(out_cube, [0.4999999701976776], is_lazy=lazy)
+
+
+@pytest.mark.parametrize("ref_lazy", [True, False])
+def test_t_test_cubes_partly_lazy(regular_cubes, ref_cubes, ref_lazy):
+    """Test calculation of t_test with cubes."""
+    cube = regular_cubes[0]
+    ref_cube = ref_cubes[0]
+    if ref_lazy:
+        ref_cube.data = ref_cube.lazy_data()
+    else:
+        cube.data = cube.lazy_data()
+
+    out_cubes = t_test([cube], ref_cube)
+
+    assert cube.has_lazy_data() is not ref_lazy
+    assert ref_cube.has_lazy_data() is ref_lazy
+
+    assert isinstance(out_cubes, CubeList)
+    assert len(out_cubes) == 1
+    out_cube = out_cubes[0]
+
+    assert out_cube.has_lazy_data() is not ref_lazy
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, cube.data)
+    assert out_cube.var_name == "tas"
+    assert out_cube.standard_name == "air_temperature"
+    assert out_cube.units == "K"
+    assert out_cube.dim_coords == regular_cubes[0].dim_coords
+    assert out_cube.aux_coords == regular_cubes[0].aux_coords
+    assert_correct_p_value(
+        out_cube,
+        [[1.0, 0.6666667], [0.42264974, 0.46547753]],
+    )
+
+
+@pytest.mark.parametrize("lazy", [True, False])
+def test_t_test_cubes_masked(regular_cubes, ref_cubes, lazy):
+    """Test calculation of t_test with cubes (masked data)."""
+    cube = regular_cubes[0]
+    cube.data = np.ma.masked_inside(cube.data, 1.5, 3.5)
+    cube.data = np.ma.masked_greater(cube.data, 6.5)
+    ref_cube = ref_cubes[0]
+    ref_cube.data = np.ma.masked_invalid(
+        np.array(
+            [[[2.0, np.nan], [2.0, 2.0]], [[2.0, 2.0], [2.0, 2.0]]],
+            dtype=np.float32,
+        ),
+    )
+    if lazy:
+        cube.data = cube.lazy_data()
+        ref_cube.data = ref_cube.lazy_data()
+
+    out_cubes = t_test([cube], ref_cube)
+
+    assert cube.has_lazy_data() is lazy
+    assert ref_cube.has_lazy_data() is lazy
+
+    assert isinstance(out_cubes, CubeList)
+    assert len(out_cubes) == 1
+    out_cube = out_cubes[0]
+
+    assert out_cube.has_lazy_data() is lazy
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, cube.data)
+    assert out_cube.var_name == "tas"
+    assert out_cube.standard_name == "air_temperature"
+    assert out_cube.units == "K"
+    assert out_cube.dim_coords == regular_cubes[0].dim_coords
+    assert out_cube.aux_coords == regular_cubes[0].aux_coords
+    assert_correct_p_value(
+        out_cube,
+        np.ma.masked_invalid([[1.0, np.nan], [np.nan, np.nan]]),
+        is_lazy=lazy,
+    )
+
+
+def test_t_test_multidim_coord_fail(regular_cubes):
+    """Test t_test with multidimensional coordinate."""
+    cube = regular_cubes[0]
+    x_coord = AuxCoord([[0, 0], [0, 0]], var_name="x")
+    cube.add_aux_coord(x_coord, (0, 1))
+    msg = r"Multi-dimensional coordinate not supported: 'x'"
+    with pytest.raises(CoordinateMultiDimError, match=re.escape(msg)):
+        t_test([cube], reference=cube, coordinate=x_coord)
+
+
+def test_t_test_reference_none_cubes_fail(regular_cubes):
+    """Test t_test with reference=None and cubes given."""
+    msg = (
+        r"A list of Cubes is given to this preprocessor; please specify a "
+        r"`reference`"
+    )
+    with pytest.raises(ValueError, match=re.escape(msg)):
+        t_test(regular_cubes)
+
+
+def test_no_reference_for_t_test_fail(regular_cubes, ref_cubes):
+    """Test fail when no reference_for_t_test is given."""
+    products = {
+        PreprocessorFile(regular_cubes, "A", {}),
+        PreprocessorFile(regular_cubes, "B", {}),
+        PreprocessorFile(ref_cubes, "REF", {}),
+    }
+    msg = r"Expected exactly 1 dataset with 'reference_for_t_test: true', found 0"
+    with pytest.raises(ValueError, match=re.escape(msg)):
+        t_test(products)
+
+
+def test_two_references_for_t_test_fail(regular_cubes, ref_cubes):
+    """Test fail when two reference_for_t_test products are given."""
+    products = {
+        PreprocessorFile(regular_cubes, "A", {"reference_for_t_test": False}),
+        PreprocessorFile(ref_cubes, "REF1", {"reference_for_t_test": True}),
+        PreprocessorFile(ref_cubes, "REF2", {"reference_for_t_test": True}),
+    }
+    msg = r"Expected exactly 1 dataset with 'reference_for_t_test: true', found 2"
+    with pytest.raises(ValueError, match=re.escape(msg)):
+        t_test(products)