diff --git a/doc/esmvalcore/recipe.rst b/doc/esmvalcore/recipe.rst index 6ba46e1848..db270c22d6 100644 --- a/doc/esmvalcore/recipe.rst +++ b/doc/esmvalcore/recipe.rst @@ -101,7 +101,7 @@ Here it is an example concatenating the `historical` experiment with `rcp85` .. code-block:: yaml datasets: - - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: r1i1p1, start_year: 2001, end_year: 2004} + - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: r1i1p1, start_year: 2001, end_year: 2004} It is also possible to define the ensemble as a list, although it is useful only case the two experiments have different ensemble names @@ -109,7 +109,27 @@ case the two experiments have different ensemble names .. code-block:: yaml datasets: - - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004} + - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004} + +ESMValTool also supports a simplified syntax to add multiple ensemble members from the same dataset. +In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive), +adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1 +you can use the following abbreviated syntax: + +.. code-block:: yaml + + datasets: + - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:10)i1p1, start_year: 2001, end_year: 2004} + +It can be included multiple times in one definition. For example, to generate the dataset definitions +for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i2p1 you can use: + +.. code-block:: yaml + + datasets: + - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:5)i(1:2)p1, start_year: 2001, end_year: 2004} + +Please bear in mind that this syntax can only be used in the ensemble tag. 
@staticmethod
def _expand_ensemble(variables):
    """Expand ensemble ranges into one dataset entry per ensemble member.

    Every ``(x:y)`` element found in a string-valued ``ensemble`` key is
    replaced by each integer from x to y (both inclusive). Several ranges
    may appear in the same value, e.g. ``r(1:5)i(1:2)p1``; all of them are
    expanded, the leftmost range varying slowest.

    Parameters
    ----------
    variables : list of dict
        Variable definitions, each optionally holding an ``ensemble`` key.

    Returns
    -------
    list of dict
        Entries whose ``ensemble`` is missing, is not a string (e.g. a
        list) or holds no range are passed through as the same objects.
        Every other entry is replaced by one deep copy per generated
        ensemble member. A range whose end is smaller than its start
        produces no entries at all.
    """
    range_pattern = re.compile(r'\((\d+):(\d+)\)')
    expanded = []
    for variable in variables:
        ensemble = variable.get('ensemble', "")
        # Expansion only supports ensembles defined as strings, not lists.
        if not isinstance(ensemble, str):
            expanded.append(variable)
            continue
        match = range_pattern.search(ensemble)
        if not match:
            expanded.append(variable)
            continue

        # Resolve one range per pass, left to right. Every pending member
        # shares the same unresolved ranges, so inspecting the first one
        # is enough to find the next range to expand.
        members = [ensemble]
        while match:
            first, last = int(match.group(1)), int(match.group(2))
            members = [
                range_pattern.sub(str(number), member, 1)
                for member in members
                for number in range(first, last + 1)
            ]
            if not members:
                # Empty range (end < start): nothing left to expand.
                break
            match = range_pattern.search(members[0])

        for member in members:
            entry = deepcopy(variable)
            entry['ensemble'] = member
            expanded.append(entry)
    return expanded
def test_ensemble_expansion(tmp_path, patched_datafinder, config_user):
    """Check that an ``r(1:3)i1p1`` ensemble yields one dataset per member."""
    recipe_text = dedent("""
        diagnostics:
          diagnostic_name:
            variables:
              ta:
                project: CMIP5
                mip: Amon
                exp: historical
                ensemble: r(1:3)i1p1
                start_year: 2000
                end_year: 2005
                grid: gn
                type: reanaly
                tier: 2
                version: latest
                additional_datasets:
                  - {dataset: GFDL-CM3}
            scripts: null
        """)

    recipe = get_recipe(tmp_path, recipe_text, config_user)
    assert len(recipe.diagnostics) == 1

    diagnostic = recipe.diagnostics['diagnostic_name']
    products = diagnostic['preprocessor_output']['ta']
    # The single recipe entry must become exactly three ordered members.
    members = [product['ensemble'] for product in products]
    assert members == ['r1i1p1', 'r2i1p1', 'r3i1p1']