Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions doc/esmvalcore/recipe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,35 @@ Here it is an example concatenating the `historical` experiment with `rcp85`
.. code-block:: yaml

datasets:
- {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: r1i1p1, start_year: 2001, end_year: 2004}
- {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: r1i1p1, start_year: 2001, end_year: 2004}

It is also possible to define the ensemble as a list, although it is useful only
in case the two experiments have different ensemble names.

.. code-block:: yaml

datasets:
- {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004}
- {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004}

ESMValTool also supports a simplified syntax to add multiple ensemble members from the same dataset.
In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive),
adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1
you can use the following abbreviated syntax:

.. code-block:: yaml

datasets:
- {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:10)i1p1, start_year: 2001, end_year: 2004}

It can be included multiple times in one definition. For example, to generate the dataset definitions
for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i2p1 you can use:

.. code-block:: yaml

datasets:
- {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:5)i(1:2)p1, start_year: 2001, end_year: 2004}

Please, bear in mind that this syntax can only be used in the ensemble tag.

Note that this section is not required, as datasets can also be provided in the
Diagnostics_ section.
Expand Down
33 changes: 30 additions & 3 deletions esmvalcore/_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import fnmatch
import logging
import os
import re
from collections import OrderedDict
from copy import deepcopy

Expand Down Expand Up @@ -851,6 +852,7 @@ class Recipe:

info_keys = ('project', 'dataset', 'exp', 'ensemble', 'version')
"""List of keys to be used to compose the alias, ordered by priority."""

def __init__(self,
raw_recipe,
config_user,
Expand Down Expand Up @@ -927,21 +929,46 @@ def _initialize_datasets(raw_datasets):
for dataset in datasets:
for key in dataset:
DATASET_KEYS.add(key)

check.duplicate_datasets(datasets)
return datasets

@staticmethod
def _expand_ensemble(variables):
    """Expand ensemble range shorthand into one dataset per member.

    Every ``(x:y)`` group found in a string-valued ``ensemble`` entry is
    replaced by each integer from ``x`` to ``y`` (both inclusive). A
    string may contain several groups, e.g. ``r(1:2)i(1:2)p1``; all of
    them are expanded, with the leftmost group varying slowest.

    Expansion only supports ensembles defined as strings, not lists.

    Parameters
    ----------
    variables : list of dict
        Variable definitions, each optionally holding an ``ensemble`` key.

    Returns
    -------
    list of dict
        The expanded definitions, in input order. Entries that need no
        expansion are passed through as the same objects; expanded
        entries are deep copies, so the input is never modified.
    """
    regex = re.compile(r'\((\d+):(\d+)\)')

    def expand(variable):
        """Return ``variable`` expanded over every remaining group."""
        ensemble = variable.get('ensemble', "")
        # Lists (and any other non-string value) are never expanded.
        match = regex.search(ensemble) if isinstance(ensemble, str) else None
        if match is None:
            return [variable]
        first, last = int(match.group(1)), int(match.group(2))
        members = []
        # An inverted range such as ``(5:1)`` produces no members at all.
        for number in range(first, last + 1):
            member = deepcopy(variable)
            member['ensemble'] = (ensemble[:match.start()] + str(number) +
                                  ensemble[match.end():])
            # Recurse so that any groups further right are expanded too.
            members.extend(expand(member))
        return members

    expanded = []
    for variable in variables:
        expanded.extend(expand(variable))
    return expanded

def _initialize_variables(self, raw_variable, raw_datasets):
"""Define variables for all datasets."""
variables = []

raw_variable = deepcopy(raw_variable)
datasets = self._initialize_datasets(
raw_datasets + raw_variable.pop('additional_datasets', []))
check.duplicate_datasets(datasets)

for index, dataset in enumerate(datasets):
variable = deepcopy(raw_variable)
variable.update(dataset)

variable['recipe_dataset_index'] = index
if ('cmor_table' not in variable
and variable.get('project') in CMOR_TABLES):
Expand Down Expand Up @@ -972,7 +999,7 @@ def _initialize_variables(self, raw_variable, raw_datasets):
if activity:
variable['activity'] = activity
check.variable(variable, required_keys)

variables = self._expand_ensemble(variables)
return variables

def _initialize_preprocessor_output(self, diagnostic_name, raw_variables,
Expand Down
32 changes: 31 additions & 1 deletion tests/integration/test_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,37 @@ def test_concatenation(tmp_path, patched_datafinder, config_user):
assert dataset['alias'] == 'historical-rcp85'


def test_ensemble_expansion(tmp_path, patched_datafinder, config_user):
    """Test that an ensemble range expands into one entry per member."""
    # Recipe with a single variable whose ensemble uses the ``(1:3)``
    # range shorthand and a single additional dataset.
    content = dedent("""
diagnostics:
diagnostic_name:
variables:
ta:
project: CMIP5
mip: Amon
exp: historical
ensemble: r(1:3)i1p1
start_year: 2000
end_year: 2005
grid: gn
type: reanaly
tier: 2
version: latest
additional_datasets:
- {dataset: GFDL-CM3}
scripts: null
""")

    recipe = get_recipe(tmp_path, content, config_user)
    assert len(recipe.diagnostics) == 1
    diag = recipe.diagnostics['diagnostic_name']
    var = diag['preprocessor_output']['ta']
    # The one dataset must have become three entries, ordered by member.
    assert len(var) == 3
    assert var[0]['ensemble'] == 'r1i1p1'
    assert var[1]['ensemble'] == 'r2i1p1'
    assert var[2]['ensemble'] == 'r3i1p1'


def test_extract_shape(tmp_path, patched_datafinder, config_user):
content = dedent("""
preprocessors:
Expand All @@ -1113,7 +1144,6 @@ def test_extract_shape(tmp_path, patched_datafinder, config_user):
- {dataset: GFDL-CM3}
scripts: null
""")

# Create shapefile
shapefile = config_user['auxiliary_data_dir'] / Path('test.shp')
shapefile.parent.mkdir(parents=True, exist_ok=True)
Expand Down