ESMValGroup · mattiarighi · Oct 2, 2019 · Aug 19, 2019 · Aug 19, 2019 · Sep 4, 2019
diff --git a/doc/esmvalcore/recipe.rst b/doc/esmvalcore/recipe.rst
@@ -101,15 +101,35 @@ Here it is an example concatenating the `historical` experiment with `rcp85`
 .. code-block:: yaml
 
     datasets:
-      - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: r1i1p1, start_year: 2001, end_year: 2004}
+      - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: r1i1p1, start_year: 2001, end_year: 2004}
 
 It is also possible to define the ensemble as a list, although it is useful only
 case the two experiments have different ensemble names
 
 .. code-block:: yaml
 
     datasets:
-      - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004}
+      - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004}
+
+ESMValTool also supports a simplified syntax to add multiple ensemble members from the same dataset.
+In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive),
+adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1
+you can use the following abbreviated syntax:
+
+.. code-block:: yaml
+
+    datasets:
+      - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:10)i1p1, start_year: 2001, end_year: 2004}
+
+It can be included multiple times in one definition. For example, to generate the dataset definitions
+for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i2p1 you can use:
+
+.. code-block:: yaml
+
+    datasets:
+      - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:5)i(1:2)p1, start_year: 2001, end_year: 2004}
+
+Please bear in mind that this syntax can only be used in the ensemble key.
 
 Note that this section is not required, as datasets can also be provided in the
 Diagnostics_ section.

diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
@@ -2,6 +2,7 @@
 import fnmatch
 import logging
 import os
+import re
 from collections import OrderedDict
 from copy import deepcopy
 
@@ -851,6 +852,7 @@ class Recipe:
 
     info_keys = ('project', 'dataset', 'exp', 'ensemble', 'version')
     """List of keys to be used to compose the alias, ordered by priority."""
+
     def __init__(self,
                  raw_recipe,
                  config_user,
@@ -927,21 +929,46 @@ def _initialize_datasets(raw_datasets):
         for dataset in datasets:
             for key in dataset:
                 DATASET_KEYS.add(key)
-
-        check.duplicate_datasets(datasets)
         return datasets
 
+    @staticmethod
+    def _expand_ensemble(variables):
+        """
+        Expand ensemble members to multiple datasets.
+
+        Every `(x:y)` in a string ensemble is expanded; lists are left as is.
+        """
+        expanded = []
+        regex = re.compile(r'\(\d+:\d+\)')
+        pending = list(reversed(variables))  # stack; reversed to keep order
+        while pending:
+            variable = pending.pop()
+            ensemble = variable.get('ensemble', "")
+            match = isinstance(ensemble, str) and regex.search(ensemble)
+            if not match:
+                expanded.append(variable)
+                continue
+            start, end = match.group(0)[1:-1].split(':')
+            # Re-queue (descending, so pops ascend) to expand further ranges.
+            for i in range(int(end), int(start) - 1, -1):
+                expand = deepcopy(variable)
+                expand['ensemble'] = regex.sub(str(i), ensemble, 1)
+                pending.append(expand)
+        return expanded
+
     def _initialize_variables(self, raw_variable, raw_datasets):
         """Define variables for all datasets."""
         variables = []
 
         raw_variable = deepcopy(raw_variable)
         datasets = self._initialize_datasets(
             raw_datasets + raw_variable.pop('additional_datasets', []))
+        check.duplicate_datasets(datasets)
 
         for index, dataset in enumerate(datasets):
             variable = deepcopy(raw_variable)
             variable.update(dataset)
+
             variable['recipe_dataset_index'] = index
             if ('cmor_table' not in variable
                     and variable.get('project') in CMOR_TABLES):
@@ -972,7 +999,7 @@ def _initialize_variables(self, raw_variable, raw_datasets):
             if activity:
                 variable['activity'] = activity
             check.variable(variable, required_keys)
-
+        variables = self._expand_ensemble(variables)
         return variables
 
     def _initialize_preprocessor_output(self, diagnostic_name, raw_variables,

diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py
@@ -1091,6 +1091,37 @@ def test_concatenation(tmp_path, patched_datafinder, config_user):
             assert dataset['alias'] == 'historical-rcp85'
 
 
+def test_ensemble_expansion(tmp_path, patched_datafinder, config_user):
+    content = dedent("""
+        diagnostics:
+          diagnostic_name:
+            variables:
+              ta:
+                project: CMIP5
+                mip: Amon
+                exp: historical
+                ensemble: r(1:3)i1p1
+                start_year: 2000
+                end_year: 2005
+                grid: gn
+                type: reanaly
+                tier: 2
+                version: latest
+                additional_datasets:
+                  - {dataset: GFDL-CM3}
+            scripts: null
+        """)
+
+    recipe = get_recipe(tmp_path, content, config_user)
+    assert len(recipe.diagnostics) == 1
+    diag = recipe.diagnostics['diagnostic_name']
+    var = diag['preprocessor_output']['ta']
+    assert len(var) == 3  # one entry per member of r(1:3)i1p1
+    assert var[0]['ensemble'] == 'r1i1p1'  # members in ascending order
+    assert var[1]['ensemble'] == 'r2i1p1'
+    assert var[2]['ensemble'] == 'r3i1p1'
+
+
 def test_extract_shape(tmp_path, patched_datafinder, config_user):
     content = dedent("""
         preprocessors:
@@ -1113,7 +1144,6 @@ def test_extract_shape(tmp_path, patched_datafinder, config_user):
                   - {dataset: GFDL-CM3}
             scripts: null
         """)
-
     # Create shapefile
     shapefile = config_user['auxiliary_data_dir'] / Path('test.shp')
     shapefile.parent.mkdir(parents=True, exist_ok=True)