From 010dfbfd3092495b156a3c83272a6cb5715411ba Mon Sep 17 00:00:00 2001
From: Javier Vegas-Regidor <javier.vegas@bsc.es>
Date: Mon, 19 Aug 2019 11:45:20 +0200
Subject: [PATCH 1/4] Add option to expand ensembles

---
 esmvalcore/_recipe.py            | 36 +++++++++++++++++++++++++++++---
 esmvalcore/recipe_schema.yml     |  4 ++--
 tests/integration/test_recipe.py | 34 +++++++++++++++++++++++++++++-
 3 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index 4b240e91b5..fdc9c340a3 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -2,6 +2,7 @@
 import fnmatch
 import logging
 import os
+import re
 from collections import OrderedDict
 from copy import deepcopy
 
@@ -865,10 +866,33 @@ def _initialize_datasets(raw_datasets):
         for dataset in datasets:
             for key in dataset:
                 DATASET_KEYS.add(key)
-
-        check.duplicate_datasets(datasets)
         return datasets
 
+    @staticmethod
+    def _expand_ensemble(variables):
+        """
+        Expand ensemble members to multiple datasets
+
+        Expansion only support ensembles defined as strings, not lists
+        """
+        expanded = []
+        regex = re.compile(r'\[\d+:\d+\]')
+        for variable in variables:
+            ensemble = variable.get('ensemble', "")
+            if not isinstance(ensemble, str):
+                expanded.append(variable)
+                continue
+            match = regex.search(ensemble)
+            if not match:
+                expanded.append(variable)
+                continue
+            start, end = match.group(0)[1: -1].split(':')
+            for i in range(int(start), int(end) + 1):
+                expand = deepcopy(variable)
+                expand['ensemble'] = regex.sub(str(i), ensemble, 1)
+                expanded.append(expand)
+        return expanded
+
     def _initialize_variables(self, raw_variable, raw_datasets):
         """Define variables for all datasets."""
         variables = []
@@ -876,10 +900,14 @@ def _initialize_variables(self, raw_variable, raw_datasets):
         raw_variable = deepcopy(raw_variable)
         datasets = self._initialize_datasets(
             raw_datasets + raw_variable.pop('additional_datasets', []))
+        check.duplicate_datasets(datasets)
 
         for index, dataset in enumerate(datasets):
             variable = deepcopy(raw_variable)
             variable.update(dataset)
+
+
+
             variable['recipe_dataset_index'] = index
             if ('cmor_table' not in variable
                     and variable.get('project') in CMOR_TABLES):
@@ -918,7 +946,9 @@ def _initialize_variables(self, raw_variable, raw_datasets):
                 logger.info("Using fx files for var %s of dataset %s:\n%s",
                             variable['short_name'], variable['dataset'],
                             variable['fx_files'])
-
+        variables = self._expand_ensemble(variables)
+        for variable in variables:
+            print(variables)
         return variables
 
     def _initialize_preprocessor_output(self, diagnostic_name, raw_variables,
diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml
index 4eea7f4b47..266f8b9f63 100644
--- a/esmvalcore/recipe_schema.yml
+++ b/esmvalcore/recipe_schema.yml
@@ -21,7 +21,7 @@ dataset:
   project: str(required=False)
   start_year: int(required=False, min=0000, max=10000)
   end_year: int(required=False, min=0000, max=10000)
-  ensemble: str(required=False)
+  ensemble: any(str(), list(str()), required=False)
   exp: any(str(), list(str()), required=False)
   mip: str(required=False)
   realm: str(required=False)
@@ -33,7 +33,7 @@ variable:
   project: str(required=False)
   start_year: int(required=False, min=0000, max=10000)
   end_year: int(required=False, min=0000, max=10000)
-  ensemble: str(required=False)
+  ensemble: any(str(), list(str()), required=False)
   exp: any(str(), list(str()), required=False)
   mip: str(required=False)
   preprocessor: str(required=False)
diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py
index 7e3b00a5f5..ce1e5e4017 100644
--- a/tests/integration/test_recipe.py
+++ b/tests/integration/test_recipe.py
@@ -713,7 +713,7 @@ def test_diagnostic_task_provenance(
         patched_datafinder,
         monkeypatch,
         config_user,
-        ):
+    ):
     monkeypatch.setattr(esmvalcore._config, 'TAGS', TAGS)
     monkeypatch.setattr(esmvalcore._recipe, 'TAGS', TAGS)
     monkeypatch.setattr(esmvalcore._task, 'TAGS', TAGS)
@@ -845,3 +845,35 @@ def test_alias_generation(tmp_path, patched_datafinder, config_user):
                 assert dataset['alias'] == 'OBS_1'
             else:
                 assert dataset['alias'] == 'OBS_2'
+
+
+def test_ensemble_expansion(tmp_path, patched_datafinder, config_user):
+
+    content = dedent("""
+        diagnostics:
+          diagnostic_name:
+            variables:
+              ta:
+                project: CMIP5
+                mip: Amon
+                exp: historical
+                ensemble: r[1:3]i1p1
+                start_year: 2000
+                end_year: 2005
+                grid: gn
+                type: reanaly
+                tier: 2
+                version: latest
+                additional_datasets:
+                  - {dataset: GFDL-CM3}
+            scripts: null
+        """)
+
+    recipe = get_recipe(tmp_path, content, config_user)
+    assert len(recipe.diagnostics) == 1
+    diag = recipe.diagnostics['diagnostic_name']
+    var = diag['preprocessor_output']['ta']
+    assert len(var) == 3
+    assert var[0]['ensemble'] == 'r1i1p1'
+    assert var[1]['ensemble'] == 'r2i1p1'
+    assert var[2]['ensemble'] == 'r3i1p1'

From e1fc3c2e195bd454ee630c89e9194ca4f6b7fd7a Mon Sep 17 00:00:00 2001
From: Javier Vegas-Regidor <javier.vegas@bsc.es>
Date: Mon, 19 Aug 2019 11:55:39 +0200
Subject: [PATCH 2/4] Undo schema change and fix lint

---
 esmvalcore/_recipe.py            | 2 --
 esmvalcore/recipe_schema.yml     | 4 ++--
 tests/integration/test_recipe.py | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index fdc9c340a3..baaf2862fe 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -906,8 +906,6 @@ def _initialize_variables(self, raw_variable, raw_datasets):
             variable = deepcopy(raw_variable)
             variable.update(dataset)
 
-
-
             variable['recipe_dataset_index'] = index
             if ('cmor_table' not in variable
                     and variable.get('project') in CMOR_TABLES):
diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml
index 266f8b9f63..4eea7f4b47 100644
--- a/esmvalcore/recipe_schema.yml
+++ b/esmvalcore/recipe_schema.yml
@@ -21,7 +21,7 @@ dataset:
   project: str(required=False)
   start_year: int(required=False, min=0000, max=10000)
   end_year: int(required=False, min=0000, max=10000)
-  ensemble: any(str(), list(str()), required=False)
+  ensemble: str(required=False)
   exp: any(str(), list(str()), required=False)
   mip: str(required=False)
   realm: str(required=False)
@@ -33,7 +33,7 @@ variable:
   project: str(required=False)
   start_year: int(required=False, min=0000, max=10000)
   end_year: int(required=False, min=0000, max=10000)
-  ensemble: any(str(), list(str()), required=False)
+  ensemble: str(required=False)
   exp: any(str(), list(str()), required=False)
   mip: str(required=False)
   preprocessor: str(required=False)
diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py
index ce1e5e4017..9a5d306413 100644
--- a/tests/integration/test_recipe.py
+++ b/tests/integration/test_recipe.py
@@ -712,8 +712,7 @@ def test_diagnostic_task_provenance(
         tmp_path,
         patched_datafinder,
         monkeypatch,
-        config_user,
-    ):
+        config_user,):
     monkeypatch.setattr(esmvalcore._config, 'TAGS', TAGS)
     monkeypatch.setattr(esmvalcore._recipe, 'TAGS', TAGS)
     monkeypatch.setattr(esmvalcore._task, 'TAGS', TAGS)

From 35ee2e4e3a4720492f12304be2e9c330ec5a3de9 Mon Sep 17 00:00:00 2001
From: Javier Vegas-Regidor <javier.vegas@bsc.es>
Date: Fri, 20 Sep 2019 16:31:20 +0200
Subject: [PATCH 3/4] Change syntax

---
 esmvalcore/_recipe.py            | 2 +-
 tests/integration/test_recipe.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index 8d52499f64..a971b39936 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -905,7 +905,7 @@ def _expand_ensemble(variables):
         Expansion only support ensembles defined as strings, not lists
         """
         expanded = []
-        regex = re.compile(r'\[\d+:\d+\]')
+        regex = re.compile(r'\(\d+:\d+\)')
         for variable in variables:
             ensemble = variable.get('ensemble', "")
             if not isinstance(ensemble, str):
diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py
index 8a7403fbd6..907cb226c7 100644
--- a/tests/integration/test_recipe.py
+++ b/tests/integration/test_recipe.py
@@ -984,7 +984,7 @@ def test_ensemble_expansion(tmp_path, patched_datafinder, config_user):
                 project: CMIP5
                 mip: Amon
                 exp: historical
-                ensemble: r[1:3]i1p1
+                ensemble: r(1:3)i1p1
                 start_year: 2000
                 end_year: 2005
                 grid: gn

From 38511d957cc4cbfc3bf02d640e20f9411b1acf99 Mon Sep 17 00:00:00 2001
From: Javier Vegas-Regidor <javier.vegas@bsc.es>
Date: Tue, 1 Oct 2019 10:45:52 +0200
Subject: [PATCH 4/4] Add doc

---
 doc/esmvalcore/recipe.rst | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/doc/esmvalcore/recipe.rst b/doc/esmvalcore/recipe.rst
index c1396a4163..c92b84d481 100644
--- a/doc/esmvalcore/recipe.rst
+++ b/doc/esmvalcore/recipe.rst
@@ -101,7 +101,7 @@ Here it is an example concatenating the `historical` experiment with `rcp85`
 .. code-block:: yaml
 
     datasets:
-      - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: r1i1p1, start_year: 2001, end_year: 2004}
+      - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: r1i1p1, start_year: 2001, end_year: 2004}
 
 It is also possible to define the ensemble as a list, although it is useful only
 case the two experiments have different ensemble names
@@ -109,7 +109,27 @@ case the two experiments have different ensemble names
 .. code-block:: yaml
 
     datasets:
-      - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004}
+      - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004}
+
+ESMValTool also supports a simplified syntax to add multiple ensemble members from the same dataset.
+In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive),
+adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1
+you can use the following abbreviated syntax:
+
+.. code-block:: yaml
+
+    datasets:
+      - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:10)i1p1, start_year: 2001, end_year: 2004}
+
+It can be included multiple times in one definition. For example, to generate the dataset definitions
+for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i2p1 you can use:
+
+.. code-block:: yaml
+
+    datasets:
+      - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:5)i(1:2)p1, start_year: 2001, end_year: 2004}
+
+Please bear in mind that this syntax can only be used in the ensemble tag.
 
 Note that this section is not required, as datasets can also be provided in the
 Diagnostics_ section.