From 010dfbfd3092495b156a3c83272a6cb5715411ba Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 19 Aug 2019 11:45:20 +0200 Subject: [PATCH 1/4] Add option to expand ensembles --- esmvalcore/_recipe.py | 36 +++++++++++++++++++++++++++++--- esmvalcore/recipe_schema.yml | 4 ++-- tests/integration/test_recipe.py | 34 +++++++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 4b240e91b5..fdc9c340a3 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -2,6 +2,7 @@ import fnmatch import logging import os +import re from collections import OrderedDict from copy import deepcopy @@ -865,10 +866,33 @@ def _initialize_datasets(raw_datasets): for dataset in datasets: for key in dataset: DATASET_KEYS.add(key) - - check.duplicate_datasets(datasets) return datasets + @staticmethod + def _expand_ensemble(variables): + """ + Expand ensemble members to multiple datasets + + Expansion only support ensembles defined as strings, not lists + """ + expanded = [] + regex = re.compile(r'\[\d+:\d+\]') + for variable in variables: + ensemble = variable.get('ensemble', "") + if not isinstance(ensemble, str): + expanded.append(variable) + continue + match = regex.search(ensemble) + if not match: + expanded.append(variable) + continue + start, end = match.group(0)[1: -1].split(':') + for i in range(int(start), int(end) + 1): + expand = deepcopy(variable) + expand['ensemble'] = regex.sub(str(i), ensemble, 1) + expanded.append(expand) + return expanded + def _initialize_variables(self, raw_variable, raw_datasets): """Define variables for all datasets.""" variables = [] @@ -876,10 +900,14 @@ def _initialize_variables(self, raw_variable, raw_datasets): raw_variable = deepcopy(raw_variable) datasets = self._initialize_datasets( raw_datasets + raw_variable.pop('additional_datasets', [])) + check.duplicate_datasets(datasets) for index, dataset in enumerate(datasets): variable = 
deepcopy(raw_variable) variable.update(dataset) + + + variable['recipe_dataset_index'] = index if ('cmor_table' not in variable and variable.get('project') in CMOR_TABLES): @@ -918,7 +946,9 @@ def _initialize_variables(self, raw_variable, raw_datasets): logger.info("Using fx files for var %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], variable['fx_files']) - + variables = self._expand_ensemble(variables) + for variable in variables: + print(variables) return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml index 4eea7f4b47..266f8b9f63 100644 --- a/esmvalcore/recipe_schema.yml +++ b/esmvalcore/recipe_schema.yml @@ -21,7 +21,7 @@ dataset: project: str(required=False) start_year: int(required=False, min=0000, max=10000) end_year: int(required=False, min=0000, max=10000) - ensemble: str(required=False) + ensemble: any(str(), list(str()), required=False) exp: any(str(), list(str()), required=False) mip: str(required=False) realm: str(required=False) @@ -33,7 +33,7 @@ variable: project: str(required=False) start_year: int(required=False, min=0000, max=10000) end_year: int(required=False, min=0000, max=10000) - ensemble: str(required=False) + ensemble: any(str(), list(str()), required=False) exp: any(str(), list(str()), required=False) mip: str(required=False) preprocessor: str(required=False) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 7e3b00a5f5..ce1e5e4017 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -713,7 +713,7 @@ def test_diagnostic_task_provenance( patched_datafinder, monkeypatch, config_user, - ): + ): monkeypatch.setattr(esmvalcore._config, 'TAGS', TAGS) monkeypatch.setattr(esmvalcore._recipe, 'TAGS', TAGS) monkeypatch.setattr(esmvalcore._task, 'TAGS', TAGS) @@ -845,3 +845,35 @@ def test_alias_generation(tmp_path, patched_datafinder, config_user): 
assert dataset['alias'] == 'OBS_1' else: assert dataset['alias'] == 'OBS_2' + + +def test_ensemble_expansion(tmp_path, patched_datafinder, config_user): + + content = dedent(""" + diagnostics: + diagnostic_name: + variables: + ta: + project: CMIP5 + mip: Amon + exp: historical + ensemble: r[1:3]i1p1 + start_year: 2000 + end_year: 2005 + grid: gn + type: reanaly + tier: 2 + version: latest + additional_datasets: + - {dataset: GFDL-CM3} + scripts: null + """) + + recipe = get_recipe(tmp_path, content, config_user) + assert len(recipe.diagnostics) == 1 + diag = recipe.diagnostics['diagnostic_name'] + var = diag['preprocessor_output']['ta'] + assert len(var) == 3 + assert var[0]['ensemble'] == 'r1i1p1' + assert var[1]['ensemble'] == 'r2i1p1' + assert var[2]['ensemble'] == 'r3i1p1' From e1fc3c2e195bd454ee630c89e9194ca4f6b7fd7a Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 19 Aug 2019 11:55:39 +0200 Subject: [PATCH 2/4] Undo schema change and fix lint --- esmvalcore/_recipe.py | 2 -- esmvalcore/recipe_schema.yml | 4 ++-- tests/integration/test_recipe.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index fdc9c340a3..baaf2862fe 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -906,8 +906,6 @@ def _initialize_variables(self, raw_variable, raw_datasets): variable = deepcopy(raw_variable) variable.update(dataset) - - variable['recipe_dataset_index'] = index if ('cmor_table' not in variable and variable.get('project') in CMOR_TABLES): diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml index 266f8b9f63..4eea7f4b47 100644 --- a/esmvalcore/recipe_schema.yml +++ b/esmvalcore/recipe_schema.yml @@ -21,7 +21,7 @@ dataset: project: str(required=False) start_year: int(required=False, min=0000, max=10000) end_year: int(required=False, min=0000, max=10000) - ensemble: any(str(), list(str()), required=False) + ensemble: str(required=False) exp: any(str(), 
list(str()), required=False) mip: str(required=False) realm: str(required=False) @@ -33,7 +33,7 @@ variable: project: str(required=False) start_year: int(required=False, min=0000, max=10000) end_year: int(required=False, min=0000, max=10000) - ensemble: any(str(), list(str()), required=False) + ensemble: str(required=False) exp: any(str(), list(str()), required=False) mip: str(required=False) preprocessor: str(required=False) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index ce1e5e4017..9a5d306413 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -712,8 +712,7 @@ def test_diagnostic_task_provenance( tmp_path, patched_datafinder, monkeypatch, - config_user, - ): + config_user,): monkeypatch.setattr(esmvalcore._config, 'TAGS', TAGS) monkeypatch.setattr(esmvalcore._recipe, 'TAGS', TAGS) monkeypatch.setattr(esmvalcore._task, 'TAGS', TAGS) From 35ee2e4e3a4720492f12304be2e9c330ec5a3de9 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 20 Sep 2019 16:31:20 +0200 Subject: [PATCH 3/4] Change syntax --- esmvalcore/_recipe.py | 2 +- tests/integration/test_recipe.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 8d52499f64..a971b39936 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -905,7 +905,7 @@ def _expand_ensemble(variables): Expansion only support ensembles defined as strings, not lists """ expanded = [] - regex = re.compile(r'\[\d+:\d+\]') + regex = re.compile(r'\(\d+:\d+\)') for variable in variables: ensemble = variable.get('ensemble', "") if not isinstance(ensemble, str): diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 8a7403fbd6..907cb226c7 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -984,7 +984,7 @@ def test_ensemble_expansion(tmp_path, patched_datafinder, config_user): project: CMIP5 mip: Amon exp: 
historical - ensemble: r[1:3]i1p1 + ensemble: r(1:3)i1p1 start_year: 2000 end_year: 2005 grid: gn From 38511d957cc4cbfc3bf02d640e20f9411b1acf99 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 1 Oct 2019 10:45:52 +0200 Subject: [PATCH 4/4] Add doc --- doc/esmvalcore/recipe.rst | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/esmvalcore/recipe.rst b/doc/esmvalcore/recipe.rst index c1396a4163..c92b84d481 100644 --- a/doc/esmvalcore/recipe.rst +++ b/doc/esmvalcore/recipe.rst @@ -101,7 +101,7 @@ Here it is an example concatenating the `historical` experiment with `rcp85` .. code-block:: yaml datasets: - - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: r1i1p1, start_year: 2001, end_year: 2004} + - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: r1i1p1, start_year: 2001, end_year: 2004} It is also possible to define the ensemble as a list, although it is useful only case the two experiments have different ensemble names @@ -109,7 +109,27 @@ case the two experiments have different ensemble names .. code-block:: yaml datasets: - - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85] ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004} + - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004} + +ESMValTool also supports a simplified syntax to add multiple ensemble members from the same dataset. +In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive), +adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1 +you can use the following abbreviated syntax: + +.. code-block:: yaml + + datasets: + - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:10)i1p1, start_year: 2001, end_year: 2004} + +It can be included multiple times in one definition. 
For example, to generate the dataset definitions +for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i2p1 you can use: + +.. code-block:: yaml + + datasets: + - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r(1:5)i(1:2)p1, start_year: 2001, end_year: 2004} + +Please, bear in mind that this syntax can only be used in the ensemble tag. Note that this section is not required, as datasets can also be provided in the Diagnostics_ section.