def get_institutes(variable):
    """Return the institutes given the dataset name in CMIP5 and CMIP6.

    ``CMOR_TABLES[project].institutes`` is consulted first. When the
    project has no entry in ``CMOR_TABLES``, the entry exposes no
    ``institutes`` attribute, or the dataset is not listed there, the
    ``institutes`` mapping stored under the project in ``CFG`` is used.

    Parameters
    ----------
    variable : dict
        Variable description; the ``'dataset'`` and ``'project'`` keys
        are read.

    Returns
    -------
    object
        The value stored for the dataset in the CMOR table, otherwise the
        value stored in ``CFG``, otherwise an empty list.

    Raises
    ------
    KeyError
        If ``variable`` has no ``'dataset'`` or ``'project'`` key.
    """
    dataset = variable['dataset']
    project = variable['project']
    logger.debug("Retrieving institutes for dataset %s", dataset)
    try:
        return CMOR_TABLES[project].institutes[dataset]
    except (KeyError, AttributeError):
        # Not an error: fall through to the configuration lookup below,
        # but leave a trace so a missing institute can be diagnosed.
        logger.debug(
            "No institutes for dataset %s in the CMOR tables of project "
            "%s, falling back to the configuration", dataset, project)
    return CFG.get(project, {}).get('institutes', {}).get(dataset, [])


def get_activity(variable):
    """Return the activity given the experiment name in CMIP6.

    Parameters
    ----------
    variable : dict
        Variable description; the ``'project'`` key is required and the
        ``'exp'`` key is optional. ``'exp'`` may be a single experiment
        name or a list of names.

    Returns
    -------
    object or None
        For a single experiment, the value stored for it in
        ``CMOR_TABLES[project].activities``. For a list of experiments, a
        list with one looked-up value per experiment, in the same order.
        ``None`` when ``'exp'`` is absent, when the project has no usable
        ``activities`` mapping, or when any requested experiment is not
        in that mapping.

    Raises
    ------
    KeyError
        If ``variable`` has no ``'project'`` key.
    """
    project = variable['project']
    try:
        exp = variable['exp']
    except KeyError:
        return None
    logger.debug("Retrieving activity_id for experiment %s", exp)

    try:
        activities = CMOR_TABLES[project].activities
    except (KeyError, AttributeError):
        logger.debug("No activities available for project %s", project)
        return None

    try:
        if isinstance(exp, list):
            # NOTE(review): values are returned exactly as stored; if the
            # mapping holds lists this yields a list of lists - confirm
            # that callers expect that shape.
            return [activities[value] for value in exp]
        return activities[exp]
    except KeyError:
        logger.debug(
            "Experiment %s has no activity in the CMOR tables of project %s",
            exp, project)
        return None
def _load_controlled_vocabulary(self):
    """Load activities and institutes from the ``*_CV.json`` files.

    Every file matching ``*_CV.json`` in ``self._cmor_folder`` is parsed
    as JSON. From its ``CV`` object two mappings are filled:

    * ``self.activities``: experiment id -> the ``activity_id`` value of
      that entry in ``CV['experiment_id']``
    * ``self.institutes``: source id -> the ``institution_id`` value of
      that entry in ``CV['source_id']``

    Both attributes are reset to empty dicts on every call. Files without
    a ``CV`` object, missing sections, and individual entries that are
    not mappings or lack the wanted key are skipped; the remaining
    entries are still loaded.

    Raises
    ------
    OSError
        If a matching file cannot be opened.
    json.JSONDecodeError
        If a matching file does not contain valid JSON.
    """
    self.activities = {}
    self.institutes = {}
    # (CV section, key read from each entry, mapping that receives it)
    sections = (
        ('experiment_id', 'activity_id', self.activities),
        ('source_id', 'institution_id', self.institutes),
    )
    pattern = os.path.join(self._cmor_folder, '*_CV.json')
    # glob returns files in arbitrary order; sorting makes the outcome
    # deterministic when several files define the same id (last wins).
    for json_file in sorted(glob.glob(pattern)):
        with open(json_file, encoding='utf-8') as inf:
            table_data = json.load(inf)

        vocabulary = None
        if isinstance(table_data, dict):
            vocabulary = table_data.get('CV')
        if not isinstance(vocabulary, dict):
            continue

        for section, field, target in sections:
            entries = vocabulary.get(section)
            if not isinstance(entries, dict):
                continue
            for name, entry in entries.items():
                # Guard each entry on its own so that one incomplete or
                # malformed entry does not discard the ones after it.
                try:
                    target[name] = entry[field]
                except (KeyError, TypeError):
                    continue
Search and return the table info. diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml index 994213f81b..16186352c1 100644 --- a/esmvalcore/config-developer.yml +++ b/esmvalcore/config-developer.yml @@ -21,121 +21,12 @@ CMIP6: input_dir: default: '/' - BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' - DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' + BADC: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' + DKRZ: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/' input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' - institutes: - 'ACCESS-CM2': ['CSIRO-ARCCSS-BoM'] - 'ACCESS-ESM1-5': ['CSIRO'] - 'ARTS-2-3': ['UHH'] - 'AWI-CM-1-1-HR': ['AWI'] - 'AWI-CM-1-1-LR': ['AWI'] - 'AWI-CM-1-1-MR': ['AWI'] - 'AWI-ESM-1-1-LR': ['AWI'] - 'BCC-CSM2-HR': ['BCC'] - 'BCC-CSM2-MR': ['BCC'] - 'BCC-ESM1': ['BCC'] - 'BESM-2-7': ['INPE'] - 'BNU-ESM-1-1': ['BNU'] - 'CAMS-CSM1-0': ['CAMS'] - 'CanESM5': ['CCCma'] - 'CAS-ESM1-0': ['CAS'] - 'CESM2': ['NCAR'] - 'CESM2-SE': ['NCAR'] - 'CESM2-WACCM': ['NCAR'] - 'CIESM': ['THU'] - 'CMCC-CM2-HR4': ['CMCC'] - 'CMCC-CM2-HR5': ['CMCC'] - 'CMCC-CM2-SR5': ['CMCC'] - 'CMCC-CM2-VHR4': ['CMCC'] - 'CMCC-ESM2-HR5': ['CMCC'] - 'CMCC-ESM2-SR5': ['CMCC'] - 'CNRM-CM6-1': ['CNRM-CERFACS'] - 'CNRM-CM6-1-HR': ['CNRM-CERFACS'] - 'CNRM-ESM2-1': ['CNRM-CERFACS'] - 'CNRM-ESM2-1-HR': ['CNRM-CERFACS'] - 'CSIRO-Mk3L-1-3': ['UTAS'] - 'E3SM-1-0': ['E3SM-Project'] - 'EC-Earth3-AerChem': ['EC-Earth-Consortium'] - 'EC-Earth3-CC': ['EC-Earth-Consortium'] - 'EC-Earth3': ['EC-Earth-Consortium'] - 'EC-Earth3-GrIS': ['EC-Earth-Consortium'] - 'EC-Earth3-HR': ['EC-Earth-Consortium'] - 'EC-Earth3-LR': 
['EC-Earth-Consortium'] - 'EC-Earth3P': ['EC-Earth-Consortium'] - 'EC-Earth3P-HR': ['EC-Earth-Consortium'] - 'EC-Earth3P-VHR': ['EC-Earth-Consortium'] - 'EC-Earth3-Veg': ['EC-Earth-Consortium'] - 'EC-Earth3-Veg-LR': ['EC-Earth-Consortium'] - 'ECMWF-IFS-HR': ['ECMWF'] - 'ECMWF-IFS-LR': ['ECMWF'] - 'ECMWF-IFS-MR': ['ECMWF'] - 'EMAC-2-53-AerChem': ['MESSy-Consortium'] - 'EMAC-2-53-Vol': ['MESSy-Consortium'] - 'FGOALS-f3-H': ['CAS'] - 'FGOALS-f3-L': ['CAS'] - 'FGOALS-g3': ['CAS'] - 'FIO-ESM-2-0': ['FIO-QLNM'] - 'GFDL-AM4': ['NOAA-GFDL'] - 'GFDL-CM4C192': ['NOAA-GFDL'] - 'GFDL-CM4': ['NOAA-GFDL'] - 'GFDL-ESM2M': ['NOAA-GFDL'] - 'GFDL-ESM4': ['NOAA-GFDL'] - 'GFDL-OM4p5B': ['NOAA-GFDL'] - 'GISS-E2-1-G': ['NASA-GISS'] - 'GISS-E2-1-H': ['NASA-GISS'] - 'GISS-E2-1-MA-G': ['NASA-GISS'] - 'GISS-E3-G': ['NASA-GISS'] - 'HadGEM3-GC31-HH': ['MOHC', 'NERC'] - 'HadGEM3-GC31-HM': ['MOHC', 'NERC'] - 'HadGEM3-GC31-LL': ['MOHC'] - 'HadGEM3-GC31-LM': ['MOHC'] - 'HadGEM3-GC31-MH': ['MOHC'] - 'HadGEM3-GC31-MM': ['MOHC'] - 'ICON-ESM-LR': ['MPI-M'] - 'IITM-ESM': ['CCCR-IITM'] - 'INM-CM4-8': ['INM'] - 'INM-CM5-0': ['INM'] - 'INM-CM5-H': ['INM'] - 'IPSL-CM6A-ATM-HR': ['IPSL'] - 'IPSL-CM6A-LR': ['IPSL'] - 'KACE-1-0-G': ['NIMS-KMA'] - 'KIOST-ESM': ['KIOST'] - 'LBLRTM-12-8': ['AER'] - 'MCM-UA-1-0': ['UA'] - 'MIROC6': ['MIROC'] - 'MIROC-ES2H': ['MIROC'] - 'MIROC-ES2L': ['MIROC'] - 'MPI-ESM-1-2-HAM': ['HAMMOZ-Consortium'] - 'MPI-ESM1-2-HR': ['MPI-M', 'DWD', 'DKRZ'] - 'MPI-ESM1-2-LR': ['MPI-M', 'AWI'] - 'MRI-AGCM3-2': ['MRI'] - 'MRI-ESM2-0': ['MRI'] - 'NESM3': ['NUIST'] - 'NICAM16-7S': ['MIROC'] - 'NICAM16-8S': ['MIROC'] - 'NICAM16-9D-L78': ['MIROC'] - 'NICAM16-9S': ['MIROC'] - 'NorESM2-HH': ['NCC'] - 'NorESM2-LMEC': ['NCC'] - 'NorESM2-LME': ['NCC'] - 'NorESM2-LM': ['NCC'] - 'NorESM2-MH': ['NCC'] - 'NorESM2-MM': ['NCC'] - 'PCMDI-test-1-0': ['PCMDI'] - 'RRTMG-LW-4-91': ['AER'] - 'RRTMG-SW-4-02': ['AER'] - 'RTE-RRTMGP-181204': ['AER'] - 'SAM0-UNICON': ['SNU'] - 'TaiESM1': ['AS-RCEC'] - 'UKESM1-0-LL': 
['MOHC', 'NERC', 'NIMS-KMA', 'NIWA'] - 'UKESM1-0-MMh': ['MOHC', 'NERC'] - 'UofT-CCSM4': ['UofT'] - 'VRESM-1-0': ['CSIR-CSIRO'] - CMIP5: cmor_strict: true diff --git a/tests/integration/cmor/test_table.py b/tests/integration/cmor/test_table.py index 84540a2972..b29996e3ee 100644 --- a/tests/integration/cmor/test_table.py +++ b/tests/integration/cmor/test_table.py @@ -40,6 +40,16 @@ def test_get_bad_variable(self): """Get none if a variable is not in the given table.""" self.assertIsNone(self.variables_info.get_variable('Omon', 'tas')) + def test_get_institute_from_source(self): + """Get institution for source ACCESS-1-0""" + institute = self.variables_info.institutes['ACCESS-1-0'] + self.assertListEqual(institute, ['CSIRO-BOM']) + + def test_get_activity_from_exp(self): + """Get activity for experiment 1pctCO2""" + activity = self.variables_info.activities['1pctCO2'] + self.assertListEqual(activity, ['CMIP']) + class Testobs4mipsInfo(unittest.TestCase): """Test for the obs$mips info class.""" diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index d2134cb090..220425ff6e 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -338,6 +338,97 @@ def test_empty_variable(tmp_path, patched_datafinder, config_user): assert product.attributes['dataset'] == 'CanESM2' +def test_cmip5_variable_autocomplete(tmp_path, patched_datafinder, + config_user): + """Test that required information is automatically added for CMIP5.""" + content = dedent(""" + diagnostics: + test: + additional_datasets: + - dataset: CanESM2 + project: CMIP5 + mip: 3hr + exp: historical + start_year: 2000 + end_year: 2001 + ensemble: r1i1p1 + variables: + pr: + scripts: null + """) + + recipe = get_recipe(tmp_path, content, config_user) + variable = recipe.diagnostics['test']['preprocessor_output']['pr'][0] + + reference = { + 'dataset': 'CanESM2', + 'diagnostic': 'test', + 'end_year': 2001, + 'ensemble': 'r1i1p1', + 'exp': 'historical', + 
'frequency': '3hr', + 'institute': ['CCCma'], + 'long_name': 'Precipitation', + 'mip': '3hr', + 'modeling_realm': ['atmos'], + 'preprocessor': 'default', + 'project': 'CMIP5', + 'short_name': 'pr', + 'standard_name': 'precipitation_flux', + 'start_year': 2000, + 'units': 'kg m-2 s-1', + } + for key in reference: + assert variable[key] == reference[key] + + +def test_cmip6_variable_autocomplete(tmp_path, patched_datafinder, + config_user): + """Test that required information is automatically added for CMIP6.""" + content = dedent(""" + diagnostics: + test: + additional_datasets: + - dataset: HadGEM3-GC31-LL + project: CMIP6 + mip: 3hr + exp: historical + start_year: 2000 + end_year: 2001 + ensemble: r2i1p1f1 + grid: gn + variables: + pr: + scripts: null + """) + + recipe = get_recipe(tmp_path, content, config_user) + variable = recipe.diagnostics['test']['preprocessor_output']['pr'][0] + + reference = { + 'activity': ['CMIP'], + 'dataset': 'HadGEM3-GC31-LL', + 'diagnostic': 'test', + 'end_year': 2001, + 'ensemble': 'r2i1p1f1', + 'exp': 'historical', + 'frequency': '3hr', + 'grid': 'gn', + 'institute': ['MOHC'], + 'long_name': 'Precipitation', + 'mip': '3hr', + 'modeling_realm': ['atmos'], + 'preprocessor': 'default', + 'project': 'CMIP6', + 'short_name': 'pr', + 'standard_name': 'precipitation_flux', + 'start_year': 2000, + 'units': 'kg m-2 s-1', + } + for key in reference: + assert variable[key] == reference[key] + + def test_reference_dataset(tmp_path, patched_datafinder, config_user, monkeypatch): @@ -395,10 +486,10 @@ def test_reference_dataset(tmp_path, patched_datafinder, config_user, task = next(t for t in recipe.tasks if t.name == 'diagnostic_name' + TASKSEP + 'ta') assert len(task.products) == 2 - product = next( - p for p in task.products if p.attributes['dataset'] == 'GFDL-CM3') - reference = next( - p for p in task.products if p.attributes['dataset'] == 'MPI-ESM-LR') + product = next(p for p in task.products + if p.attributes['dataset'] == 'GFDL-CM3') + 
reference = next(p for p in task.products + if p.attributes['dataset'] == 'MPI-ESM-LR') assert product.settings['regrid']['target_grid'] == reference.files[0] assert product.settings['extract_levels']['levels'] == levels @@ -409,7 +500,7 @@ def test_reference_dataset(tmp_path, patched_datafinder, config_user, 'CMIP5', 'MPI-ESM-LR', 'ta', - fix_dir + fix_dir, ) assert 'regrid' not in reference.settings @@ -713,7 +804,7 @@ def test_diagnostic_task_provenance( patched_datafinder, monkeypatch, config_user, - ): +): monkeypatch.setattr(esmvalcore._config, 'TAGS', TAGS) monkeypatch.setattr(esmvalcore._recipe, 'TAGS', TAGS) monkeypatch.setattr(esmvalcore._task, 'TAGS', TAGS) @@ -762,8 +853,8 @@ def test_diagnostic_task_provenance( # Check that diagnostic script tags have been added for key in ('statistics', 'domains', 'authors', 'references'): - assert product.attributes[key] == tuple( - TAGS[key][k] for k in record[key]) + assert product.attributes[key] == tuple(TAGS[key][k] + for k in record[key]) # Check that recipe diagnostic tags have been added src = yaml.safe_load(DEFAULT_DOCUMENTATION + content)