Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions esmvalcore/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import yaml

from .cmor.table import read_cmor_tables
from .cmor.table import read_cmor_tables, CMOR_TABLES

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -172,13 +172,30 @@ def get_project_config(project):


def get_institutes(variable):
    """Return the institutes given the dataset name in CMIP5 and CMIP6.

    The ``institutes`` mapping attached to the project's entry in
    ``CMOR_TABLES`` is consulted first. When the project, the mapping or
    the dataset is missing there, the ``institutes`` section of the
    project's entry in ``CFG`` is used instead; an empty list is returned
    if the dataset is not found in either place.
    """
    dataset, project = variable['dataset'], variable['project']
    logger.debug("Retrieving institutes for dataset %s", dataset)
    try:
        institutes = CMOR_TABLES[project].institutes[dataset]
    except (KeyError, AttributeError):
        # Fall back to the institutes configured for the project.
        project_config = CFG.get(project, {})
        institutes = project_config.get('institutes', {}).get(dataset, [])
    return institutes


def get_activity(variable):
    """Return the activity given the experiment name in CMIP6.

    ``variable['exp']`` may be a single experiment name or a list of
    names; a list yields a list of activities in the same order. ``None``
    is returned when the variable has no ``exp`` entry or when the lookup
    in ``CMOR_TABLES`` fails with a ``KeyError`` or ``AttributeError``.
    """
    project = variable['project']

    def _activity_of(experiment):
        # Resolved lazily so that nothing is looked up for an empty list.
        return CMOR_TABLES[project].activities[experiment]

    try:
        exp = variable['exp']
        logger.debug("Retrieving activity_id for experiment %s", exp)
        if not isinstance(exp, list):
            return _activity_of(exp)
        return list(map(_activity_of, exp))
    except (KeyError, AttributeError):
        return None


def replace_mip_fx(fx_file):
"""Replace MIP so to retrieve correct fx files."""
default_mip = 'Amon'
Expand Down
5 changes: 4 additions & 1 deletion esmvalcore/_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from . import __version__
from . import _recipe_checks as check
from ._config import TAGS, get_institutes, replace_tags
from ._config import TAGS, get_institutes, get_activity, replace_tags
from ._data_finder import (get_input_filelist, get_input_fx_filelist,
get_output_file, get_statistic_output_file)
from ._provenance import TrackedFile, get_recipe_provenance
Expand Down Expand Up @@ -906,6 +906,9 @@ def _initialize_variables(self, raw_variable, raw_datasets):
institute = get_institutes(variable)
if institute:
variable['institute'] = institute
activity = get_activity(variable)
if activity:
variable['activity'] = activity
check.variable(variable, required_keys)
if 'fx_files' in variable:
for fx_file in variable['fx_files']:
Expand Down
25 changes: 25 additions & 0 deletions esmvalcore/cmor/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ def __init__(self, cmor_tables_path, default=None):
cmor_tables_path = self._get_cmor_path(cmor_tables_path)

self._cmor_folder = os.path.join(cmor_tables_path, 'Tables')
if glob.glob(os.path.join(self._cmor_folder, '*_CV.json')):
self._load_controlled_vocabulary()
self.default = default

self.tables = {}
Expand Down Expand Up @@ -141,6 +143,29 @@ def _load_coordinates(self):
coord.read_json(table_data['axis_entry'][coord_name])
self.coords[coord_name] = coord

def _load_controlled_vocabulary(self):
    """Populate ``self.activities`` and ``self.institutes`` from CV files.

    Every ``*_CV.json`` file found in ``self._cmor_folder`` is read. From
    its ``CV`` section, ``experiment_id`` entries contribute their
    ``activity_id`` to ``self.activities`` (keyed by experiment id) and
    ``source_id`` entries contribute their ``institution_id`` to
    ``self.institutes`` (keyed by source id).

    Sections or entries that are missing or not shaped as mappings are
    skipped individually, so one malformed entry does not prevent the
    remaining entries from being loaded.

    Raises
    ------
    json.JSONDecodeError
        If a matching file does not contain valid JSON.
    """
    self.activities = {}
    self.institutes = {}
    # (section in the CV, field to extract, dictionary to fill)
    sections = (
        ('experiment_id', 'activity_id', self.activities),
        ('source_id', 'institution_id', self.institutes),
    )
    cv_pattern = os.path.join(self._cmor_folder, '*_CV.json')
    # glob returns files in arbitrary order; sorting makes the result
    # deterministic when several files define the same key (last wins).
    for json_file in sorted(glob.glob(cv_pattern)):
        with open(json_file, encoding='utf-8') as inf:
            table_data = json.load(inf)
        vocabulary = (table_data.get('CV')
                      if isinstance(table_data, dict) else None)
        if not isinstance(vocabulary, dict):
            continue
        for section, field, target in sections:
            entries = vocabulary.get(section)
            if not isinstance(entries, dict):
                continue
            for key, entry in entries.items():
                # Skip only the offending entry, not the whole section.
                if isinstance(entry, dict) and field in entry:
                    target[key] = entry[field]

def get_table(self, table):
"""
Search and return the table info.
Expand Down
113 changes: 2 additions & 111 deletions esmvalcore/config-developer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,121 +21,12 @@
CMIP6:
input_dir:
default: '/'
BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
BADC: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
DKRZ: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc'
output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]'
cmor_type: 'CMIP6'
institutes:
'ACCESS-CM2': ['CSIRO-ARCCSS-BoM']
'ACCESS-ESM1-5': ['CSIRO']
'ARTS-2-3': ['UHH']
'AWI-CM-1-1-HR': ['AWI']
'AWI-CM-1-1-LR': ['AWI']
'AWI-CM-1-1-MR': ['AWI']
'AWI-ESM-1-1-LR': ['AWI']
'BCC-CSM2-HR': ['BCC']
'BCC-CSM2-MR': ['BCC']
'BCC-ESM1': ['BCC']
'BESM-2-7': ['INPE']
'BNU-ESM-1-1': ['BNU']
'CAMS-CSM1-0': ['CAMS']
'CanESM5': ['CCCma']
'CAS-ESM1-0': ['CAS']
'CESM2': ['NCAR']
'CESM2-SE': ['NCAR']
'CESM2-WACCM': ['NCAR']
'CIESM': ['THU']
'CMCC-CM2-HR4': ['CMCC']
'CMCC-CM2-HR5': ['CMCC']
'CMCC-CM2-SR5': ['CMCC']
'CMCC-CM2-VHR4': ['CMCC']
'CMCC-ESM2-HR5': ['CMCC']
'CMCC-ESM2-SR5': ['CMCC']
'CNRM-CM6-1': ['CNRM-CERFACS']
'CNRM-CM6-1-HR': ['CNRM-CERFACS']
'CNRM-ESM2-1': ['CNRM-CERFACS']
'CNRM-ESM2-1-HR': ['CNRM-CERFACS']
'CSIRO-Mk3L-1-3': ['UTAS']
'E3SM-1-0': ['E3SM-Project']
'EC-Earth3-AerChem': ['EC-Earth-Consortium']
'EC-Earth3-CC': ['EC-Earth-Consortium']
'EC-Earth3': ['EC-Earth-Consortium']
'EC-Earth3-GrIS': ['EC-Earth-Consortium']
'EC-Earth3-HR': ['EC-Earth-Consortium']
'EC-Earth3-LR': ['EC-Earth-Consortium']
'EC-Earth3P': ['EC-Earth-Consortium']
'EC-Earth3P-HR': ['EC-Earth-Consortium']
'EC-Earth3P-VHR': ['EC-Earth-Consortium']
'EC-Earth3-Veg': ['EC-Earth-Consortium']
'EC-Earth3-Veg-LR': ['EC-Earth-Consortium']
'ECMWF-IFS-HR': ['ECMWF']
'ECMWF-IFS-LR': ['ECMWF']
'ECMWF-IFS-MR': ['ECMWF']
'EMAC-2-53-AerChem': ['MESSy-Consortium']
'EMAC-2-53-Vol': ['MESSy-Consortium']
'FGOALS-f3-H': ['CAS']
'FGOALS-f3-L': ['CAS']
'FGOALS-g3': ['CAS']
'FIO-ESM-2-0': ['FIO-QLNM']
'GFDL-AM4': ['NOAA-GFDL']
'GFDL-CM4C192': ['NOAA-GFDL']
'GFDL-CM4': ['NOAA-GFDL']
'GFDL-ESM2M': ['NOAA-GFDL']
'GFDL-ESM4': ['NOAA-GFDL']
'GFDL-OM4p5B': ['NOAA-GFDL']
'GISS-E2-1-G': ['NASA-GISS']
'GISS-E2-1-H': ['NASA-GISS']
'GISS-E2-1-MA-G': ['NASA-GISS']
'GISS-E3-G': ['NASA-GISS']
'HadGEM3-GC31-HH': ['MOHC', 'NERC']
'HadGEM3-GC31-HM': ['MOHC', 'NERC']
'HadGEM3-GC31-LL': ['MOHC']
'HadGEM3-GC31-LM': ['MOHC']
'HadGEM3-GC31-MH': ['MOHC']
'HadGEM3-GC31-MM': ['MOHC']
'ICON-ESM-LR': ['MPI-M']
'IITM-ESM': ['CCCR-IITM']
'INM-CM4-8': ['INM']
'INM-CM5-0': ['INM']
'INM-CM5-H': ['INM']
'IPSL-CM6A-ATM-HR': ['IPSL']
'IPSL-CM6A-LR': ['IPSL']
'KACE-1-0-G': ['NIMS-KMA']
'KIOST-ESM': ['KIOST']
'LBLRTM-12-8': ['AER']
'MCM-UA-1-0': ['UA']
'MIROC6': ['MIROC']
'MIROC-ES2H': ['MIROC']
'MIROC-ES2L': ['MIROC']
'MPI-ESM-1-2-HAM': ['HAMMOZ-Consortium']
'MPI-ESM1-2-HR': ['MPI-M', 'DWD', 'DKRZ']
'MPI-ESM1-2-LR': ['MPI-M', 'AWI']
'MRI-AGCM3-2': ['MRI']
'MRI-ESM2-0': ['MRI']
'NESM3': ['NUIST']
'NICAM16-7S': ['MIROC']
'NICAM16-8S': ['MIROC']
'NICAM16-9D-L78': ['MIROC']
'NICAM16-9S': ['MIROC']
'NorESM2-HH': ['NCC']
'NorESM2-LMEC': ['NCC']
'NorESM2-LME': ['NCC']
'NorESM2-LM': ['NCC']
'NorESM2-MH': ['NCC']
'NorESM2-MM': ['NCC']
'PCMDI-test-1-0': ['PCMDI']
'RRTMG-LW-4-91': ['AER']
'RRTMG-SW-4-02': ['AER']
'RTE-RRTMGP-181204': ['AER']
'SAM0-UNICON': ['SNU']
'TaiESM1': ['AS-RCEC']
'UKESM1-0-LL': ['MOHC', 'NERC', 'NIMS-KMA', 'NIWA']
'UKESM1-0-MMh': ['MOHC', 'NERC']
'UofT-CCSM4': ['UofT']
'VRESM-1-0': ['CSIR-CSIRO']


CMIP5:
cmor_strict: true
Expand Down
10 changes: 10 additions & 0 deletions tests/integration/cmor/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ def test_get_bad_variable(self):
"""Get none if a variable is not in the given table."""
self.assertIsNone(self.variables_info.get_variable('Omon', 'tas'))

def test_get_institute_from_source(self):
    """Check the institute list registered for source ACCESS-1-0."""
    expected = ['CSIRO-BOM']
    self.assertListEqual(
        self.variables_info.institutes['ACCESS-1-0'], expected)

def test_get_activity_from_exp(self):
    """Check the activity list registered for experiment 1pctCO2."""
    expected = ['CMIP']
    self.assertListEqual(
        self.variables_info.activities['1pctCO2'], expected)


class Testobs4mipsInfo(unittest.TestCase):
"""Test for the obs$mips info class."""
Expand Down
107 changes: 99 additions & 8 deletions tests/integration/test_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,97 @@ def test_empty_variable(tmp_path, patched_datafinder, config_user):
assert product.attributes['dataset'] == 'CanESM2'


def test_cmip5_variable_autocomplete(tmp_path, patched_datafinder,
config_user):
"""Test that required information is automatically added for CMIP5.

The inline recipe names a single CMIP5 dataset (CanESM2) and the ``pr``
variable. The first ``pr`` entry of the resulting preprocessor output is
then compared, key by key, against ``reference``.
"""
content = dedent("""
diagnostics:
test:
additional_datasets:
- dataset: CanESM2
project: CMIP5
mip: 3hr
exp: historical
start_year: 2000
end_year: 2001
ensemble: r1i1p1
variables:
pr:
scripts: null
""")

# Build the recipe from the inline content and take the first ``pr`` variable.
recipe = get_recipe(tmp_path, content, config_user)
variable = recipe.diagnostics['test']['preprocessor_output']['pr'][0]

# Expected values; several of them (e.g. ``institute``, ``long_name``,
# ``units``) do not appear anywhere in the recipe text above.
reference = {
'dataset': 'CanESM2',
'diagnostic': 'test',
'end_year': 2001,
'ensemble': 'r1i1p1',
'exp': 'historical',
'frequency': '3hr',
'institute': ['CCCma'],
'long_name': 'Precipitation',
'mip': '3hr',
'modeling_realm': ['atmos'],
'preprocessor': 'default',
'project': 'CMIP5',
'short_name': 'pr',
'standard_name': 'precipitation_flux',
'start_year': 2000,
'units': 'kg m-2 s-1',
}
# Only the keys listed in ``reference`` are checked; any additional keys
# present on ``variable`` are ignored.
for key in reference:
assert variable[key] == reference[key]


def test_cmip6_variable_autocomplete(tmp_path, patched_datafinder,
config_user):
"""Test that required information is automatically added for CMIP6.

The inline recipe names a single CMIP6 dataset (HadGEM3-GC31-LL) and the
``pr`` variable. The first ``pr`` entry of the resulting preprocessor
output is then compared, key by key, against ``reference``, which also
expects an ``activity`` entry.
"""
content = dedent("""
diagnostics:
test:
additional_datasets:
- dataset: HadGEM3-GC31-LL
project: CMIP6
mip: 3hr
exp: historical
start_year: 2000
end_year: 2001
ensemble: r2i1p1f1
grid: gn
variables:
pr:
scripts: null
""")

# Build the recipe from the inline content and take the first ``pr`` variable.
recipe = get_recipe(tmp_path, content, config_user)
variable = recipe.diagnostics['test']['preprocessor_output']['pr'][0]

# Expected values; several of them (e.g. ``activity``, ``institute``,
# ``long_name``) do not appear anywhere in the recipe text above.
reference = {
'activity': ['CMIP'],
'dataset': 'HadGEM3-GC31-LL',
'diagnostic': 'test',
'end_year': 2001,
'ensemble': 'r2i1p1f1',
'exp': 'historical',
'frequency': '3hr',
'grid': 'gn',
'institute': ['MOHC'],
'long_name': 'Precipitation',
'mip': '3hr',
'modeling_realm': ['atmos'],
'preprocessor': 'default',
'project': 'CMIP6',
'short_name': 'pr',
'standard_name': 'precipitation_flux',
'start_year': 2000,
'units': 'kg m-2 s-1',
}
# Only the keys listed in ``reference`` are checked; any additional keys
# present on ``variable`` are ignored.
for key in reference:
assert variable[key] == reference[key]


def test_reference_dataset(tmp_path, patched_datafinder, config_user,
monkeypatch):

Expand Down Expand Up @@ -395,10 +486,10 @@ def test_reference_dataset(tmp_path, patched_datafinder, config_user,
task = next(t for t in recipe.tasks
if t.name == 'diagnostic_name' + TASKSEP + 'ta')
assert len(task.products) == 2
product = next(
p for p in task.products if p.attributes['dataset'] == 'GFDL-CM3')
reference = next(
p for p in task.products if p.attributes['dataset'] == 'MPI-ESM-LR')
product = next(p for p in task.products
if p.attributes['dataset'] == 'GFDL-CM3')
reference = next(p for p in task.products
if p.attributes['dataset'] == 'MPI-ESM-LR')

assert product.settings['regrid']['target_grid'] == reference.files[0]
assert product.settings['extract_levels']['levels'] == levels
Expand All @@ -409,7 +500,7 @@ def test_reference_dataset(tmp_path, patched_datafinder, config_user,
'CMIP5',
'MPI-ESM-LR',
'ta',
fix_dir
fix_dir,
)

assert 'regrid' not in reference.settings
Expand Down Expand Up @@ -713,7 +804,7 @@ def test_diagnostic_task_provenance(
patched_datafinder,
monkeypatch,
config_user,
):
):
monkeypatch.setattr(esmvalcore._config, 'TAGS', TAGS)
monkeypatch.setattr(esmvalcore._recipe, 'TAGS', TAGS)
monkeypatch.setattr(esmvalcore._task, 'TAGS', TAGS)
Expand Down Expand Up @@ -762,8 +853,8 @@ def test_diagnostic_task_provenance(

# Check that diagnostic script tags have been added
for key in ('statistics', 'domains', 'authors', 'references'):
assert product.attributes[key] == tuple(
TAGS[key][k] for k in record[key])
assert product.attributes[key] == tuple(TAGS[key][k]
for k in record[key])

# Check that recipe diagnostic tags have been added
src = yaml.safe_load(DEFAULT_DOCUMENTATION + content)
Expand Down