diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index 85a80f5180..cac5bc086d 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -317,6 +317,19 @@ define the ``start_year`` and ``end_year`` items in the variable section, because the diagnostic script assumes that all the data has the same time range. +Variable short names usually do not change between datasets supported by +ESMValCore, as they are usually changed to match CMIP. Nevertheless, there are +small changes in variable names in CMIP6 with respect to CMIP5 (e.g. sea ice +concentration changed from ``sic`` to ``siconc``). ESMValCore is aware of some +of them and can do the automatic translation when needed. It will even do the +translation in the preprocessed file so the diagnostic does not have to deal +with this complexity, setting the short name in all files to match the one used +by the recipe. For example, if ``sic`` is requested, ESMValTool will +find ``sic`` or ``siconc`` depending on the project, but all preprocessed files +will use ``sic`` as their short_name. If the recipe requested ``siconc``, the +preprocessed files will be identical except that they will use the short_name +``siconc`` instead. 
+ Diagnostic and variable specific datasets ----------------------------------------- The ``additional_datasets`` option can be used to add datasets beyond those diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 7a27dee4f6..782042022b 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -219,10 +219,12 @@ def _get_filenames_glob(variable, drs): def _find_input_files(variable, rootpath, drs): + short_name = variable['short_name'] + variable['short_name'] = variable['original_short_name'] input_dirs = _find_input_dirs(variable, rootpath, drs) filenames_glob = _get_filenames_glob(variable, drs) files = find_files(input_dirs, filenames_glob) - + variable['short_name'] = short_name return (files, input_dirs, filenames_glob) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index b48f935f49..e8aa549097 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -109,7 +109,7 @@ def _add_cmor_info(variable, override=False): raise RecipeError( f"Unable to load CMOR table (project) '{project}' for variable " f"'{short_name}' with mip '{mip}'") - + variable['original_short_name'] = table_entry.short_name for key in cmor_keys: if key not in variable or override: value = getattr(table_entry, key, None) @@ -348,6 +348,8 @@ def _get_default_settings(variable, config_user, derive=False): # Configure saving cubes to file settings['save'] = {'compress': config_user['compress_netcdf']} + if variable['short_name'] != variable['original_short_name']: + settings['save']['alias'] = variable['short_name'] return settings diff --git a/esmvalcore/cmor/table.py b/esmvalcore/cmor/table.py index 07a1ed86bf..bba016f4d0 100644 --- a/esmvalcore/cmor/table.py +++ b/esmvalcore/cmor/table.py @@ -11,6 +11,7 @@ import logging import os from functools import total_ordering +from collections import Counter from pathlib import Path import yaml @@ -50,40 +51,185 @@ def read_cmor_tables(cfg_developer=None): with cfg_file.open() as file: 
cfg_developer = yaml.safe_load(file) + cwd = os.path.dirname(os.path.realpath(__file__)) + var_alt_names_file = os.path.join(cwd, 'variable_alt_names.yml') + with open(var_alt_names_file, 'r') as yfile: + alt_names = yaml.safe_load(yfile) + custom = CustomInfo() CMOR_TABLES.clear() CMOR_TABLES['custom'] = custom install_dir = os.path.dirname(os.path.realpath(__file__)) for table in cfg_developer: - project = cfg_developer[table] - cmor_type = project.get('cmor_type', 'CMIP5') - default_path = os.path.join(install_dir, 'tables', cmor_type.lower()) - table_path = project.get('cmor_path', default_path) - table_path = os.path.expandvars(os.path.expanduser(table_path)) - cmor_strict = project.get('cmor_strict', True) - default_table_prefix = project.get('cmor_default_table_prefix', '') - - if cmor_type == 'CMIP3': - CMOR_TABLES[table] = CMIP3Info( - table_path, - default=custom, - strict=cmor_strict, - ) - elif cmor_type == 'CMIP5': - CMOR_TABLES[table] = CMIP5Info( - table_path, - default=custom, - strict=cmor_strict, - ) - elif cmor_type == 'CMIP6': - CMOR_TABLES[table] = CMIP6Info( - table_path, - default=custom, - strict=cmor_strict, - default_table_prefix=default_table_prefix) - - -class CMIP6Info(object): + CMOR_TABLES[table] = _read_table( + cfg_developer, table, install_dir, custom, alt_names) + + +def _read_table(cfg_developer, table, install_dir, custom, alt_names): + project = cfg_developer[table] + cmor_type = project.get('cmor_type', 'CMIP5') + default_path = os.path.join(install_dir, 'tables', cmor_type.lower()) + table_path = project.get('cmor_path', default_path) + table_path = os.path.expandvars(os.path.expanduser(table_path)) + cmor_strict = project.get('cmor_strict', True) + default_table_prefix = project.get('cmor_default_table_prefix', '') + + if cmor_type == 'CMIP3': + return CMIP3Info( + table_path, + default=custom, + strict=cmor_strict, + alt_names=alt_names, + ) + + if cmor_type == 'CMIP5': + return CMIP5Info( + table_path, + default=custom, + 
strict=cmor_strict, + alt_names=alt_names + ) + + if cmor_type == 'CMIP6': + return CMIP6Info( + table_path, + default=custom, + strict=cmor_strict, + default_table_prefix=default_table_prefix, + alt_names=alt_names, + ) + raise ValueError(f'Unsupported CMOR type {cmor_type}') + + +class InfoBase(): + """ + Base class for all table info classes. + + This uses CMOR 3 json format + + Parameters + ---------- + default: object + Default table to look variables on if not found + + alt_names: list[list[str]] + List of known alternative names for variables + + strict: bool + If False, will look for a variable in other tables if it can not be + found in the requested one + + """ + + def __init__(self, default, alt_names, strict): + if alt_names is None: + alt_names = "" + self.default = default + self.alt_names = alt_names + self.strict = strict + self.tables = {} + + def get_table(self, table): + """ + Search and return the table info. + + Parameters + ---------- + table: basestring + Table name + + Returns + ------- + TableInfo + Return the TableInfo object for the requested table if + found, returns None if not + + """ + return self.tables.get(table) + + def get_variable(self, table_name, short_name, derived=False): + """ + Search and return the variable info. + + Parameters + ---------- + table_name: basestring + Table name + short_name: basestring + Variable's short name + derived: bool, optional + Variable is derived. 
Info retrieval for derived variables always + look on the default tables if variable is not find in the + requested table + + Returns + ------- + VariableInfo + Return the VariableInfo object for the requested variable if + found, returns None if not + + """ + alt_names_list = self._get_alt_names_list(short_name) + + table = self.get_table(table_name) + if table: + for alt_names in alt_names_list: + try: + return table[alt_names] + except KeyError: + pass + + var_info = self._look_in_all_tables(alt_names_list) + if not var_info: + var_info = self._look_in_default( + derived, alt_names_list, table_name) + if var_info: + var_info = var_info.copy() + var_info = self._update_frequency_from_mip(table_name, var_info) + + return var_info + + def _look_in_default(self, derived, alt_names_list, table_name): + var_info = None + if (not self.strict or derived): + for alt_names in alt_names_list: + var_info = self.default.get_variable(table_name, alt_names) + if var_info: + break + return var_info + + def _look_in_all_tables(self, alt_names_list): + var_info = None + if not self.strict: + for alt_names in alt_names_list: + var_info = self._look_all_tables(alt_names) + if var_info: + break + return var_info + + def _get_alt_names_list(self, short_name): + alt_names_list = [short_name] + for alt_names in self.alt_names: + if short_name in alt_names: + alt_names_list.extend( + [alt_name for alt_name in alt_names + if alt_name not in alt_names_list]) + return alt_names_list + + def _update_frequency_from_mip(self, table_name, var_info): + mip_info = self.get_table(table_name) + if mip_info: + var_info.frequency = mip_info.frequency + return var_info + + def _look_all_tables(self, alt_names): + for table_vars in sorted(self.tables.values()): + if alt_names in table_vars: + return table_vars[alt_names] + return None + + +class CMIP6Info(InfoBase): """ Class to read CMIP6-like data request. 
@@ -103,31 +249,23 @@ class CMIP6Info(object): """ - _CMIP_5to6_varname = { - 'sic': 'siconc', - 'sit': 'sivol', - 'tro3': 'o3', - 'usi': 'siu', - 'vsi': 'siv', - } - def __init__(self, cmor_tables_path, default=None, + alt_names=None, strict=True, default_table_prefix=''): + + super().__init__(default, alt_names, strict) cmor_tables_path = self._get_cmor_path(cmor_tables_path) self._cmor_folder = os.path.join(cmor_tables_path, 'Tables') if glob.glob(os.path.join(self._cmor_folder, '*_CV.json')): self._load_controlled_vocabulary() - self.default = default - self.strict = strict + self.default_table_prefix = default_table_prefix - self.tables = {} self.var_to_freq = {} - self.strict = strict self._load_coordinates() for json_file in glob.glob(os.path.join(self._cmor_folder, '*.json')): @@ -177,7 +315,6 @@ def _load_table(self, json_file): self.var_to_freq[table.name][var_name] = var.frequency if not table.frequency: - from collections import Counter var_freqs = (var.frequency for var in table.values()) table_freq, _ = Counter(var_freqs).most_common(1)[0] table.frequency = table_freq @@ -255,54 +392,6 @@ def get_table(self, table): except KeyError: return self.tables.get(''.join((self.default_table_prefix, table))) - def get_variable(self, table_name, short_name, derived=False): - """ - Search and return the variable info. - - Parameters - ---------- - table_name: basestring - Table name - short_name: basestring - Variable's short name - derived: bool, optional - Variable is derived. 
Info retrieval is less strict - - Returns - ------- - VariableInfo - Return the VariableInfo object for the requested variable if - found, returns None if not - - """ - table = self.get_table(table_name) - if table: - try: - return table[short_name] - except KeyError: - pass - - if short_name in CMIP6Info._CMIP_5to6_varname: - new_short_name = CMIP6Info._CMIP_5to6_varname[short_name] - return self.get_variable(table_name, new_short_name, derived) - - var_info = None - if not self.strict: - for table_vars in sorted(self.tables.values()): - if short_name in table_vars: - var_info = table_vars[short_name] - break - if not var_info and (not self.strict or derived): - var_info = self.default.get_variable(table_name, short_name) - - if var_info: - mip_info = self.get_table(table_name) - if mip_info: - var_info = var_info.copy() - var_info.frequency = mip_info.frequency - - return var_info - @staticmethod def _is_table(table_data): if 'variable_entry' not in table_data: @@ -315,6 +404,7 @@ def _is_table(table_data): @total_ordering class TableInfo(dict): """Container class for storing a CMOR table.""" + def __init__(self, *args, **kwargs): """Create a new TableInfo object for storing VariableInfo objects.""" super(TableInfo, self).__init__(*args, **kwargs) @@ -341,6 +431,7 @@ class JsonInfo(object): Provides common utility methods to read json variables """ + def __init__(self): self._json_data = {} @@ -385,6 +476,7 @@ def _read_json_list_variable(self, parameter): class VariableInfo(JsonInfo): """Class to read and store variable information.""" + def __init__(self, table_type, short_name): """ Class to read and store variable information. 
@@ -462,8 +554,8 @@ def read_json(self, json_data, default_freq): self.valid_min = self._read_json_variable('valid_min') self.valid_max = self._read_json_variable('valid_max') self.positive = self._read_json_variable('positive') - self.modeling_realm = \ - self._read_json_variable('modeling_realm').split() + self.modeling_realm = self._read_json_variable( + 'modeling_realm').split() self.frequency = self._read_json_variable('frequency', default_freq) self.dimensions = self._read_json_variable('dimensions').split() @@ -471,6 +563,7 @@ def read_json(self, json_data, default_freq): class CoordinateInfo(JsonInfo): """Class to read and store coordinate information.""" + def __init__(self, name): """ Class to read and store coordinate information. @@ -513,6 +606,7 @@ def __init__(self, name): """Maximum allowed value""" self.must_have_bounds = "" """Whether bounds are required on this dimension""" + def read_json(self, json_data): """ Read coordinate information from json. @@ -542,7 +636,7 @@ def read_json(self, json_data): self.must_have_bounds = self._read_json_variable('must_have_bounds') -class CMIP5Info(object): +class CMIP5Info(InfoBase): """ Class to read CMIP5-like data request. 
@@ -559,7 +653,10 @@ class CMIP5Info(object): found in the requested one """ - def __init__(self, cmor_tables_path, default=None, strict=True): + + def __init__(self, cmor_tables_path, default=None, alt_names=None, + strict=True): + super().__init__(default, alt_names, strict) cmor_tables_path = self._get_cmor_path(cmor_tables_path) self._cmor_folder = os.path.join(cmor_tables_path, 'Tables') @@ -570,8 +667,6 @@ def __init__(self, cmor_tables_path, default=None, strict=True): self.strict = strict self.tables = {} self.coords = {} - self.default = default - self.strict = strict self._current_table = None self._last_line_read = None @@ -700,44 +795,6 @@ def get_table(self, table): """ return self.tables.get(table) - def get_variable(self, table, short_name, derived=False): - """ - Search and return the variable info. - - Parameters - ---------- - table: basestring - Table name - short_name: basestring - Variable's short name - derived: bool, optional - Variable is derived. Info retrieval is less strict - - Returns - ------- - VariableInfo - Return the VariableInfo object for the requested variable if - found, returns None if not - - """ - var_info = self.tables.get(table, {}).get(short_name, None) - if var_info: - return var_info - if not self.strict: - for table_vars in sorted(self.tables.values()): - if short_name in table_vars: - var_info = table_vars[short_name] - break - if not var_info and (derived or not self.strict): - var_info = self.default.get_variable(table, short_name) - - if var_info: - mip_info = self.get_table(table) - var_info = var_info.copy() - if mip_info: - var_info.frequency = mip_info.frequency - return var_info - class CMIP3Info(CMIP5Info): """ @@ -756,6 +813,7 @@ class CMIP3Info(CMIP5Info): found in the requested one """ + def _read_table_file(self, table_file, table=None): for dim in ('zlevel', ): coord = CoordinateInfo(dim) @@ -789,6 +847,7 @@ class CustomInfo(CMIP5Info): ESMValTool repository """ + def __init__(self, 
cmor_tables_path=None): cwd = os.path.dirname(os.path.realpath(__file__)) self._cmor_folder = os.path.join(cwd, 'tables', 'custom') @@ -817,24 +876,6 @@ def __init__(self, cmor_tables_path=None): print(msg) raise - def get_table(self, table): - """ - Search and return the table info. - - Parameters - ---------- - table: basestring - Table name - - Returns - ------- - TableInfo - Return the TableInfo object for the requested table if - found, returns None if not - - """ - return self.tables.get(table) - def get_variable(self, table, short_name, derived=False): """ Search and return the variable info. @@ -846,7 +887,9 @@ def get_variable(self, table, short_name, derived=False): short_name: basestring Variable's short name derived: bool, optional - Variable is derived. Info retrieval is less strict + Variable is derived. Info retrieval for derived variables always + look on the default tables if variable is not find in the + requested table Returns ------- diff --git a/esmvalcore/cmor/variable_alt_names.yml b/esmvalcore/cmor/variable_alt_names.yml new file mode 100644 index 0000000000..787a20c99a --- /dev/null +++ b/esmvalcore/cmor/variable_alt_names.yml @@ -0,0 +1,15 @@ +############################################################################### +# Variable short name aliases +############################################################################### +# This file contains the list of variable short name aliases that are used +# in different projects. 
Will allow us to keep track of changes in variable +# short names across projects to simplify the usage for the users +# +# This file contains a list of lists +############################################################################### +--- +- ['sic', 'siconc'] +- ['sit', 'sithick'] +- ['tro3', 'o3'] +- ['usi', 'siu'] +- ['vsi', 'siv'] \ No newline at end of file diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index b374357ca0..f132644c1d 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -198,7 +198,8 @@ def concatenate(cubes): return result -def save(cubes, filename, optimize_access='', compress=False, **kwargs): +def save(cubes, filename, optimize_access='', compress=False, alias='', + **kwargs): """ Save iris cubes to file. @@ -263,6 +264,12 @@ def save(cubes, filename, optimize_access='', compress=False, **kwargs): for index, length in enumerate(cube.shape)) kwargs['fill_value'] = GLOBAL_FILL_VALUE + if alias: + + for cube in cubes: + logger.debug( + 'Changing var_name from %s to %s', cube.var_name, alias) + cube.var_name = alias iris.save(cubes, **kwargs) return filename @@ -323,6 +330,8 @@ def write_metadata(products, write_ncl=False): if isinstance(product.attributes.get('exp'), (list, tuple)): product.attributes = dict(product.attributes) product.attributes['exp'] = '-'.join(product.attributes['exp']) + if 'original_short_name' in product.attributes: + del product.attributes['original_short_name'] metadata[product.filename] = product.attributes output_filename = os.path.join(output_dir, 'metadata.yml') diff --git a/tests/integration/cmor/test_table.py b/tests/integration/cmor/test_table.py index 0db967c5a4..63a5ab15f4 100644 --- a/tests/integration/cmor/test_table.py +++ b/tests/integration/cmor/test_table.py @@ -18,7 +18,11 @@ def setUpClass(cls): We read CMIP6Info once to keep tests times manageable """ cls.variables_info = CMIP6Info( - 'cmip6', default=CustomInfo(), strict=True + 'cmip6', 
default=CustomInfo(), strict=True, + alt_names=[ + ['sic', 'siconc'], + ['tro3', 'o3'], + ] ) def setUp(self): @@ -44,8 +48,8 @@ def test_get_variable_tas(self): var = self.variables_info.get_variable('Amon', 'tas') self.assertEqual(var.short_name, 'tas') - def test_get_variable_from_alias(self): - """Get a variable from a known alias.""" + def test_get_variable_from_alt_names(self): + """Get a variable from a known alt_names.""" var = self.variables_info.get_variable('SImon', 'sic') self.assertEqual(var.short_name, 'siconc') diff --git a/tests/integration/data_finder.yml b/tests/integration/data_finder.yml index 3d73cdd17e..81f907a046 100644 --- a/tests/integration/data_finder.yml +++ b/tests/integration/data_finder.yml @@ -4,6 +4,7 @@ get_output_file: - variable: &variable variable_group: test short_name: ta + original_short_name: ta dataset: HadGEM2-ES project: CMIP5 cmor_table: CMIP5 @@ -42,6 +43,36 @@ get_input_filelist: found_files: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc + - drs: default + variable: + variable_group: test + short_name: tro3 + original_short_name: o3 + dataset: HadGEM2-ES + project: CMIP6 + cmor_table: CMIP6 + institute: [INPE, MOHC] + frequency: mon + modeling_realm: [atmos] + mip: Amon + exp: historical + ensemble: r1i1p1 + start_year: 1960 + end_year: 1980 + diagnostic: test_diag + preprocessor: test_preproc + grid: gn + available_files: + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn_193412-195911.nc + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn_195912-198411.nc + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn_198412-200511.nc + dirs: + - '' + file_patterns: + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn*.nc + found_files: + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn_195912-198411.nc + - drs: default variable: <<: *variable diff --git a/tests/integration/preprocessor/_io/test_save.py b/tests/integration/preprocessor/_io/test_save.py index 12404c55da..cc6c98364c 100644 --- a/tests/integration/preprocessor/_io/test_save.py +++ 
b/tests/integration/preprocessor/_io/test_save.py @@ -57,6 +57,14 @@ def test_save(self): loaded_cube = iris.load_cube(path) self._compare_cubes(cube, loaded_cube) + def test_save_alias(self): + """Test save""" + cube, filename = self._create_sample_cube() + path = save([cube], filename, alias='alias') + loaded_cube = iris.load_cube(path) + self._compare_cubes(cube, loaded_cube) + self.assertEqual(loaded_cube.var_name, 'alias') + def test_save_zlib(self): """Test save""" cube, filename = self._create_sample_cube() diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index bf1864eaec..06a99d3fa3 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -697,6 +697,7 @@ def test_simple_cordex_recipe(tmp_path, patched_datafinder, 'recipe_dataset_index': 0, 'rcm_version': 'v1', 'short_name': 'tas', + 'original_short_name': 'tas', 'standard_name': 'air_temperature', 'start_year': 1991, 'units': 'K',