From 1705f6d4c50acf2c49a1780721c22c85bdfffb9d Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 30 Mar 2020 16:54:30 +0200 Subject: [PATCH 01/13] Improve alias managament --- esmvalcore/cmor/table.py | 187 +++++++++++++-------------- esmvalcore/cmor/variable_alias.yml | 15 +++ tests/integration/cmor/test_table.py | 6 +- 3 files changed, 107 insertions(+), 101 deletions(-) create mode 100644 esmvalcore/cmor/variable_alias.yml diff --git a/esmvalcore/cmor/table.py b/esmvalcore/cmor/table.py index 07a1ed86bf..406b96a7cb 100644 --- a/esmvalcore/cmor/table.py +++ b/esmvalcore/cmor/table.py @@ -50,6 +50,11 @@ def read_cmor_tables(cfg_developer=None): with cfg_file.open() as file: cfg_developer = yaml.safe_load(file) + cwd = os.path.dirname(os.path.realpath(__file__)) + alias_file = os.path.join(cwd, 'variable_alias.yml') + with open(alias_file, 'r') as yfile: + alias = yaml.safe_load(yfile) + custom = CustomInfo() CMOR_TABLES.clear() CMOR_TABLES['custom'] = custom @@ -68,22 +73,87 @@ def read_cmor_tables(cfg_developer=None): table_path, default=custom, strict=cmor_strict, + alias=alias, ) elif cmor_type == 'CMIP5': CMOR_TABLES[table] = CMIP5Info( table_path, default=custom, strict=cmor_strict, + alias=alias ) elif cmor_type == 'CMIP6': CMOR_TABLES[table] = CMIP6Info( table_path, default=custom, strict=cmor_strict, - default_table_prefix=default_table_prefix) + default_table_prefix=default_table_prefix, + alias=alias, + ) + + +class InfoBase(object): + + def get_variable(self, table_name, short_name, derived=False): + """ + Search and return the variable info. + + Parameters + ---------- + table_name: basestring + Table name + short_name: basestring + Variable's short name + derived: bool, optional + Variable is derived. 
Info retrieval is less strict + + Returns + ------- + VariableInfo + Return the VariableInfo object for the requested variable if + found, returns None if not + """ + aliases = [short_name] + for alias_list in self.alias: + if short_name in alias_list: + aliases.extend( + [alias for alias in alias_list if alias not in aliases]) + + table = self.get_table(table_name) + if table: + for alias in aliases: + try: + return table[alias] + except KeyError: + pass -class CMIP6Info(object): + var_info = None + if not self.strict: + for alias in aliases: + var_info = self._look_all_tables(alias) + if var_info: + break + if not var_info and (not self.strict or derived): + for alias in aliases: + var_info = self.default.get_variable(table_name, alias) + if var_info: + break + + if var_info: + mip_info = self.get_table(table_name) + var_info = var_info.copy() + if mip_info: + var_info.frequency = mip_info.frequency + + def _look_all_tables(self, alias): + for table_vars in sorted(self.tables.values()): + if alias in table_vars: + return table_vars[alias] + return None + + +class CMIP6Info(InfoBase): """ Class to read CMIP6-like data request. @@ -103,17 +173,10 @@ class CMIP6Info(object): """ - _CMIP_5to6_varname = { - 'sic': 'siconc', - 'sit': 'sivol', - 'tro3': 'o3', - 'usi': 'siu', - 'vsi': 'siv', - } - def __init__(self, cmor_tables_path, default=None, + alias=[], strict=True, default_table_prefix=''): cmor_tables_path = self._get_cmor_path(cmor_tables_path) @@ -122,6 +185,7 @@ def __init__(self, if glob.glob(os.path.join(self._cmor_folder, '*_CV.json')): self._load_controlled_vocabulary() self.default = default + self.alias = alias self.strict = strict self.default_table_prefix = default_table_prefix @@ -255,54 +319,6 @@ def get_table(self, table): except KeyError: return self.tables.get(''.join((self.default_table_prefix, table))) - def get_variable(self, table_name, short_name, derived=False): - """ - Search and return the variable info. 
- - Parameters - ---------- - table_name: basestring - Table name - short_name: basestring - Variable's short name - derived: bool, optional - Variable is derived. Info retrieval is less strict - - Returns - ------- - VariableInfo - Return the VariableInfo object for the requested variable if - found, returns None if not - - """ - table = self.get_table(table_name) - if table: - try: - return table[short_name] - except KeyError: - pass - - if short_name in CMIP6Info._CMIP_5to6_varname: - new_short_name = CMIP6Info._CMIP_5to6_varname[short_name] - return self.get_variable(table_name, new_short_name, derived) - - var_info = None - if not self.strict: - for table_vars in sorted(self.tables.values()): - if short_name in table_vars: - var_info = table_vars[short_name] - break - if not var_info and (not self.strict or derived): - var_info = self.default.get_variable(table_name, short_name) - - if var_info: - mip_info = self.get_table(table_name) - if mip_info: - var_info = var_info.copy() - var_info.frequency = mip_info.frequency - - return var_info - @staticmethod def _is_table(table_data): if 'variable_entry' not in table_data: @@ -315,6 +331,7 @@ def _is_table(table_data): @total_ordering class TableInfo(dict): """Container class for storing a CMOR table.""" + def __init__(self, *args, **kwargs): """Create a new TableInfo object for storing VariableInfo objects.""" super(TableInfo, self).__init__(*args, **kwargs) @@ -341,6 +358,7 @@ class JsonInfo(object): Provides common utility methods to read json variables """ + def __init__(self): self._json_data = {} @@ -385,6 +403,7 @@ def _read_json_list_variable(self, parameter): class VariableInfo(JsonInfo): """Class to read and store variable information.""" + def __init__(self, table_type, short_name): """ Class to read and store variable information. 
@@ -462,8 +481,8 @@ def read_json(self, json_data, default_freq): self.valid_min = self._read_json_variable('valid_min') self.valid_max = self._read_json_variable('valid_max') self.positive = self._read_json_variable('positive') - self.modeling_realm = \ - self._read_json_variable('modeling_realm').split() + self.modeling_realm = self._read_json_variable( + 'modeling_realm').split() self.frequency = self._read_json_variable('frequency', default_freq) self.dimensions = self._read_json_variable('dimensions').split() @@ -471,6 +490,7 @@ def read_json(self, json_data, default_freq): class CoordinateInfo(JsonInfo): """Class to read and store coordinate information.""" + def __init__(self, name): """ Class to read and store coordinate information. @@ -513,6 +533,7 @@ def __init__(self, name): """Maximum allowed value""" self.must_have_bounds = "" """Whether bounds are required on this dimension""" + def read_json(self, json_data): """ Read coordinate information from json. @@ -542,7 +563,7 @@ def read_json(self, json_data): self.must_have_bounds = self._read_json_variable('must_have_bounds') -class CMIP5Info(object): +class CMIP5Info(InfoBase): """ Class to read CMIP5-like data request. @@ -559,7 +580,8 @@ class CMIP5Info(object): found in the requested one """ - def __init__(self, cmor_tables_path, default=None, strict=True): + + def __init__(self, cmor_tables_path, default=None, alias={}, strict=True): cmor_tables_path = self._get_cmor_path(cmor_tables_path) self._cmor_folder = os.path.join(cmor_tables_path, 'Tables') @@ -572,6 +594,7 @@ def __init__(self, cmor_tables_path, default=None, strict=True): self.coords = {} self.default = default self.strict = strict + self.alias = alias self._current_table = None self._last_line_read = None @@ -700,44 +723,6 @@ def get_table(self, table): """ return self.tables.get(table) - def get_variable(self, table, short_name, derived=False): - """ - Search and return the variable info. 
- - Parameters - ---------- - table: basestring - Table name - short_name: basestring - Variable's short name - derived: bool, optional - Variable is derived. Info retrieval is less strict - - Returns - ------- - VariableInfo - Return the VariableInfo object for the requested variable if - found, returns None if not - - """ - var_info = self.tables.get(table, {}).get(short_name, None) - if var_info: - return var_info - if not self.strict: - for table_vars in sorted(self.tables.values()): - if short_name in table_vars: - var_info = table_vars[short_name] - break - if not var_info and (derived or not self.strict): - var_info = self.default.get_variable(table, short_name) - - if var_info: - mip_info = self.get_table(table) - var_info = var_info.copy() - if mip_info: - var_info.frequency = mip_info.frequency - return var_info - class CMIP3Info(CMIP5Info): """ @@ -756,6 +741,7 @@ class CMIP3Info(CMIP5Info): found in the requested one """ + def _read_table_file(self, table_file, table=None): for dim in ('zlevel', ): coord = CoordinateInfo(dim) @@ -789,6 +775,7 @@ class CustomInfo(CMIP5Info): ESMValTool repository """ + def __init__(self, cmor_tables_path=None): cwd = os.path.dirname(os.path.realpath(__file__)) self._cmor_folder = os.path.join(cwd, 'tables', 'custom') diff --git a/esmvalcore/cmor/variable_alias.yml b/esmvalcore/cmor/variable_alias.yml new file mode 100644 index 0000000000..787a20c99a --- /dev/null +++ b/esmvalcore/cmor/variable_alias.yml @@ -0,0 +1,15 @@ +############################################################################### +# Variable short name aliases +############################################################################### +# This file contains the list of variable short name aliases that are used +# in different projects. 
Will allow us to keep track of changes in variable +# short names across projects to simplify the usage for the users +# +# This file contains a list of lists +############################################################################### +--- +- ['sic', 'siconc'] +- ['sit', 'sithick'] +- ['tro3', 'o3'] +- ['usi', 'siu'] +- ['vsi', 'siv'] \ No newline at end of file diff --git a/tests/integration/cmor/test_table.py b/tests/integration/cmor/test_table.py index 0db967c5a4..2e090d17aa 100644 --- a/tests/integration/cmor/test_table.py +++ b/tests/integration/cmor/test_table.py @@ -18,7 +18,11 @@ def setUpClass(cls): We read CMIP6Info once to keep tests times manageable """ cls.variables_info = CMIP6Info( - 'cmip6', default=CustomInfo(), strict=True + 'cmip6', default=CustomInfo(), strict=True, + alias=[ + ['sic', 'siconc'], + ['tro3', 'o3'], + ] ) def setUp(self): From 7d840a5f245bc2a39e91bf66237e9202bf859a5d Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 31 Mar 2020 09:50:46 +0200 Subject: [PATCH 02/13] refactor table --- esmvalcore/cmor/table.py | 202 ++++++++++++++++++++++++--------------- 1 file changed, 126 insertions(+), 76 deletions(-) diff --git a/esmvalcore/cmor/table.py b/esmvalcore/cmor/table.py index 406b96a7cb..2c082f12c6 100644 --- a/esmvalcore/cmor/table.py +++ b/esmvalcore/cmor/table.py @@ -11,6 +11,7 @@ import logging import os from functools import total_ordering +from collections import Counter from pathlib import Path import yaml @@ -60,39 +61,91 @@ def read_cmor_tables(cfg_developer=None): CMOR_TABLES['custom'] = custom install_dir = os.path.dirname(os.path.realpath(__file__)) for table in cfg_developer: - project = cfg_developer[table] - cmor_type = project.get('cmor_type', 'CMIP5') - default_path = os.path.join(install_dir, 'tables', cmor_type.lower()) - table_path = project.get('cmor_path', default_path) - table_path = os.path.expandvars(os.path.expanduser(table_path)) - cmor_strict = project.get('cmor_strict', True) - 
default_table_prefix = project.get('cmor_default_table_prefix', '') - - if cmor_type == 'CMIP3': - CMOR_TABLES[table] = CMIP3Info( - table_path, - default=custom, - strict=cmor_strict, - alias=alias, - ) - elif cmor_type == 'CMIP5': - CMOR_TABLES[table] = CMIP5Info( - table_path, - default=custom, - strict=cmor_strict, - alias=alias - ) - elif cmor_type == 'CMIP6': - CMOR_TABLES[table] = CMIP6Info( - table_path, - default=custom, - strict=cmor_strict, - default_table_prefix=default_table_prefix, - alias=alias, - ) - - -class InfoBase(object): + CMOR_TABLES[table] = _read_table( + cfg_developer, table, install_dir, custom, alias) + + +def _read_table(cfg_developer, table, install_dir, custom, alias): + project = cfg_developer[table] + cmor_type = project.get('cmor_type', 'CMIP5') + default_path = os.path.join(install_dir, 'tables', cmor_type.lower()) + table_path = project.get('cmor_path', default_path) + table_path = os.path.expandvars(os.path.expanduser(table_path)) + cmor_strict = project.get('cmor_strict', True) + default_table_prefix = project.get('cmor_default_table_prefix', '') + + if cmor_type == 'CMIP3': + return CMIP3Info( + table_path, + default=custom, + strict=cmor_strict, + alias=alias, + ) + + if cmor_type == 'CMIP5': + return CMIP5Info( + table_path, + default=custom, + strict=cmor_strict, + alias=alias + ) + + if cmor_type == 'CMIP6': + return CMIP6Info( + table_path, + default=custom, + strict=cmor_strict, + default_table_prefix=default_table_prefix, + alias=alias, + ) + raise ValueError(f'Unsupported CMOR type {cmor_type}') + + +class InfoBase(): + """ + Base class for all table info classes. 
+ + This uses CMOR 3 json format + + Parameters + ---------- + default: object + Default table to look variables on if not found + + alias: list[list[str]] + List of known aliases for variables + + strict: bool + If False, will look for a variable in other tables if it can not be + found in the requested one + + """ + + def __init__(self, default, alias, strict): + if alias is None: + alias = "" + self.default = default + self.alias = alias + self.strict = strict + self.tables = {} + + def get_table(self, table): + """ + Search and return the table info. + + Parameters + ---------- + table: basestring + Table name + + Returns + ------- + TableInfo + Return the TableInfo object for the requested table if + found, returns None if not + + """ + return self.tables.get(table) def get_variable(self, table_name, short_name, derived=False): """ @@ -114,11 +167,7 @@ def get_variable(self, table_name, short_name, derived=False): found, returns None if not """ - aliases = [short_name] - for alias_list in self.alias: - if short_name in alias_list: - aliases.extend( - [alias for alias in alias_list if alias not in aliases]) + aliases = self._get_aliases(short_name) table = self.get_table(table_name) if table: @@ -128,23 +177,47 @@ def get_variable(self, table_name, short_name, derived=False): except KeyError: pass + var_info = self._look_in_all_tables(aliases) + if not var_info: + var_info = self._look_in_default(derived, aliases, table_name) + if var_info: + var_info = self._update_mip_info(table_name, var_info) + + return var_info + + def _look_in_default(self, derived, aliases, table_name): var_info = None - if not self.strict: + if (not self.strict or derived): for alias in aliases: - var_info = self._look_all_tables(alias) + var_info = self.default.get_variable(table_name, alias) if var_info: break - if not var_info and (not self.strict or derived): + return var_info + + def _look_in_all_tables(self, aliases): + var_info = None + if not self.strict: for alias in aliases: - 
var_info = self.default.get_variable(table_name, alias) + var_info = self._look_all_tables(alias) if var_info: break + return var_info - if var_info: - mip_info = self.get_table(table_name) - var_info = var_info.copy() - if mip_info: - var_info.frequency = mip_info.frequency + + def _get_aliases(self, short_name): + aliases = [short_name] + for alias_list in self.alias: + if short_name in alias_list: + aliases.extend( + [alias for alias in alias_list if alias not in aliases]) + return aliases + + def _update_mip_info(self, table_name, var_info): + var_info = var_info.copy() + mip_info = self.get_table(table_name) + if mip_info: + var_info.frequency = mip_info.frequency + return var_info def _look_all_tables(self, alias): for table_vars in sorted(self.tables.values()): @@ -176,22 +249,20 @@ class CMIP6Info(InfoBase): def __init__(self, cmor_tables_path, default=None, - alias=[], + alias=None, strict=True, default_table_prefix=''): + + super().__init__(default, alias, strict) cmor_tables_path = self._get_cmor_path(cmor_tables_path) self._cmor_folder = os.path.join(cmor_tables_path, 'Tables') if glob.glob(os.path.join(self._cmor_folder, '*_CV.json')): self._load_controlled_vocabulary() - self.default = default - self.alias = alias - self.strict = strict + self.default_table_prefix = default_table_prefix - self.tables = {} self.var_to_freq = {} - self.strict = strict self._load_coordinates() for json_file in glob.glob(os.path.join(self._cmor_folder, '*.json')): @@ -241,7 +312,6 @@ def _load_table(self, json_file): self.var_to_freq[table.name][var_name] = var.frequency if not table.frequency: - from collections import Counter var_freqs = (var.frequency for var in table.values()) table_freq, _ = Counter(var_freqs).most_common(1)[0] table.frequency = table_freq @@ -581,7 +651,8 @@ class CMIP5Info(InfoBase): """ - def __init__(self, cmor_tables_path, default=None, alias={}, strict=True): + def __init__(self, cmor_tables_path, default=None, alias=None, strict=True): + 
super().__init__(default, alias, strict) cmor_tables_path = self._get_cmor_path(cmor_tables_path) self._cmor_folder = os.path.join(cmor_tables_path, 'Tables') @@ -592,9 +663,6 @@ def __init__(self, cmor_tables_path, default=None, alias={}, strict=True): self.strict = strict self.tables = {} self.coords = {} - self.default = default - self.strict = strict - self.alias = alias self._current_table = None self._last_line_read = None @@ -804,24 +872,6 @@ def __init__(self, cmor_tables_path=None): print(msg) raise - def get_table(self, table): - """ - Search and return the table info. - - Parameters - ---------- - table: basestring - Table name - - Returns - ------- - TableInfo - Return the TableInfo object for the requested table if - found, returns None if not - - """ - return self.tables.get(table) - def get_variable(self, table, short_name, derived=False): """ Search and return the variable info. From 0a497b74cd27a0728204247c166acf58308bc491 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 31 Mar 2020 10:06:59 +0200 Subject: [PATCH 03/13] Fix flake8 --- esmvalcore/cmor/table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/cmor/table.py b/esmvalcore/cmor/table.py index 2c082f12c6..9182fedf06 100644 --- a/esmvalcore/cmor/table.py +++ b/esmvalcore/cmor/table.py @@ -651,7 +651,8 @@ class CMIP5Info(InfoBase): """ - def __init__(self, cmor_tables_path, default=None, alias=None, strict=True): + def __init__(self, cmor_tables_path, default=None, alias=None, + strict=True): super().__init__(default, alias, strict) cmor_tables_path = self._get_cmor_path(cmor_tables_path) From c52860a59d06d8123adcc91b202d59b67b2a4766 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 31 Mar 2020 10:20:54 +0200 Subject: [PATCH 04/13] Add original short name and use it in data_finder --- esmvalcore/_data_finder.py | 4 +++- esmvalcore/_recipe.py | 2 +- tests/integration/data_finder.yml | 31 +++++++++++++++++++++++++++++++ 3 files changed, 
35 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 7a27dee4f6..782042022b 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -219,10 +219,12 @@ def _get_filenames_glob(variable, drs): def _find_input_files(variable, rootpath, drs): + short_name = variable['short_name'] + variable['short_name'] = variable['original_short_name'] input_dirs = _find_input_dirs(variable, rootpath, drs) filenames_glob = _get_filenames_glob(variable, drs) files = find_files(input_dirs, filenames_glob) - + variable['short_name'] = short_name return (files, input_dirs, filenames_glob) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index b48f935f49..b8c402d6ee 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -109,7 +109,7 @@ def _add_cmor_info(variable, override=False): raise RecipeError( f"Unable to load CMOR table (project) '{project}' for variable " f"'{short_name}' with mip '{mip}'") - + variable['original_short_name'] = table_entry.short_name for key in cmor_keys: if key not in variable or override: value = getattr(table_entry, key, None) diff --git a/tests/integration/data_finder.yml b/tests/integration/data_finder.yml index 3d73cdd17e..81f907a046 100644 --- a/tests/integration/data_finder.yml +++ b/tests/integration/data_finder.yml @@ -4,6 +4,7 @@ get_output_file: - variable: &variable variable_group: test short_name: ta + original_short_name: ta dataset: HadGEM2-ES project: CMIP5 cmor_table: CMIP5 @@ -42,6 +43,36 @@ get_input_filelist: found_files: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc + - drs: default + variable: + variable_group: test + short_name: tro3 + original_short_name: o3 + dataset: HadGEM2-ES + project: CMIP6 + cmor_table: CMIP6 + institute: [INPE, MOHC] + frequency: mon + modeling_realm: [atmos] + mip: Amon + exp: historical + ensemble: r1i1p1 + start_year: 1960 + end_year: 1980 + diagnostic: test_diag + preprocessor: test_preproc + grid: gn + 
available_files: + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn_193412-195911.nc + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn_195912-198411.nc + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn_198412-200511.nc + dirs: + - '' + file_patterns: + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn*.nc + found_files: + - o3_Amon_HadGEM2-ES_historical_r1i1p1_gn_195912-198411.nc + - drs: default variable: <<: *variable From a081eec4abc5e41d96f434206f97dd7017fe6335 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 31 Mar 2020 10:39:52 +0200 Subject: [PATCH 05/13] Fix failing test --- tests/integration/test_recipe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index bf1864eaec..06a99d3fa3 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -697,6 +697,7 @@ def test_simple_cordex_recipe(tmp_path, patched_datafinder, 'recipe_dataset_index': 0, 'rcm_version': 'v1', 'short_name': 'tas', + 'original_short_name': 'tas', 'standard_name': 'air_temperature', 'start_year': 1991, 'units': 'K', From ed1aac173fbc594704112351476cf2182f7f89a9 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 31 Mar 2020 15:00:16 +0200 Subject: [PATCH 06/13] Change var_name to alias in recipe --- esmvalcore/_recipe.py | 2 ++ esmvalcore/preprocessor/_io.py | 9 ++++++++- tests/integration/preprocessor/_io/test_save.py | 8 ++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index b8c402d6ee..e8aa549097 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -348,6 +348,8 @@ def _get_default_settings(variable, config_user, derive=False): # Configure saving cubes to file settings['save'] = {'compress': config_user['compress_netcdf']} + if variable['short_name'] != variable['original_short_name']: + settings['save']['alias'] = variable['short_name'] return settings diff --git a/esmvalcore/preprocessor/_io.py 
b/esmvalcore/preprocessor/_io.py index b374357ca0..ff776dd370 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -198,7 +198,8 @@ def concatenate(cubes): return result -def save(cubes, filename, optimize_access='', compress=False, **kwargs): +def save(cubes, filename, optimize_access='', compress=False, alias='', + **kwargs): """ Save iris cubes to file. @@ -263,6 +264,12 @@ def save(cubes, filename, optimize_access='', compress=False, **kwargs): for index, length in enumerate(cube.shape)) kwargs['fill_value'] = GLOBAL_FILL_VALUE + if alias: + + for cube in cubes: + logger.debug( + 'Changing var_name from %s to %s', cube.var_name, alias) + cube.var_name = alias iris.save(cubes, **kwargs) return filename diff --git a/tests/integration/preprocessor/_io/test_save.py b/tests/integration/preprocessor/_io/test_save.py index 12404c55da..cc6c98364c 100644 --- a/tests/integration/preprocessor/_io/test_save.py +++ b/tests/integration/preprocessor/_io/test_save.py @@ -57,6 +57,14 @@ def test_save(self): loaded_cube = iris.load_cube(path) self._compare_cubes(cube, loaded_cube) + def test_save_alias(self): + """Test save""" + cube, filename = self._create_sample_cube() + path = save([cube], filename, alias='alias') + loaded_cube = iris.load_cube(path) + self._compare_cubes(cube, loaded_cube) + self.assertEqual(loaded_cube.var_name, 'alias') + def test_save_zlib(self): """Test save""" cube, filename = self._create_sample_cube() From d2913e1dc7c7eeac47f10de8477a776560222354 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 6 Apr 2020 13:50:54 +0200 Subject: [PATCH 07/13] Change variable alias to variable alt_names --- esmvalcore/cmor/table.py | 81 ++++++++++--------- ...iable_alias.yml => variable_alt_names.yml} | 0 tests/integration/cmor/test_table.py | 6 +- 3 files changed, 44 insertions(+), 43 deletions(-) rename esmvalcore/cmor/{variable_alias.yml => variable_alt_names.yml} (100%) diff --git a/esmvalcore/cmor/table.py 
b/esmvalcore/cmor/table.py index 9182fedf06..cfa6e0aa35 100644 --- a/esmvalcore/cmor/table.py +++ b/esmvalcore/cmor/table.py @@ -52,9 +52,9 @@ def read_cmor_tables(cfg_developer=None): cfg_developer = yaml.safe_load(file) cwd = os.path.dirname(os.path.realpath(__file__)) - alias_file = os.path.join(cwd, 'variable_alias.yml') - with open(alias_file, 'r') as yfile: - alias = yaml.safe_load(yfile) + var_alt_names_file = os.path.join(cwd, 'variable_alt_names.yml') + with open(var_alt_names_file, 'r') as yfile: + alt_names = yaml.safe_load(yfile) custom = CustomInfo() CMOR_TABLES.clear() @@ -62,10 +62,10 @@ def read_cmor_tables(cfg_developer=None): install_dir = os.path.dirname(os.path.realpath(__file__)) for table in cfg_developer: CMOR_TABLES[table] = _read_table( - cfg_developer, table, install_dir, custom, alias) + cfg_developer, table, install_dir, custom, alt_names) -def _read_table(cfg_developer, table, install_dir, custom, alias): +def _read_table(cfg_developer, table, install_dir, custom, alt_names): project = cfg_developer[table] cmor_type = project.get('cmor_type', 'CMIP5') default_path = os.path.join(install_dir, 'tables', cmor_type.lower()) @@ -79,7 +79,7 @@ def _read_table(cfg_developer, table, install_dir, custom, alias): table_path, default=custom, strict=cmor_strict, - alias=alias, + alt_names=alt_names, ) if cmor_type == 'CMIP5': @@ -87,7 +87,7 @@ def _read_table(cfg_developer, table, install_dir, custom, alias): table_path, default=custom, strict=cmor_strict, - alias=alias + alt_names=alt_names ) if cmor_type == 'CMIP6': @@ -96,7 +96,7 @@ def _read_table(cfg_developer, table, install_dir, custom, alias): default=custom, strict=cmor_strict, default_table_prefix=default_table_prefix, - alias=alias, + alt_names=alt_names, ) raise ValueError(f'Unsupported CMOR type {cmor_type}') @@ -112,8 +112,8 @@ class InfoBase(): default: object Default table to look variables on if not found - alias: list[list[str]] - List of known aliases for variables + alt_names: 
list[list[str]] + List of known alternative names for variables strict: bool If False, will look for a variable in other tables if it can not be @@ -121,11 +121,11 @@ class InfoBase(): """ - def __init__(self, default, alias, strict): - if alias is None: - alias = "" + def __init__(self, default, alt_names, strict): + if alt_names is None: + alt_names = "" self.default = default - self.alias = alias + self.alt_names = alt_names self.strict = strict self.tables = {} @@ -167,50 +167,51 @@ def get_variable(self, table_name, short_name, derived=False): found, returns None if not """ - aliases = self._get_aliases(short_name) + alt_names_list = self._get_alt_names_list(short_name) table = self.get_table(table_name) if table: - for alias in aliases: + for alt_names in alt_names_list: try: - return table[alias] + return table[alt_names] except KeyError: pass - var_info = self._look_in_all_tables(aliases) + var_info = self._look_in_all_tables(alt_names_list) if not var_info: - var_info = self._look_in_default(derived, aliases, table_name) + var_info = self._look_in_default( + derived, alt_names_list, table_name) if var_info: var_info = self._update_mip_info(table_name, var_info) return var_info - def _look_in_default(self, derived, aliases, table_name): + def _look_in_default(self, derived, alt_names_list, table_name): var_info = None if (not self.strict or derived): - for alias in aliases: - var_info = self.default.get_variable(table_name, alias) + for alt_names in alt_names_list: + var_info = self.default.get_variable(table_name, alt_names) if var_info: break return var_info - def _look_in_all_tables(self, aliases): + def _look_in_all_tables(self, alt_names_list): var_info = None if not self.strict: - for alias in aliases: - var_info = self._look_all_tables(alias) + for alt_names in alt_names_list: + var_info = self._look_all_tables(alt_names) if var_info: break return var_info - - def _get_aliases(self, short_name): - aliases = [short_name] - for alias_list in 
self.alias: - if short_name in alias_list: - aliases.extend( - [alias for alias in alias_list if alias not in aliases]) - return aliases + def _get_alt_names_list(self, short_name): + alt_names_list = [short_name] + for alt_names in self.alt_names: + if short_name in alt_names: + alt_names_list.extend( + [alt_name for alt_name in alt_names + if alt_name not in alt_names_list]) + return alt_names_list def _update_mip_info(self, table_name, var_info): var_info = var_info.copy() @@ -219,10 +220,10 @@ def _update_mip_info(self, table_name, var_info): var_info.frequency = mip_info.frequency return var_info - def _look_all_tables(self, alias): + def _look_all_tables(self, alt_names): for table_vars in sorted(self.tables.values()): - if alias in table_vars: - return table_vars[alias] + if alt_names in table_vars: + return table_vars[alt_names] return None @@ -249,11 +250,11 @@ class CMIP6Info(InfoBase): def __init__(self, cmor_tables_path, default=None, - alias=None, + alt_names=None, strict=True, default_table_prefix=''): - super().__init__(default, alias, strict) + super().__init__(default, alt_names, strict) cmor_tables_path = self._get_cmor_path(cmor_tables_path) self._cmor_folder = os.path.join(cmor_tables_path, 'Tables') @@ -651,9 +652,9 @@ class CMIP5Info(InfoBase): """ - def __init__(self, cmor_tables_path, default=None, alias=None, + def __init__(self, cmor_tables_path, default=None, alt_names=None, strict=True): - super().__init__(default, alias, strict) + super().__init__(default, alt_names, strict) cmor_tables_path = self._get_cmor_path(cmor_tables_path) self._cmor_folder = os.path.join(cmor_tables_path, 'Tables') diff --git a/esmvalcore/cmor/variable_alias.yml b/esmvalcore/cmor/variable_alt_names.yml similarity index 100% rename from esmvalcore/cmor/variable_alias.yml rename to esmvalcore/cmor/variable_alt_names.yml diff --git a/tests/integration/cmor/test_table.py b/tests/integration/cmor/test_table.py index 2e090d17aa..63a5ab15f4 100644 --- 
a/tests/integration/cmor/test_table.py +++ b/tests/integration/cmor/test_table.py @@ -19,7 +19,7 @@ def setUpClass(cls): """ cls.variables_info = CMIP6Info( 'cmip6', default=CustomInfo(), strict=True, - alias=[ + alt_names=[ ['sic', 'siconc'], ['tro3', 'o3'], ] @@ -48,8 +48,8 @@ def test_get_variable_tas(self): var = self.variables_info.get_variable('Amon', 'tas') self.assertEqual(var.short_name, 'tas') - def test_get_variable_from_alias(self): - """Get a variable from a known alias.""" + def test_get_variable_from_alt_names(self): + """Get a variable from a known alt_names.""" var = self.variables_info.get_variable('SImon', 'sic') self.assertEqual(var.short_name, 'siconc') From 7f999c18b960136678060be10d5050c67115ed84 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 6 Apr 2020 13:57:06 +0200 Subject: [PATCH 08/13] Address comments --- esmvalcore/cmor/table.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/esmvalcore/cmor/table.py b/esmvalcore/cmor/table.py index cfa6e0aa35..bba016f4d0 100644 --- a/esmvalcore/cmor/table.py +++ b/esmvalcore/cmor/table.py @@ -158,7 +158,9 @@ def get_variable(self, table_name, short_name, derived=False): short_name: basestring Variable's short name derived: bool, optional - Variable is derived. Info retrieval is less strict + Variable is derived. 
Info retrieval for derived variables always + looks in the default tables if the variable is not found in the + requested table Returns ------- @@ -182,7 +184,8 @@ def get_variable(self, table_name, short_name, derived=False): var_info = self._look_in_default( derived, alt_names_list, table_name) if var_info: - var_info = self._update_mip_info(table_name, var_info) + var_info = var_info.copy() + var_info = self._update_frequency_from_mip(table_name, var_info) return var_info @@ -213,8 +216,7 @@ def _get_alt_names_list(self, short_name): if alt_name not in alt_names_list]) return alt_names_list - def _update_mip_info(self, table_name, var_info): - var_info = var_info.copy() + def _update_frequency_from_mip(self, table_name, var_info): mip_info = self.get_table(table_name) if mip_info: var_info.frequency = mip_info.frequency @@ -885,7 +887,9 @@ def get_variable(self, table, short_name, derived=False): short_name: basestring Variable's short name derived: bool, optional - Variable is derived. Info retrieval is less strict + Variable is derived. 
Info retrieval for derived variables always + looks in the default tables if the variable is not found in the + requested table Returns ------- From d59a2a71cd88c5c71309f228435e5d109e6433aa Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 1 Jul 2020 19:24:44 +0200 Subject: [PATCH 09/13] Remove original short name from metadata --- esmvalcore/preprocessor/_io.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index ff776dd370..f132644c1d 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -330,6 +330,8 @@ def write_metadata(products, write_ncl=False): if isinstance(product.attributes.get('exp'), (list, tuple)): product.attributes = dict(product.attributes) product.attributes['exp'] = '-'.join(product.attributes['exp']) + if 'original_short_name' in product.attributes: + del product.attributes['original_short_name'] metadata[product.filename] = product.attributes output_filename = os.path.join(output_dir, 'metadata.yml') From 6d582fa20f3671d04a2967d18ede13a80c9c416b Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 1 Oct 2020 10:45:40 +0200 Subject: [PATCH 10/13] Add doc --- doc/recipe/overview.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index 85a80f5180..c95db0ef25 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -317,6 +317,16 @@ define the ``start_year`` and ``end_year`` items in the variable section, because the diagnostic script assumes that all the data has the same time range. +Variable short names usually do not change between datasets supported by +ESMValTool, as they are usually changed to match CMIP. Nevertheless, there are +small changes in variable names in CMIP6 respect to CMIP5 (i.e. sea ice +concentration changed from ``sic`` to ``siconc``). ESMValTool is aware of some +of them and can do the automatic translation when needed. 
It will even do the +translation in the preprocessed file so the diagnostic does not have to default +with this complexity. For example, if ``sic`` is requested, ESMValTool will +find ``sic`` or ``siconc`` depending on the project, but all preprocessed files +while use ``sic`` as their short_name. + Diagnostic and variable specific datasets ----------------------------------------- The ``additional_datasets`` option can be used to add datasets beyond those From b358cd8df54e840928d45b20a53db5a705b3360a Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 1 Oct 2020 11:18:34 +0200 Subject: [PATCH 11/13] Update doc/recipe/overview.rst Co-authored-by: Bouwe Andela --- doc/recipe/overview.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index c95db0ef25..9e91d51edf 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -319,7 +319,7 @@ range. Variable short names usually do not change between datasets supported by ESMValTool, as they are usually changed to match CMIP. Nevertheless, there are -small changes in variable names in CMIP6 respect to CMIP5 (i.e. sea ice +small changes in variable names in CMIP6 with respect to CMIP5 (i.e. sea ice concentration changed from ``sic`` to ``siconc``). ESMValTool is aware of some of them and can do the automatic translation when needed. 
It will even do the translation in the preprocessed file so the diagnostic does not have to default From 2b471b143651c0f8f6d1f5aad92dc0ce31b48736 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 1 Oct 2020 11:21:01 +0200 Subject: [PATCH 12/13] Update overview.rst --- doc/recipe/overview.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index 9e91d51edf..5169947984 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -323,9 +323,12 @@ small changes in variable names in CMIP6 with respect to CMIP5 (i.e. sea ice concentration changed from ``sic`` to ``siconc``). ESMValTool is aware of some of them and can do the automatic translation when needed. It will even do the translation in the preprocessed file so the diagnostic does not have to default -with this complexity. For example, if ``sic`` is requested, ESMValTool will +with this complexity, setting the short name in all files to match the one used +by the recipe. For example, if ``sic`` is requested, ESMValTool will find ``sic`` or ``siconc`` depending on the project, but all preprocessed files -while use ``sic`` as their short_name. +while use ``sic`` as their short_name. If the recipe requested ``siconc``, the +preprocessed files will be identical except that they will use the short_name +``siconc`` instead. 
Diagnostic and variable specific datasets ----------------------------------------- From 0d1a782aaf451de200d5695e2eea9fdfa6ff3ef0 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 1 Oct 2020 14:20:13 +0200 Subject: [PATCH 13/13] Small improvements to documentation --- doc/recipe/overview.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index 5169947984..cac5bc086d 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -318,11 +318,11 @@ because the diagnostic script assumes that all the data has the same time range. Variable short names usually do not change between datasets supported by -ESMValTool, as they are usually changed to match CMIP. Nevertheless, there are +ESMValCore, as they are usually changed to match CMIP. Nevertheless, there are small changes in variable names in CMIP6 with respect to CMIP5 (i.e. sea ice -concentration changed from ``sic`` to ``siconc``). ESMValTool is aware of some +concentration changed from ``sic`` to ``siconc``). ESMValCore is aware of some of them and can do the automatic translation when needed. It will even do the -translation in the preprocessed file so the diagnostic does not have to default +translation in the preprocessed file so the diagnostic does not have to deal with this complexity, setting the short name in all files to match the one used by the recipe. For example, if ``sic`` is requested, ESMValTool will find ``sic`` or ``siconc`` depending on the project, but all preprocessed files