From d3207450ac0cb0f23b00b46c48f176382ea862b1 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 29 Apr 2019 15:31:39 +0100 Subject: [PATCH 01/54] added fx path and file templates for fx vars for CMIP6 --- esmvaltool/config-developer.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/esmvaltool/config-developer.yml b/esmvaltool/config-developer.yml index 06433da1bf..628315861c 100644 --- a/esmvaltool/config-developer.yml +++ b/esmvaltool/config-developer.yml @@ -25,6 +25,10 @@ CMIP6: DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/' input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc' + input_fx_dir: + default: '/' + BADC: '[institute]/[dataset]/[exp]/[ensemble]/[fxdir]/[short_name]/[grid]/[latestversion]' + input_fx_file: '[fx_var]_[fxdir]_[dataset]_[exp]_[ensemble]_[grid].nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' institutes: From 6810170181c50c921ba499401b040359338252f0 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 29 Apr 2019 15:41:06 +0100 Subject: [PATCH 02/54] replaced dodgy fxdir with actual mip --- esmvaltool/config-developer.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvaltool/config-developer.yml b/esmvaltool/config-developer.yml index 628315861c..aa2b6604c2 100644 --- a/esmvaltool/config-developer.yml +++ b/esmvaltool/config-developer.yml @@ -27,8 +27,8 @@ CMIP6: input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc' input_fx_dir: default: '/' - BADC: '[institute]/[dataset]/[exp]/[ensemble]/[fxdir]/[short_name]/[grid]/[latestversion]' - input_fx_file: '[fx_var]_[fxdir]_[dataset]_[exp]_[ensemble]_[grid].nc' + BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' + input_fx_file: '[fx_var]_[mip]_[dataset]_[exp]_[ensemble]_[grid].nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' institutes: From 07adaca072bbcd8c442dcf70d1e07b429a4d6f6c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 29 Apr 2019 15:43:22 +0100 Subject: [PATCH 03/54] replaced fxvar to short_name --- esmvaltool/config-developer.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvaltool/config-developer.yml b/esmvaltool/config-developer.yml index aa2b6604c2..cee0fea0cd 100644 --- a/esmvaltool/config-developer.yml +++ b/esmvaltool/config-developer.yml @@ -28,7 +28,7 @@ CMIP6: input_fx_dir: default: '/' BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' - input_fx_file: '[fx_var]_[mip]_[dataset]_[exp]_[ensemble]_[grid].nc' + input_fx_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid].nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' institutes: From 09e3a1264d86adec6bd15765d93f415a055e528a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 1 May 2019 12:19:00 +0100 Subject: [PATCH 04/54] implemented changes for fx files support for CMIP6 and actual treatment of fx variables as regular variables --- esmvaltool/_data_finder.py | 17 ++++++---- esmvaltool/_recipe.py | 60 ++++++++++++++++++++++++++++-------- esmvaltool/_recipe_checks.py | 18 ++++++----- 3 files changed, 69 insertions(+), 26 deletions(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index ba6e21d532..2529578984 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -19,7 +19,7 @@ def find_files(dirnames, filenames): """Find files matching filenames in dirnames.""" - logger.debug("Looking for files matching %s in %s", filenames, dirnames) + logger.info("Looking for files matching %s in %s", filenames, dirnames) result = [] for dirname in dirnames: @@ -229,9 +229,9 @@ def _get_filenames_glob(variable, drs, fx_var=None): def _find_input_files(variable, rootpath, drs, fx_var=None): - logger.debug("Looking for input %sfiles for variable %s of dataset %s", - fx_var + ' fx ' if fx_var else '', variable['short_name'], - variable['dataset']) + logger.info("Looking for input %sfiles for variable %s of dataset %s", + fx_var + ' fx ' if fx_var else '', variable['short_name'], + variable['dataset']) input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var) filenames_glob = _get_filenames_glob(variable, drs, fx_var) @@ -243,16 +243,21 @@ def _find_input_files(variable, rootpath, drs, fx_var=None): def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" files = _find_input_files(variable, rootpath, drs) - files = select_files(files, variable['start_year'], variable['end_year']) + if 'fxvar' not in variable.keys(): + files = select_files(files, variable['start_year'], + variable['end_year']) return files def get_input_fx_filelist(variable, rootpath, drs): """Return a dict with the full path to fx input files.""" fx_files = {} + if 'fxvar' in variable.keys(): + variable['fx_files'] = [variable['short_name']] for fx_var in variable['fx_files']: var = dict(variable) - var['mip'] = replace_mip_fx(fx_var) + if var['project'] == 'CMIP5': + var['mip'] = replace_mip_fx(fx_var) table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) var['frequency'] = table.frequency realm = getattr(table.get(var['short_name']), 'modeling_realm', None) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 5478a2f55d..90256bed72 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -106,6 +106,7 @@ def _add_cmor_info(variable, override=False): logger.warning("Unknown CMOR table %s", variable['cmor_table']) derive = variable.get('derive', False) + fx_att = variable.get('fxvar', False) # Copy the following keys from CMOR table cmor_keys = [ 'standard_name', 'long_name', 'units', 'modeling_realm', 'frequency' @@ -118,6 +119,9 @@ def _add_cmor_info(variable, override=False): if derive and table_entry is None: custom_table = CMOR_TABLES['custom'] table_entry = custom_table.get_variable(mip, short_name) + if fx_att and table_entry is None: + custom_table = CMOR_TABLES['custom'] + table_entry = custom_table.get_variable(mip, short_name) if table_entry is None: raise RecipeError( @@ -438,10 +442,17 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset.""" # Find input files locally. - input_files = get_input_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + if 'fxvar' not in variable.keys(): + input_files = get_input_filelist( + variable=variable, + rootpath=config_user['rootpath'], + drs=config_user['drs']) + else: + input_files = get_input_fx_filelist( + variable=variable, + rootpath=config_user['rootpath'], + drs=config_user['drs'])[variable['short_name']] + input_files = [input_files] # Set up downloading using synda if requested. # Do not download if files are already available locally. @@ -607,7 +618,8 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, settings=settings, config_user=config_user) _update_fx_settings( - settings=settings, variable=variable, config_user=config_user) + settings=settings, variable=variable, + config_user=config_user) _update_target_grid( variable=variable, variables=variables, @@ -866,9 +878,15 @@ def _initialize_datasets(raw_datasets): check.duplicate_datasets(datasets) return datasets - def _initialize_variables(self, raw_variable, raw_datasets): + def _initialize_variables(self, raw_variable, + raw_datasets, raw_variables): """Define variables for all datasets.""" variables = [] + # identify which ones are fx variables + fxvariables = [ + d for d in raw_variables if + 'fxvar' in list(raw_variables[d].keys()) + ] raw_variable = deepcopy(raw_variable) datasets = self._initialize_datasets( @@ -908,10 +926,28 @@ def _initialize_variables(self, raw_variable, raw_datasets): for fx_file in variable['fx_files']: DATASET_KEYS.add(fx_file) # Get the fx files - variable['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) + for fx_var in variable['fx_files']: + if fxvariables: + real_fx_var = [ + raw_variables[k] for k in + fxvariables if k == fx_var + ][0] + if real_fx_var: + fx_var_copy = deepcopy(variable) + fx_var_copy['short_name'] = fx_var + fx_var_copy['mip'] = real_fx_var['mip'] + fx_var_copy['fxvar'] = True + fx_var_copy['grid'] = real_fx_var['grid'] + fx_var_copy['variable_group'] = fx_var + variable['fx_files'] = get_input_fx_filelist( + variable=fx_var_copy, + rootpath=self._cfg['rootpath'], + drs=self._cfg['drs']) + else: + variable['fx_files'] = get_input_fx_filelist( + variable=variable, + rootpath=self._cfg['rootpath'], + drs=self._cfg['drs']) logger.info("Using fx files for var %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], variable['fx_files']) @@ -925,7 +961,6 @@ def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, diagnostic_name) preprocessor_output = {} - for variable_group, raw_variable in raw_variables.items(): raw_variable = deepcopy(raw_variable) raw_variable['variable_group'] = variable_group @@ -935,7 +970,8 @@ def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, raw_variable['preprocessor'] = str( raw_variable.get('preprocessor', 'default')) preprocessor_output[variable_group] = \ - self._initialize_variables(raw_variable, raw_datasets) + self._initialize_variables(raw_variable, + raw_datasets, raw_variables) return preprocessor_output diff --git a/esmvaltool/_recipe_checks.py b/esmvaltool/_recipe_checks.py index a6ab607df0..3cdceb563f 100644 --- a/esmvaltool/_recipe_checks.py +++ b/esmvaltool/_recipe_checks.py @@ -97,15 +97,17 @@ def data_availability(input_files, var): required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() - for filename in input_files: - start, end = get_start_end_year(filename) - available_years.update(range(start, end + 1)) + if 'fxvar' not in var.keys(): + for filename in input_files: + start, end = get_start_end_year(filename) + available_years.update(range(start, end + 1)) - missing_years = required_years - available_years - if missing_years: - raise RecipeError( - "No input data available for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), input_files)) + missing_years = required_years - available_years + if missing_years: + raise RecipeError( + "No input data available for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files)) def tasks_valid(tasks): From 5747d44d66cf9c535534923115a48a766700592b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 1 May 2019 15:47:23 +0100 Subject: [PATCH 05/54] added a dedicated function for fx files dicovery and added profile with no time extraction for fx variables --- esmvaltool/_data_finder.py | 2 +- esmvaltool/_recipe.py | 70 +++++++++++++++++++++++--------------- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index 2529578984..183786b2cb 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -19,7 +19,7 @@ def find_files(dirnames, filenames): """Find files matching filenames in dirnames.""" - logger.info("Looking for files matching %s in %s", filenames, dirnames) + logger.debug("Looking for files matching %s in %s", filenames, dirnames) result = [] for dirname in dirnames: diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 90256bed72..fad209fa3f 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -784,6 +784,16 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): name=derive_name) derive_tasks.append(task) + # set profile if fxvar + if 'fxvar' in variable.keys(): + profile = {'fix_file': True, + 'load': True, + 'fix_metadata': True, + 'extract_time': False, + 'cmor_check_metadata': True, + 'save': True, + 'cleanup': True} + # Create (final) preprocessor task task = _get_single_preprocessor_task( variables, @@ -878,6 +888,37 @@ def _initialize_datasets(raw_datasets): check.duplicate_datasets(datasets) return datasets + def _get_fx_files(self, variable, raw_variables, fxvariables): + """Get all the fx files in variable.""" + for fx_var in variable['fx_files']: + real_fx_var = [] + if fxvariables: + real_fx_var = [ + raw_variables[k] for k in + fxvariables if k == fx_var + ][0] + if real_fx_var: + fx_var_copy = deepcopy(variable) + fx_var_copy['short_name'] = fx_var + fx_var_copy['mip'] = real_fx_var['mip'] + fx_var_copy['fxvar'] = True + fx_var_copy['grid'] = real_fx_var['grid'] + fx_var_copy['variable_group'] = fx_var + variable['fx_files'] = get_input_fx_filelist( + variable=fx_var_copy, + rootpath=self._cfg['rootpath'], + drs=self._cfg['drs']) + # compatible with CMIP5 old way + else: + variable['fx_files'] = get_input_fx_filelist( + variable=variable, + rootpath=self._cfg['rootpath'], + drs=self._cfg['drs']) + logger.info("Using fx files for var %s of dataset %s:\n%s", + variable['short_name'], variable['dataset'], + variable['fx_files']) + return variable + def _initialize_variables(self, raw_variable, raw_datasets, raw_variables): """Define variables for all datasets.""" @@ -925,33 +966,8 @@ def _initialize_variables(self, raw_variable, if 'fx_files' in variable: for fx_file in variable['fx_files']: DATASET_KEYS.add(fx_file) - # Get the fx files - for fx_var in variable['fx_files']: - if fxvariables: - real_fx_var = [ - raw_variables[k] for k in - fxvariables if k == fx_var - ][0] - if real_fx_var: - fx_var_copy = deepcopy(variable) - fx_var_copy['short_name'] = fx_var - fx_var_copy['mip'] = real_fx_var['mip'] - fx_var_copy['fxvar'] = True - fx_var_copy['grid'] = real_fx_var['grid'] - fx_var_copy['variable_group'] = fx_var - variable['fx_files'] = get_input_fx_filelist( - variable=fx_var_copy, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) - else: - variable['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) - logger.info("Using fx files for var %s of dataset %s:\n%s", - variable['short_name'], variable['dataset'], - variable['fx_files']) - + variable = self._get_fx_files(variable, + raw_variables, fxvariables) return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, From 53a7f41a10963e626a57f37875bfde8610c2dd69 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 1 May 2019 16:06:31 +0100 Subject: [PATCH 06/54] added conditional on fx frequency to disable time axis checks --- esmvaltool/cmor/check.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/esmvaltool/cmor/check.py b/esmvaltool/cmor/check.py index be72ad7575..5e0a95ded7 100644 --- a/esmvaltool/cmor/check.py +++ b/esmvaltool/cmor/check.py @@ -95,13 +95,15 @@ def check_metadata(self, logger=None): self._check_fill_value() self._check_dim_names() self._check_coords() - self._check_time_coord() + if self.frequency != 'fx': + self._check_time_coord() self._check_rank() self.report_warnings(logger) self.report_errors() - self._add_auxiliar_time_coordinates() + if self.frequency != 'fx': + self._add_auxiliar_time_coordinates() return self._cube def report_errors(self): From 70da3c7883dd1a364c03ef7c77b6641b71ed1990 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 1 May 2019 17:34:16 +0100 Subject: [PATCH 07/54] reassigning the output of the fxvar var as input for fx_file for the other variable --- esmvaltool/_recipe.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index fad209fa3f..04d944a6e9 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -890,6 +890,7 @@ def _initialize_datasets(raw_datasets): def _get_fx_files(self, variable, raw_variables, fxvariables): """Get all the fx files in variable.""" + dict_variable_fx = {} for fx_var in variable['fx_files']: real_fx_var = [] if fxvariables: @@ -904,16 +905,18 @@ def _get_fx_files(self, variable, raw_variables, fxvariables): fx_var_copy['fxvar'] = True fx_var_copy['grid'] = real_fx_var['grid'] fx_var_copy['variable_group'] = fx_var - variable['fx_files'] = get_input_fx_filelist( - variable=fx_var_copy, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) + # use the output of the fx variable as input fx_file + dict_variable_fx[fx_var] = get_output_file( + fx_var_copy, + self._cfg['preproc_dir']) # compatible with CMIP5 old way else: variable['fx_files'] = get_input_fx_filelist( variable=variable, rootpath=self._cfg['rootpath'], drs=self._cfg['drs']) + if dict_variable_fx: + variable['fx_files'] = dict_variable_fx logger.info("Using fx files for var %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], variable['fx_files']) From d9565ba51b570fa1eb4c50ed515ad21cf5d7cdba Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 2 May 2019 12:02:39 +0100 Subject: [PATCH 08/54] generalize fx variable handling for CMIP5 as well --- esmvaltool/_data_finder.py | 3 +++ esmvaltool/_recipe.py | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index 183786b2cb..1ff8039b72 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -258,6 +258,9 @@ def get_input_fx_filelist(variable, rootpath, drs): var = dict(variable) if var['project'] == 'CMIP5': var['mip'] = replace_mip_fx(fx_var) + var['frequency'] = 'fx' + var['modeling_realm'] = 'fx' + var['ensemble'] = 'r0i0p0' table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) var['frequency'] = table.frequency realm = getattr(table.get(var['short_name']), 'modeling_realm', None) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 04d944a6e9..559c9edc90 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -901,9 +901,15 @@ def _get_fx_files(self, variable, raw_variables, fxvariables): if real_fx_var: fx_var_copy = deepcopy(variable) fx_var_copy['short_name'] = fx_var - fx_var_copy['mip'] = real_fx_var['mip'] + if fx_var_copy['project'] == 'CMIP5': + fx_var_copy['mip'] = 'fx' + else: + fx_var_copy['mip'] = real_fx_var['mip'] fx_var_copy['fxvar'] = True - fx_var_copy['grid'] = real_fx_var['grid'] + if fx_var_copy['project'] == 'CMIP6': + # force grid if different from parent variable + if 'grid' in real_fx_var.keys(): + fx_var_copy['grid'] = real_fx_var['grid'] fx_var_copy['variable_group'] = fx_var # use the output of the fx variable as input fx_file dict_variable_fx[fx_var] = get_output_file( @@ -961,7 +967,11 @@ def _initialize_variables(self, raw_variable, } for variable in variables: - _update_from_others(variable, ['cmor_table', 'mip'], datasets) + # special case fo cmip5 fx variables + if 'fxvar' in variable.keys() and variable['project'] == 'CMIP5': + variable['mip'] = 'fx' + else: + _update_from_others(variable, ['cmor_table', 'mip'], datasets) institute = get_institutes(variable) if institute: variable['institute'] = institute From d8f7ae86a1e5060072a2ad7e650b303694b27710 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 2 May 2019 15:37:47 +0100 Subject: [PATCH 09/54] correct implementation of fx files handling for both cmip5 and cmip6 including cmor checks and output --- esmvaltool/_recipe.py | 74 +++++++++++++++++++++++++------------------ esmvaltool/_task.py | 23 +++++++++++--- 2 files changed, 62 insertions(+), 35 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 559c9edc90..882eeb1bf3 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -365,6 +365,38 @@ def _get_default_settings(variable, config_user, derive=False): return settings +def _convert_fxvar_to_cmor(fx_var, variable, real_fx_var): + """Convert fx var to full cmor var with variable as parent.""" + fx_var_copy = deepcopy(variable) + fx_var_copy['short_name'] = fx_var + if fx_var_copy['project'] == 'CMIP5': + fx_var_copy['mip'] = 'fx' + else: + fx_var_copy['mip'] = real_fx_var['mip'] + fx_var_copy['fxvar'] = True + if fx_var_copy['project'] == 'CMIP6': + # force grid if different from parent variable + if 'grid' in real_fx_var.keys(): + fx_var_copy['grid'] = real_fx_var['grid'] + fx_var_copy['variable_group'] = fx_var + + return fx_var_copy + + +def _update_fx_files(variable, config_user): + """Update the list of variables if needing special fx variables.""" + fx_files_dict = {} + for fx_var in variable['fx_files']: + real_fx_var = {'mip': 'fx'} + fx_var_copy = _convert_fxvar_to_cmor(fx_var, + variable, + real_fx_var) + fx_files_dict[fx_var] = get_output_file( + fx_var_copy, + config_user['preproc_dir']) + return fx_files_dict + + def _update_fx_settings(settings, variable, config_user): """Find and set the FX derive/mask settings.""" # update for derive @@ -374,10 +406,7 @@ def _update_fx_settings(settings, variable, config_user): if 'fx_files' in var: _augment(var, variable) fx_files.update( - get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs'])) + _update_fx_files(var, config_user)) settings['derive']['fx_files'] = fx_files # update for landsea @@ -389,10 +418,7 @@ def _update_fx_settings(settings, variable, config_user): var = dict(variable) var['fx_files'] = ['sftlf', 'sftof'] - fx_files_dict = get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + fx_files_dict = _update_fx_files(var, config_user) # allow both sftlf and sftof if fx_files_dict['sftlf']: @@ -407,10 +433,7 @@ def _update_fx_settings(settings, variable, config_user): var = dict(variable) var['fx_files'] = ['sftgif'] - fx_files_dict = get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + fx_files_dict = _update_fx_files(var, config_user) # allow sftgif (only, for now) if fx_files_dict['sftgif']: @@ -419,11 +442,8 @@ def _update_fx_settings(settings, variable, config_user): for step in ('average_region', 'average_volume'): if settings.get(step, {}).get('fx_files'): - settings[step]['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs'], - ) + settings[step]['fx_files'] = _update_fx_files(var, + config_user) def _read_attributes(filename): @@ -784,7 +804,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): name=derive_name) derive_tasks.append(task) - # set profile if fxvar + # set profile and tasks if fxvar if 'fxvar' in variable.keys(): profile = {'fix_file': True, 'load': True, @@ -793,6 +813,8 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): 'cmor_check_metadata': True, 'save': True, 'cleanup': True} + task_name = task_name.split( + TASKSEP)[0] + TASKSEP + 'fx_' + variables[0]['variable_group'] # Create (final) preprocessor task task = _get_single_preprocessor_task( @@ -899,19 +921,9 @@ def _get_fx_files(self, variable, raw_variables, fxvariables): fxvariables if k == fx_var ][0] if real_fx_var: - fx_var_copy = deepcopy(variable) - fx_var_copy['short_name'] = fx_var - if fx_var_copy['project'] == 'CMIP5': - fx_var_copy['mip'] = 'fx' - else: - fx_var_copy['mip'] = real_fx_var['mip'] - fx_var_copy['fxvar'] = True - if fx_var_copy['project'] == 'CMIP6': - # force grid if different from parent variable - if 'grid' in real_fx_var.keys(): - fx_var_copy['grid'] = real_fx_var['grid'] - fx_var_copy['variable_group'] = fx_var - # use the output of the fx variable as input fx_file + fx_var_copy = _convert_fxvar_to_cmor(fx_var, + variable, + real_fx_var) dict_variable_fx[fx_var] = get_output_file( fx_var_copy, self._cfg['preproc_dir']) diff --git a/esmvaltool/_task.py b/esmvaltool/_task.py index 091b0028a0..8b27281152 100644 --- a/esmvaltool/_task.py +++ b/esmvaltool/_task.py @@ -234,7 +234,6 @@ def _run(self, input_files): def str(self): """Return a nicely formatted description.""" - def _indent(txt): return '\n'.join('\t' + line for line in txt.split('\n')) @@ -578,20 +577,36 @@ def get_independent_tasks(tasks): return independent_tasks +def _priority_tasks(tasks): + """Assign priority to certain first tasks if needed.""" + first_tasks = [] + for task in get_independent_tasks(tasks): + if task.name.split('/')[-1].startswith('fx_'): + first_tasks.append(task) + get_independent_tasks(tasks).remove(task) + return first_tasks, get_independent_tasks(tasks) + + def run_tasks(tasks, max_parallel_tasks=None): """Run tasks.""" if max_parallel_tasks == 1: _run_tasks_sequential(tasks) else: - _run_tasks_parallel(tasks, max_parallel_tasks) + first_tasks, remainder_tasks = _priority_tasks(tasks) + if first_tasks: + _run_tasks_parallel(first_tasks, max_parallel_tasks) + _run_tasks_parallel(remainder_tasks, max_parallel_tasks) def _run_tasks_sequential(tasks): """Run tasks sequentially.""" n_tasks = len(get_flattened_tasks(tasks)) logger.info("Running %s tasks sequentially", n_tasks) - - for task in get_independent_tasks(tasks): + first_tasks, remainder_tasks = _priority_tasks(tasks) + if first_tasks: + for task in first_tasks: + task.run() + for task in remainder_tasks: task.run() From 266fcee70bc2aac4e076bd82a47cccd7364b95ce Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 3 May 2019 12:50:47 +0100 Subject: [PATCH 10/54] implemented automation if fx vars are present in variable but are not listed as variables in recipe --- esmvaltool/_recipe.py | 52 ++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 882eeb1bf3..03e57781e9 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -940,20 +940,9 @@ def _get_fx_files(self, variable, raw_variables, fxvariables): variable['fx_files']) return variable - def _initialize_variables(self, raw_variable, - raw_datasets, raw_variables): - """Define variables for all datasets.""" + def _assemble_varlist(self, raw_variable, datasets, fxvariables): + """Assemble complete list of variables.""" variables = [] - # identify which ones are fx variables - fxvariables = [ - d for d in raw_variables if - 'fxvar' in list(raw_variables[d].keys()) - ] - - raw_variable = deepcopy(raw_variable) - datasets = self._initialize_datasets( - raw_datasets + raw_variable.pop('additional_datasets', [])) - for index, dataset in enumerate(datasets): variable = deepcopy(raw_variable) variable.update(dataset) @@ -967,6 +956,43 @@ def _initialize_variables(self, raw_variable, variable['start_year'] + self._cfg['max_years'] - 1) variables.append(variable) + # extend variables if fx variables are needed but not in recipe + if not fxvariables and 'fx_files' in raw_variable.keys(): + for fx_var_name in raw_variable['fx_files']: + for index, dataset in enumerate(datasets): + fx_variable = deepcopy(raw_variable) + fx_variable['short_name'] = fx_var_name + fx_variable.update(dataset) + fx_variable['recipe_dataset_index'] = index + if ('cmor_table' not in fx_variable + and fx_variable.get('project') in CMOR_TABLES): + fx_variable['cmor_table'] = fx_variable['project'] + real_fx_var = {'mip': 'fx'} + fx_variable = _convert_fxvar_to_cmor(fx_var_name, + fx_variable, + real_fx_var) + del fx_variable['fx_files'] + variables.append(fx_variable) + + return variables + + def _initialize_variables(self, raw_variable, + raw_datasets, raw_variables): + """Define variables for all datasets.""" + # identify which other vars are fx vars for processing + fxvariables = [ + d for d in raw_variables if + 'fxvar' in list(raw_variables[d].keys()) + ] + + raw_variable = deepcopy(raw_variable) + datasets = self._initialize_datasets( + raw_datasets + raw_variable.pop('additional_datasets', [])) + + # get full list of variables + variables = self._assemble_varlist(raw_variable, + datasets, fxvariables) + required_keys = { 'short_name', 'mip', From fa456d74f4bd4b928b0d76b7a3092f4ab218d0ac Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 6 May 2019 19:34:43 +0100 Subject: [PATCH 11/54] rewritten the fx var setup and made it more robust --- esmvaltool/_recipe.py | 150 +++++++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 66 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 03e57781e9..7e7d6671ea 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -769,6 +769,39 @@ def append(group_prefix, var): return derive_input +def _get_fx_tasks(fx_variables, config_user, task_name, derive_tasks): + """Create a list of fx variables tasks.""" + fx_tasks = [] + fx_profile = {'fix_file': True, + 'load': True, + 'fix_metadata': True, + 'extract_time': False, + 'cmor_check_metadata': True, + 'save': True, + 'cleanup': True} + fx_short_names = list(set([var['short_name'] for var in fx_variables])) + + for fx_short_name in fx_short_names: + fx_variables_group = [ + var for var in fx_variables if var['short_name'] == fx_short_name + ] + fx_task_name = task_name.split( + TASKSEP)[0] + TASKSEP + 'fx_' + fx_short_name + logger.info("Creating preprocessor '%s' task for variable '%s'", + fx_variables_group[0]['preprocessor'], + fx_variables_group[0]['short_name']) + # Create (final) preprocessor task + fx_task = _get_single_preprocessor_task( + fx_variables_group, + fx_profile, + config_user, + ancestor_tasks=derive_tasks, + name=fx_task_name) + fx_tasks.append(fx_task) + + return fx_tasks + + def _get_preprocessor_task(variables, profiles, config_user, task_name): """Create preprocessor task(s) for a set of datasets.""" # First set up the preprocessor profile @@ -779,8 +812,6 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): "Unknown preprocessor {} in variable {} of diagnostic {}".format( preproc_name, variable['short_name'], variable['diagnostic'])) profile = deepcopy(profiles[variable['preprocessor']]) - logger.info("Creating preprocessor '%s' task for variable '%s'", - variable['preprocessor'], variable['short_name']) variables = _limit_datasets(variables, profile, config_user.get('max_datasets')) for variable in variables: @@ -804,27 +835,29 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): name=derive_name) derive_tasks.append(task) - # set profile and tasks if fxvar - if 'fxvar' in variable.keys(): - profile = {'fix_file': True, - 'load': True, - 'fix_metadata': True, - 'extract_time': False, - 'cmor_check_metadata': True, - 'save': True, - 'cleanup': True} - task_name = task_name.split( - TASKSEP)[0] + TASKSEP + 'fx_' + variables[0]['variable_group'] - - # Create (final) preprocessor task - task = _get_single_preprocessor_task( - variables, - profile, - config_user, - ancestor_tasks=derive_tasks, - name=task_name) - - return task + # set profile and tasks formultiple variables + tasks = [] + regular_variables = [var for var in variables if 'fxvar' not in var.keys()] + fx_variables = [var for var in variables if 'fxvar' in var.keys()] + + if fx_variables: + fx_task_list = _get_fx_tasks(fx_variables, config_user, + task_name, derive_tasks) + tasks.extend(fx_task_list) + if regular_variables: + logger.info("Creating preprocessor '%s' task for variable '%s'", + regular_variables[0]['preprocessor'], + regular_variables[0]['short_name']) + # add all regular variable task + task = _get_single_preprocessor_task( + regular_variables, + profile, + config_user, + ancestor_tasks=derive_tasks, + name=task_name) + tasks.append(task) + + return tasks class Recipe: @@ -910,29 +943,17 @@ def _initialize_datasets(raw_datasets): check.duplicate_datasets(datasets) return datasets - def _get_fx_files(self, variable, raw_variables, fxvariables): - """Get all the fx files in variable.""" + def _get_fx_files(self, variable, variables): + """Get all the fx files.""" dict_variable_fx = {} for fx_var in variable['fx_files']: - real_fx_var = [] - if fxvariables: - real_fx_var = [ - raw_variables[k] for k in - fxvariables if k == fx_var - ][0] - if real_fx_var: - fx_var_copy = _convert_fxvar_to_cmor(fx_var, - variable, - real_fx_var) - dict_variable_fx[fx_var] = get_output_file( - fx_var_copy, - self._cfg['preproc_dir']) - # compatible with CMIP5 old way - else: - variable['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) + fx_var_dict = [ + var for var in variables if var['short_name'] == fx_var + and var['dataset'] == variable['dataset'] + ][0] + dict_variable_fx[fx_var] = get_output_file( + fx_var_dict, + self._cfg['preproc_dir']) if dict_variable_fx: variable['fx_files'] = dict_variable_fx logger.info("Using fx files for var %s of dataset %s:\n%s", @@ -940,9 +961,10 @@ def _get_fx_files(self, variable, raw_variables, fxvariables): variable['fx_files']) return variable - def _assemble_varlist(self, raw_variable, datasets, fxvariables): + def _assemble_varlist(self, raw_variable, datasets): """Assemble complete list of variables.""" variables = [] + for index, dataset in enumerate(datasets): variable = deepcopy(raw_variable) variable.update(dataset) @@ -954,10 +976,13 @@ def _assemble_varlist(self, raw_variable, datasets, fxvariables): variable['end_year'] = min( variable['end_year'], variable['start_year'] + self._cfg['max_years'] - 1) + variables.append(variable) - # extend variables if fx variables are needed but not in recipe - if not fxvariables and 'fx_files' in raw_variable.keys(): + # look for fx files if needed + if not 'fx_files' in raw_variable.keys(): + return variables + else: for fx_var_name in raw_variable['fx_files']: for index, dataset in enumerate(datasets): fx_variable = deepcopy(raw_variable) @@ -971,27 +996,19 @@ def _assemble_varlist(self, raw_variable, datasets, fxvariables): fx_variable = _convert_fxvar_to_cmor(fx_var_name, fx_variable, real_fx_var) - del fx_variable['fx_files'] + if 'fx_files' in fx_variable.keys(): + del fx_variable['fx_files'] variables.append(fx_variable) + return variables - return variables - - def _initialize_variables(self, raw_variable, - raw_datasets, raw_variables): + def _initialize_variables(self, raw_variable, raw_datasets): """Define variables for all datasets.""" - # identify which other vars are fx vars for processing - fxvariables = [ - d for d in raw_variables if - 'fxvar' in list(raw_variables[d].keys()) - ] - raw_variable = deepcopy(raw_variable) datasets = self._initialize_datasets( raw_datasets + raw_variable.pop('additional_datasets', [])) # get full list of variables - variables = self._assemble_varlist(raw_variable, - datasets, fxvariables) + variables = self._assemble_varlist(raw_variable, datasets) required_keys = { 'short_name', @@ -1017,8 +1034,7 @@ def _initialize_variables(self, raw_variable, if 'fx_files' in variable: for fx_file in variable['fx_files']: DATASET_KEYS.add(fx_file) - variable = self._get_fx_files(variable, - raw_variables, fxvariables) + variable = self._get_fx_files(variable, variables) return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, @@ -1037,8 +1053,7 @@ def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, raw_variable['preprocessor'] = str( raw_variable.get('preprocessor', 'default')) preprocessor_output[variable_group] = \ - self._initialize_variables(raw_variable, - raw_datasets, raw_variables) + self._initialize_variables(raw_variable, raw_datasets) return preprocessor_output @@ -1123,13 +1138,16 @@ def initialize_tasks(self): for variable_group in diagnostic['preprocessor_output']: task_name = diagnostic_name + TASKSEP + variable_group logger.info("Creating preprocessor task %s", task_name) - task = _get_preprocessor_task( + + run_tasks = _get_preprocessor_task( variables=diagnostic['preprocessor_output'] [variable_group], profiles=self._preprocessors, config_user=self._cfg, task_name=task_name) - tasks.add(task) + + for task in run_tasks: + tasks.add(task) # Create diagnostic tasks for script_name, script_cfg in diagnostic['scripts'].items(): From 37f58a32f5cb9f98e7adda2678b5b1557ec60102 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 7 May 2019 14:29:08 +0100 Subject: [PATCH 12/54] cleaned up and modularized --- esmvaltool/_recipe.py | 123 +++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 55 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 7e7d6671ea..d4d1e7922c 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -6,11 +6,12 @@ from copy import deepcopy import yaml + from netCDF4 import Dataset -from . import __version__ from . import _recipe_checks as check -from ._config import TAGS, get_institutes, replace_tags +from . import __version__ +from ._config import TAGS, _get_cmip6_fx_mip, get_institutes, replace_tags from ._data_finder import (get_input_filelist, get_input_fx_filelist, get_output_file, get_statistic_output_file) from ._provenance import TrackedFile, get_recipe_provenance @@ -365,32 +366,36 @@ def _get_default_settings(variable, config_user, derive=False): return settings -def _convert_fxvar_to_cmor(fx_var, variable, real_fx_var): - """Convert fx var to full cmor var with variable as parent.""" - fx_var_copy = deepcopy(variable) - fx_var_copy['short_name'] = fx_var - if fx_var_copy['project'] == 'CMIP5': - fx_var_copy['mip'] = 'fx' - else: - fx_var_copy['mip'] = real_fx_var['mip'] - fx_var_copy['fxvar'] = True - if fx_var_copy['project'] == 'CMIP6': - # force grid if different from parent variable - if 'grid' in real_fx_var.keys(): - fx_var_copy['grid'] = real_fx_var['grid'] - fx_var_copy['variable_group'] = fx_var +def _convert_fxvar_to_cmor(fx_var, variable): + """ + Conversion from a string name to full variable dict + + Convert from string fx_var to full cmor fx variable + with variable as parent. + """ + fx_variable = deepcopy(variable) + + # add internal recognition flag + fx_variable['fxvar'] = True - return fx_var_copy + fx_variable['variable_group'] = fx_var + fx_variable['short_name'] = fx_var + + # specificities of project + if fx_variable['project'] == 'CMIP5': + fx_variable['mip'] = 'fx' + elif fx_variable['project'] == 'CMIP6': + fx_variable['grid'] = variable['grid'] + fx_variable['mip'] = _get_cmip6_fx_mip(fx_var) + + return fx_variable def _update_fx_files(variable, config_user): """Update the list of variables if needing special fx variables.""" fx_files_dict = {} for fx_var in variable['fx_files']: - real_fx_var = {'mip': 'fx'} - fx_var_copy = _convert_fxvar_to_cmor(fx_var, - variable, - real_fx_var) + fx_var_copy = _convert_fxvar_to_cmor(fx_var, variable) fx_files_dict[fx_var] = get_output_file( fx_var_copy, config_user['preproc_dir']) @@ -412,6 +417,10 @@ def _update_fx_settings(settings, variable, config_user): # update for landsea if 'mask_landsea' in settings: # Configure ingestion of land/sea masks + if 'fx_files' not in variable.keys(): + fx_sett = "fx_files: ['sftlf', 'sftof']" + logger.error("You need to specify %s for variable %s", + fx_sett, variable['short_name']) logger.debug('Getting fx mask settings now...') settings['mask_landsea']['fx_files'] = [] @@ -427,6 +436,10 @@ def _update_fx_settings(settings, variable, config_user): settings['mask_landsea']['fx_files'].append(fx_files_dict['sftof']) if 'mask_landseaice' in settings: + if 'fx_files' not in variable.keys(): + fx_sett = "fx_files: ['sftgif']" + logger.error("You need to specify %s for variable %s", + fx_sett, variable['short_name']) logger.debug('Getting fx mask settings now...') settings['mask_landseaice']['fx_files'] = [] @@ -956,9 +969,30 @@ def _get_fx_files(self, variable, variables): self._cfg['preproc_dir']) if dict_variable_fx: variable['fx_files'] = dict_variable_fx - logger.info("Using fx files for var %s of dataset %s:\n%s", - variable['short_name'], variable['dataset'], - variable['fx_files']) + logger.info("Using fx files for var %s of dataset %s:\n%s", + variable['short_name'], variable['dataset'], + variable['fx_files']) + return variable + + def _get_required_var_keys(self, variable, dataset, index, + is_fxvar=False, fx_var_name=None): + """Assemble correct variable attributes.""" + if is_fxvar and fx_var_name: + variable['short_name'] = fx_var_name + variable.update(dataset) + variable['recipe_dataset_index'] = index + if ('cmor_table' not in variable + and variable.get('project') in CMOR_TABLES): + variable['cmor_table'] = variable['project'] + if not is_fxvar: + if 'end_year' in variable and 'max_years' in self._cfg: + variable['end_year'] = min( + variable['end_year'], + variable['start_year'] + self._cfg['max_years'] - 1) + else: + variable = _convert_fxvar_to_cmor(fx_var_name, variable) + if 'fx_files' in variable.keys(): + del variable['fx_files'] return variable def _assemble_varlist(self, raw_variable, datasets): @@ -967,37 +1001,20 @@ def _assemble_varlist(self, raw_variable, datasets): for index, dataset in enumerate(datasets): variable = deepcopy(raw_variable) - variable.update(dataset) - variable['recipe_dataset_index'] = index - if ('cmor_table' not in variable - and variable.get('project') in CMOR_TABLES): - variable['cmor_table'] = variable['project'] - if 'end_year' in variable and 'max_years' in self._cfg: - variable['end_year'] = min( - variable['end_year'], - variable['start_year'] + self._cfg['max_years'] - 1) - + variable = self._get_required_var_keys(variable, dataset, + index) variables.append(variable) - # look for fx files if needed - if not 'fx_files' in raw_variable.keys(): + if 'fx_files' not in raw_variable.keys(): return variables else: for fx_var_name in raw_variable['fx_files']: for index, dataset in enumerate(datasets): fx_variable = deepcopy(raw_variable) - fx_variable['short_name'] = fx_var_name - fx_variable.update(dataset) - fx_variable['recipe_dataset_index'] = index - if ('cmor_table' not in fx_variable - and fx_variable.get('project') in CMOR_TABLES): - fx_variable['cmor_table'] = fx_variable['project'] - real_fx_var = {'mip': 'fx'} - fx_variable = _convert_fxvar_to_cmor(fx_var_name, - fx_variable, - real_fx_var) - if 'fx_files' in fx_variable.keys(): - del fx_variable['fx_files'] + fx_variable = self._get_required_var_keys( + fx_variable, dataset, index, + is_fxvar=True, fx_var_name=fx_var_name + ) variables.append(fx_variable) return variables @@ -1022,15 +1039,13 @@ def _initialize_variables(self, raw_variable, raw_datasets): } for variable in variables: - # special case fo cmip5 fx variables - if 'fxvar' in variable.keys() and variable['project'] == 'CMIP5': - variable['mip'] = 'fx' - else: - _update_from_others(variable, ['cmor_table', 'mip'], datasets) + _update_from_others(variable, ['cmor_table', 'mip'], datasets) institute = get_institutes(variable) if institute: variable['institute'] = institute check.variable(variable, required_keys) + + # add fx files information if 'fx_files' in variable: for fx_file in variable['fx_files']: DATASET_KEYS.add(fx_file) @@ -1138,14 +1153,12 @@ def initialize_tasks(self): for variable_group in diagnostic['preprocessor_output']: task_name = diagnostic_name + TASKSEP + variable_group logger.info("Creating preprocessor task %s", task_name) - run_tasks = _get_preprocessor_task( variables=diagnostic['preprocessor_output'] [variable_group], profiles=self._preprocessors, config_user=self._cfg, task_name=task_name) - for task in run_tasks: tasks.add(task) From bc83b8099cb041f8b6ed0af62c58dc020fd78f0b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 7 May 2019 14:30:11 +0100 Subject: [PATCH 13/54] added mapper for cmip6 fx mips - this needs more variables to be added --- esmvaltool/config-developer.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/esmvaltool/config-developer.yml b/esmvaltool/config-developer.yml index cee0fea0cd..e52aa33c39 100644 --- a/esmvaltool/config-developer.yml +++ b/esmvaltool/config-developer.yml @@ -29,6 +29,8 @@ CMIP6: default: '/' BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' input_fx_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid].nc' + fx_mip_change: + 'areacello': 'Ofx' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' institutes: From f575d4424c5f873a2b8351b41cfc59971cafaa28 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 7 May 2019 14:30:56 +0100 Subject: [PATCH 14/54] added function mapper for cmip6 fx variables --- esmvaltool/_config.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/esmvaltool/_config.py b/esmvaltool/_config.py index 221c06d026..dc12fc9ce5 100644 --- a/esmvaltool/_config.py +++ b/esmvaltool/_config.py @@ -187,6 +187,14 @@ def replace_mip_fx(fx_file): return new_mip +def _get_cmip6_fx_mip(fx_var_name): + """Set the correct mip for CMIP6 fx variables.""" + new_mip = CFG['CMIP6']['fx_mip_change'][fx_var_name] + logger.debug("Switching mip for fx variable %s to %s", + fx_var_name, new_mip) + return new_mip + + TAGS_CONFIG_FILE = os.path.join( os.path.dirname(__file__), 'config-references.yml') From eb6fdcde42462f7b9d9a81515bd5ec3f8497f187 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 7 May 2019 14:32:02 +0100 Subject: [PATCH 15/54] added mapping for cmip6 fx variable mip --- esmvaltool/_data_finder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index 1ff8039b72..7a79e39059 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -11,7 +11,7 @@ import six -from ._config import get_project_config, replace_mip_fx +from ._config import _get_cmip6_fx_mip, get_project_config, replace_mip_fx from .cmor.table import CMOR_TABLES logger = logging.getLogger(__name__) @@ -261,6 +261,8 @@ def get_input_fx_filelist(variable, rootpath, drs): var['frequency'] = 'fx' var['modeling_realm'] = 'fx' var['ensemble'] = 'r0i0p0' + elif var['project'] == 'CMIP6': + var['mip'] = _get_cmip6_fx_mip(var['short_name']) table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) var['frequency'] = table.frequency realm = getattr(table.get(var['short_name']), 'modeling_realm', None) From eafcecdede1d2ebf9742878e8b99cab72fa3bcf0 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 7 May 2019 15:04:55 +0100 Subject: [PATCH 16/54] fixed a snag that will assign fx file vars to fx file vars bleh --- esmvaltool/_data_finder.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index 7a79e39059..04811919c3 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -252,9 +252,12 @@ def get_input_filelist(variable, rootpath, drs): def get_input_fx_filelist(variable, rootpath, drs): """Return a dict with the full path to fx input files.""" fx_files = {} + # keep legacy implementation as well if 'fxvar' in variable.keys(): - variable['fx_files'] = [variable['short_name']] - for fx_var in variable['fx_files']: + fx_vars = [variable['short_name']] + else: + fx_vars = variable['fx_files'] + for fx_var in fx_vars: var = dict(variable) if var['project'] == 'CMIP5': var['mip'] = replace_mip_fx(fx_var) From 633c48f97348da6f8aed7811628213c18294c178 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 8 May 2019 13:56:50 +0100 Subject: [PATCH 17/54] cleaned up and restructured a bit; also added check on tasks remove duplicates --- esmvaltool/_recipe.py | 84 ++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index d4d1e7922c..314743bbfd 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -368,7 +368,7 @@ def _get_default_settings(variable, config_user, derive=False): def _convert_fxvar_to_cmor(fx_var, variable): """ - Conversion from a string name to full variable dict + Conversion from a string name to full variable dict. Convert from string fx_var to full cmor fx variable with variable as parent. @@ -391,13 +391,12 @@ def _convert_fxvar_to_cmor(fx_var, variable): return fx_variable -def _update_fx_files(variable, config_user): - """Update the list of variables if needing special fx variables.""" +def _update_fx_files(fx_varlist, config_user): + """Get the fx files dict for a list of fx variables.""" fx_files_dict = {} - for fx_var in variable['fx_files']: - fx_var_copy = _convert_fxvar_to_cmor(fx_var, variable) - fx_files_dict[fx_var] = get_output_file( - fx_var_copy, + for fx_variable in fx_varlist: + fx_files_dict[fx_variable['short_name']] = get_output_file( + fx_variable, config_user['preproc_dir']) return fx_files_dict @@ -410,8 +409,14 @@ def _update_fx_settings(settings, variable, config_user): for var in get_required(variable['short_name']): if 'fx_files' in var: _augment(var, variable) + # first convert the fx_file strings to real variables + fx_varlist = [ + _convert_fxvar_to_cmor(fx_var, variable) + for fx_var in variable['fx_files'] + ] + # now get the fx files output fx_files.update( - _update_fx_files(var, config_user)) + _update_fx_files(fx_varlist, config_user)) settings['derive']['fx_files'] = fx_files # update for landsea @@ -427,7 +432,13 @@ def _update_fx_settings(settings, variable, config_user): var = dict(variable) var['fx_files'] = ['sftlf', 'sftof'] - fx_files_dict = _update_fx_files(var, config_user) + # first convert the fx_file strings to real variables + fx_varlist = [ + _convert_fxvar_to_cmor(fx_var, variable) + for fx_var in var['fx_files'] + ] + # now get the files + fx_files_dict = _update_fx_files(fx_varlist, config_user) # allow both sftlf and sftof if fx_files_dict['sftlf']: @@ -446,7 +457,13 @@ def _update_fx_settings(settings, variable, config_user): var = dict(variable) var['fx_files'] = ['sftgif'] - fx_files_dict = _update_fx_files(var, config_user) + # first convert the fx_file strings to real variables + fx_varlist = [ + _convert_fxvar_to_cmor(fx_var, variable) + for fx_var in var['fx_files'] + ] + # now get the files + fx_files_dict = _update_fx_files(fx_varlist, config_user) # allow sftgif (only, for now) if fx_files_dict['sftgif']: @@ -455,7 +472,13 @@ def _update_fx_settings(settings, variable, config_user): for step in ('average_region', 'average_volume'): if settings.get(step, {}).get('fx_files'): - settings[step]['fx_files'] = _update_fx_files(var, + # first convert the fx_file strings to real variables + fx_varlist = [ + _convert_fxvar_to_cmor(fx_var, variable) + for fx_var in var['fx_files'] + ] + # now get the files + settings[step]['fx_files'] = _update_fx_files(fx_varlist, config_user) @@ -800,8 +823,7 @@ def _get_fx_tasks(fx_variables, config_user, task_name, derive_tasks): ] fx_task_name = task_name.split( TASKSEP)[0] + TASKSEP + 'fx_' + fx_short_name - logger.info("Creating preprocessor '%s' task for variable '%s'", - fx_variables_group[0]['preprocessor'], + logger.info("Creating preprocessor fx-default task for variable '%s'", fx_variables_group[0]['short_name']) # Create (final) preprocessor task fx_task = _get_single_preprocessor_task( @@ -956,24 +978,6 @@ def _initialize_datasets(raw_datasets): check.duplicate_datasets(datasets) return datasets - def _get_fx_files(self, variable, variables): - """Get all the fx files.""" - dict_variable_fx = {} - for fx_var in variable['fx_files']: - fx_var_dict = [ - var for var in variables if var['short_name'] == fx_var - and var['dataset'] == variable['dataset'] - ][0] - dict_variable_fx[fx_var] = get_output_file( - fx_var_dict, - self._cfg['preproc_dir']) - if dict_variable_fx: - variable['fx_files'] = dict_variable_fx - logger.info("Using fx files for var %s of dataset %s:\n%s", - variable['short_name'], variable['dataset'], - variable['fx_files']) - return variable - def _get_required_var_keys(self, variable, dataset, index, is_fxvar=False, fx_var_name=None): """Assemble correct variable attributes.""" @@ -1049,7 +1053,14 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'fx_files' in variable: for fx_file in variable['fx_files']: DATASET_KEYS.add(fx_file) - variable = self._get_fx_files(variable, variables) + fx_varlist = [ + var for var in variables if 'fxvar' in var.keys() + ] + variable['fx_files'] = _update_fx_files(fx_varlist, + self._cfg) + logger.info("Using fx files for var %s of dataset %s:\n%s", + variable['short_name'], variable['dataset'], + variable['fx_files']) return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, @@ -1173,6 +1184,15 @@ def initialize_tasks(self): name=task_name) tasks.add(task) + # remove duplicates and check + task_dict_list = [{t.name: t} for t in tasks] + unique_names = list(set([t.name for t in tasks])) + tasks = [] + for uniq_name in unique_names: + unique_task = [ + d[uniq_name] for d in task_dict_list + if uniq_name in d.keys()][0] + tasks.append(unique_task) check.tasks_valid(tasks) # Resolve diagnostic ancestors From 5084ef2a3c884d1b036d6ff1d6823e0ad88b63df Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 8 May 2019 14:41:55 +0100 Subject: [PATCH 18/54] found and fixed bug in variable derivation tasking --- esmvaltool/_recipe.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 314743bbfd..96f919b998 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -851,12 +851,19 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): config_user.get('max_datasets')) for variable in variables: _add_cmor_info(variable) + + # manage variable lists for regular and fx variables + tasks = [] + regular_variables = [var for var in variables if 'fxvar' not in var.keys()] + fx_variables = [var for var in variables if 'fxvar' in var.keys()] + # Create preprocessor task(s) derive_tasks = [] if variable.get('derive'): # Create tasks to prepare the input data for the derive step derive_profile, profile = _split_derive_profile(profile) - derive_input = _get_derive_input_variables(variables, config_user) + derive_input = _get_derive_input_variables(regular_variables, + config_user) for derive_variables in derive_input.values(): for derive_variable in derive_variables: @@ -870,11 +877,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): name=derive_name) derive_tasks.append(task) - # set profile and tasks formultiple variables - tasks = [] - regular_variables = [var for var in variables if 'fxvar' not in var.keys()] - fx_variables = [var for var in variables if 'fxvar' in var.keys()] - + # create tasks for regular and fx variables if fx_variables: fx_task_list = _get_fx_tasks(fx_variables, config_user, task_name, derive_tasks) From d83efe431684e0cf8d247513cc03cb6ddabc9329 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 8 May 2019 15:17:06 +0100 Subject: [PATCH 19/54] fixed another derive related bug --- esmvaltool/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 96f919b998..250263d4c8 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -412,7 +412,7 @@ def _update_fx_settings(settings, variable, config_user): # first convert the fx_file strings to real variables fx_varlist = [ _convert_fxvar_to_cmor(fx_var, variable) - for fx_var in variable['fx_files'] + for fx_var in var['fx_files'] ] # now get the fx files output fx_files.update( From 6bf63b43f15dbc34d42dcc0f46e1a2636d3c5aba Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 8 May 2019 18:13:17 +0100 Subject: [PATCH 20/54] reverting task to original since implementation with ancestors is in --- esmvaltool/_task.py | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/esmvaltool/_task.py b/esmvaltool/_task.py index 8b27281152..5de76fe6de 100644 --- a/esmvaltool/_task.py +++ b/esmvaltool/_task.py @@ -9,6 +9,7 @@ import subprocess import threading import time +from copy import deepcopy from multiprocessing import Pool, cpu_count import psutil @@ -234,12 +235,13 @@ def _run(self, input_files): def str(self): """Return a nicely formatted description.""" + def _indent(txt): return '\n'.join('\t' + line for line in txt.split('\n')) - txt = 'ancestors:\n{}'.format( - '\n\n'.join(_indent(str(task)) for task in self.ancestors) - if self.ancestors else 'None') + txt = 'ancestors:\n{}'.format('\n\n'.join( + _indent(str(task)) + for task in self.ancestors) if self.ancestors else 'None') return txt @@ -535,13 +537,15 @@ def _collect_provenance(self): ancestor_products = {p for a in self.ancestors for p in a.products} for filename, attributes in table.items(): + # copy to avoid updating other entries if file contains anchors + attributes = deepcopy(attributes) ancestor_files = attributes.pop('ancestors', []) ancestors = { p for p in ancestor_products if p.filename in ancestor_files } - attributes.update(attrs) + attributes.update(deepcopy(attrs)) for key in attributes: if key in TAGS: attributes[key] = replace_tags(key, attributes[key]) @@ -577,36 +581,20 @@ def get_independent_tasks(tasks): return independent_tasks -def _priority_tasks(tasks): - """Assign priority to certain first tasks if needed.""" - first_tasks = [] - for task in get_independent_tasks(tasks): - if task.name.split('/')[-1].startswith('fx_'): - first_tasks.append(task) - get_independent_tasks(tasks).remove(task) - return first_tasks, get_independent_tasks(tasks) - - def run_tasks(tasks, max_parallel_tasks=None): """Run tasks.""" if max_parallel_tasks == 1: _run_tasks_sequential(tasks) else: - first_tasks, remainder_tasks = _priority_tasks(tasks) - if first_tasks: - _run_tasks_parallel(first_tasks, max_parallel_tasks) - _run_tasks_parallel(remainder_tasks, max_parallel_tasks) + _run_tasks_parallel(tasks, max_parallel_tasks) def _run_tasks_sequential(tasks): """Run tasks sequentially.""" n_tasks = len(get_flattened_tasks(tasks)) logger.info("Running %s tasks sequentially", n_tasks) - first_tasks, remainder_tasks = _priority_tasks(tasks) - if first_tasks: - for task in first_tasks: - task.run() - for task in remainder_tasks: + + for task in get_independent_tasks(tasks): task.run() From 89ded150a7225c2d1dd8f53d0ad8749f9c0f3292 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 8 May 2019 18:19:39 +0100 Subject: [PATCH 21/54] added use of ancestors rather than assigning task priorities in task.py --- esmvaltool/_recipe.py | 105 ++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 56 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 250263d4c8..be282f50a6 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -709,7 +709,10 @@ def _get_single_preprocessor_task(variables, if ancestor_tasks is None: ancestor_tasks = [] order = _extract_preprocessor_order(profile) - ancestor_products = [p for task in ancestor_tasks for p in task.products] + ancestor_products = [ + p for task in ancestor_tasks for p in task.products + if not task.name.split('/')[1].startswith('fx_') + ] products = _get_preprocessor_products( variables=variables, profile=profile, @@ -805,9 +808,8 @@ def append(group_prefix, var): return derive_input -def _get_fx_tasks(fx_variables, config_user, task_name, derive_tasks): +def _get_fx_tasks(fx_variable, config_user, task_name): """Create a list of fx variables tasks.""" - fx_tasks = [] fx_profile = {'fix_file': True, 'load': True, 'fix_metadata': True, @@ -815,26 +817,20 @@ def _get_fx_tasks(fx_variables, config_user, task_name, derive_tasks): 'cmor_check_metadata': True, 'save': True, 'cleanup': True} - fx_short_names = list(set([var['short_name'] for var in fx_variables])) - for fx_short_name in fx_short_names: - fx_variables_group = [ - var for var in fx_variables if var['short_name'] == fx_short_name - ] - fx_task_name = task_name.split( - TASKSEP)[0] + TASKSEP + 'fx_' + fx_short_name - logger.info("Creating preprocessor fx-default task for variable '%s'", - fx_variables_group[0]['short_name']) - # Create (final) preprocessor task - fx_task = _get_single_preprocessor_task( - fx_variables_group, - fx_profile, - config_user, - ancestor_tasks=derive_tasks, - name=fx_task_name) - fx_tasks.append(fx_task) - - return fx_tasks + fx_task_name = task_name.split( + TASKSEP)[0] + TASKSEP + 'fx_' + fx_variable['short_name'] + logger.info("Creating preprocessor fx-default task for variable '%s'", + fx_variable['short_name']) + + # Create (final) preprocessor task + fx_task = _get_single_preprocessor_task( + [fx_variable], + fx_profile, + config_user, + name=fx_task_name) + + return fx_task def _get_preprocessor_task(variables, profiles, config_user, task_name): @@ -853,11 +849,17 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): _add_cmor_info(variable) # manage variable lists for regular and fx variables - tasks = [] regular_variables = [var for var in variables if 'fxvar' not in var.keys()] fx_variables = [var for var in variables if 'fxvar' in var.keys()] - # Create preprocessor task(s) + # create tasks for fx variables (first ones) + fx_tasks = [] + if fx_variables: + fx_tasks = [ + _get_fx_tasks(fx_var, config_user, task_name) + for fx_var in fx_variables] + + # create derive tasks derive_tasks = [] if variable.get('derive'): # Create tasks to prepare the input data for the derive step @@ -874,28 +876,25 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): derive_variables, derive_profile, config_user, + ancestor_tasks=fx_tasks, name=derive_name) derive_tasks.append(task) - # create tasks for regular and fx variables - if fx_variables: - fx_task_list = _get_fx_tasks(fx_variables, config_user, - task_name, derive_tasks) - tasks.extend(fx_task_list) - if regular_variables: - logger.info("Creating preprocessor '%s' task for variable '%s'", - regular_variables[0]['preprocessor'], - regular_variables[0]['short_name']) - # add all regular variable task - task = _get_single_preprocessor_task( - regular_variables, - profile, - config_user, - ancestor_tasks=derive_tasks, - name=task_name) - tasks.append(task) - - return tasks + # cumulative ancestors + all_ancestor_tasks = fx_tasks + derive_tasks + + logger.info("Creating preprocessor '%s' task for variable '%s'", + regular_variables[0]['preprocessor'], + regular_variables[0]['short_name']) + # add all regular variable task + task = _get_single_preprocessor_task( + regular_variables, + profile, + config_user, + ancestor_tasks=all_ancestor_tasks, + name=task_name) + + return task class Recipe: @@ -1167,14 +1166,13 @@ def initialize_tasks(self): for variable_group in diagnostic['preprocessor_output']: task_name = diagnostic_name + TASKSEP + variable_group logger.info("Creating preprocessor task %s", task_name) - run_tasks = _get_preprocessor_task( + task = _get_preprocessor_task( variables=diagnostic['preprocessor_output'] [variable_group], profiles=self._preprocessors, config_user=self._cfg, task_name=task_name) - for task in run_tasks: - tasks.add(task) + tasks.add(task) # Create diagnostic tasks for script_name, script_cfg in diagnostic['scripts'].items(): @@ -1187,16 +1185,11 @@ def initialize_tasks(self): name=task_name) tasks.add(task) - # remove duplicates and check - task_dict_list = [{t.name: t} for t in tasks] - unique_names = list(set([t.name for t in tasks])) - tasks = [] - for uniq_name in unique_names: - unique_task = [ - d[uniq_name] for d in task_dict_list - if uniq_name in d.keys()][0] - tasks.append(unique_task) - check.tasks_valid(tasks) + # check tasks + # TODO fix this: + # currently fails the check for variables with common + # fx files settings. if in the same diagnostic + # check.tasks_valid(tasks) # Resolve diagnostic ancestors self._resolve_diagnostic_ancestors(tasks) From 89377434ee747ed5e33296fc97d96678cf3d0eab Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 8 May 2019 21:43:45 +0100 Subject: [PATCH 22/54] fixed problem with overwriting ancestor files --- esmvaltool/_recipe.py | 68 +++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 4c61678f2d..e2e7df79ba 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -391,13 +391,14 @@ def _convert_fxvar_to_cmor(fx_var, variable): return fx_variable -def _update_fx_files(fx_varlist, config_user): +def _update_fx_files(fx_varlist, config_user, parent_variable): """Get the fx files dict for a list of fx variables.""" fx_files_dict = {} for fx_variable in fx_varlist: fx_files_dict[fx_variable['short_name']] = get_output_file( fx_variable, - config_user['preproc_dir']) + config_user['preproc_dir'], + parent_variable) return fx_files_dict @@ -416,7 +417,7 @@ def _update_fx_settings(settings, variable, config_user): ] # now get the fx files output fx_files.update( - _update_fx_files(fx_varlist, config_user)) + _update_fx_files(fx_varlist, config_user, var)) settings['derive']['fx_files'] = fx_files # update for landsea @@ -438,7 +439,7 @@ def _update_fx_settings(settings, variable, config_user): for fx_var in var['fx_files'] ] # now get the files - fx_files_dict = _update_fx_files(fx_varlist, config_user) + fx_files_dict = _update_fx_files(fx_varlist, config_user, var) # allow both sftlf and sftof if fx_files_dict['sftlf']: @@ -463,7 +464,7 @@ def _update_fx_settings(settings, variable, config_user): for fx_var in var['fx_files'] ] # now get the files - fx_files_dict = _update_fx_files(fx_varlist, config_user) + fx_files_dict = _update_fx_files(fx_varlist, config_user, var) # allow sftgif (only, for now) if fx_files_dict['sftgif']: @@ -479,7 +480,8 @@ def _update_fx_settings(settings, variable, config_user): ] # now get the files settings[step]['fx_files'] = _update_fx_files(fx_varlist, - config_user) + config_user, + var) def _read_attributes(filename): @@ -650,13 +652,18 @@ def get_matching(attributes): def _get_preprocessor_products(variables, profile, order, ancestor_products, - config_user): + config_user, parent_variable=None): """Get preprocessor product definitions for a set of datasets.""" products = set() for variable in variables: - variable['filename'] = get_output_file(variable, - config_user['preproc_dir']) + if not parent_variable: + variable['filename'] = get_output_file(variable, + config_user['preproc_dir']) + else: + variable['filename'] = get_output_file(variable, + config_user['preproc_dir'], + parent_variable) if ancestor_products: grouped_ancestors = _match_products(ancestor_products, variables) @@ -704,7 +711,8 @@ def _get_single_preprocessor_task(variables, profile, config_user, name, - ancestor_tasks=None): + ancestor_tasks=None, + parent_variable=None): """Create preprocessor tasks for a set of datasets.""" if ancestor_tasks is None: ancestor_tasks = [] @@ -713,13 +721,23 @@ def _get_single_preprocessor_task(variables, p for task in ancestor_tasks for p in task.products if not task.name.split('/')[1].startswith('fx_') ] - products = _get_preprocessor_products( - variables=variables, - profile=profile, - order=order, - ancestor_products=ancestor_products, - config_user=config_user, - ) + if not parent_variable: + products = _get_preprocessor_products( + variables=variables, + profile=profile, + order=order, + ancestor_products=ancestor_products, + config_user=config_user, + ) + else: + products = _get_preprocessor_products( + variables=variables, + profile=profile, + order=order, + ancestor_products=ancestor_products, + config_user=config_user, + parent_variable=parent_variable + ) if not products: raise RecipeError( @@ -808,7 +826,7 @@ def append(group_prefix, var): return derive_input -def _get_fx_tasks(fx_variable, config_user, task_name): +def _get_fx_tasks(fx_variable, config_user, task_name, parent_variable): """Create a list of fx variables tasks.""" fx_profile = {'fix_file': True, 'load': True, @@ -828,7 +846,8 @@ def _get_fx_tasks(fx_variable, config_user, task_name): [fx_variable], fx_profile, config_user, - name=fx_task_name) + name=fx_task_name, + parent_variable=parent_variable) return fx_task @@ -856,7 +875,8 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): fx_tasks = [] if fx_variables: fx_tasks = [ - _get_fx_tasks(fx_var, config_user, task_name) + _get_fx_tasks(fx_var, config_user, + task_name, regular_variables[0]) for fx_var in fx_variables] # create derive tasks @@ -1059,7 +1079,8 @@ def _initialize_variables(self, raw_variable, raw_datasets): var for var in variables if 'fxvar' in var.keys() ] variable['fx_files'] = _update_fx_files(fx_varlist, - self._cfg) + self._cfg, + variable) logger.info("Using fx files for var %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], variable['fx_files']) @@ -1189,10 +1210,7 @@ def initialize_tasks(self): tasks.add(task) # check tasks - # TODO fix this: - # currently fails the check for variables with common - # fx files settings. if in the same diagnostic - # check.tasks_valid(tasks) + check.tasks_valid(tasks) # Resolve diagnostic ancestors self._resolve_diagnostic_ancestors(tasks) From 79896c36a53ad4c45a4141d27529dbbb4f83162c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 8 May 2019 21:44:25 +0100 Subject: [PATCH 23/54] allow for a different outfile path if we have a parent variable --- esmvaltool/_data_finder.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index 04811919c3..57489e7f59 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -277,7 +277,7 @@ def get_input_fx_filelist(variable, rootpath, drs): return fx_files -def get_output_file(variable, preproc_dir): +def get_output_file(variable, preproc_dir, parent_var=None): """Return the full path to the output (preprocessed) file.""" cfg = get_project_config(variable['project']) @@ -286,12 +286,20 @@ def get_output_file(variable, preproc_dir): variable = dict(variable) variable['exp'] = '-'.join(variable['exp']) - outfile = os.path.join( - preproc_dir, - variable['diagnostic'], - variable['variable_group'], - _replace_tags(cfg['output_file'], variable)[0] + '.nc', - ) + if not parent_var: + outfile = os.path.join( + preproc_dir, + variable['diagnostic'], + variable['variable_group'], + _replace_tags(cfg['output_file'], variable)[0] + '.nc', + ) + else: + outfile = os.path.join( + preproc_dir, + variable['diagnostic'], + parent_var['variable_group'], + _replace_tags(cfg['output_file'], variable)[0] + '.nc', + ) return outfile From e4fe5c123f0f50e260b8c04c8d04030b4eba4a89 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 9 May 2019 13:14:42 +0100 Subject: [PATCH 24/54] generalized for inclusion of fx variables as regular variables in recipe --- esmvaltool/_data_finder.py | 39 ++++++++++++++++-------------------- esmvaltool/_recipe.py | 20 ++++++++++++++---- esmvaltool/_recipe_checks.py | 19 +++++++++--------- 3 files changed, 43 insertions(+), 35 deletions(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index 57489e7f59..34cfb6a7f4 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -19,7 +19,7 @@ def find_files(dirnames, filenames): """Find files matching filenames in dirnames.""" - logger.debug("Looking for files matching %s in %s", filenames, dirnames) + logger.info("Looking for files matching %s in %s", filenames, dirnames) result = [] for dirname in dirnames: @@ -252,27 +252,22 @@ def get_input_filelist(variable, rootpath, drs): def get_input_fx_filelist(variable, rootpath, drs): """Return a dict with the full path to fx input files.""" fx_files = {} - # keep legacy implementation as well - if 'fxvar' in variable.keys(): - fx_vars = [variable['short_name']] - else: - fx_vars = variable['fx_files'] - for fx_var in fx_vars: - var = dict(variable) - if var['project'] == 'CMIP5': - var['mip'] = replace_mip_fx(fx_var) - var['frequency'] = 'fx' - var['modeling_realm'] = 'fx' - var['ensemble'] = 'r0i0p0' - elif var['project'] == 'CMIP6': - var['mip'] = _get_cmip6_fx_mip(var['short_name']) - table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) - var['frequency'] = table.frequency - realm = getattr(table.get(var['short_name']), 'modeling_realm', None) - var['modeling_realm'] = realm if realm else table.realm - - files = _find_input_files(var, rootpath, drs, fx_var) - fx_files[fx_var] = files[0] if files else None + fx_var = variable['short_name'] + var = dict(variable) + if var['project'] == 'CMIP5': + var['mip'] = replace_mip_fx(fx_var) + var['frequency'] = 'fx' + var['modeling_realm'] = 'fx' + var['ensemble'] = 'r0i0p0' + elif var['project'] == 'CMIP6': + var['mip'] = _get_cmip6_fx_mip(var['short_name']) + table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) + var['frequency'] = table.frequency + realm = getattr(table.get(var['short_name']), 'modeling_realm', None) + var['modeling_realm'] = realm if realm else table.realm + + files = _find_input_files(var, rootpath, drs, fx_var) + fx_files[fx_var] = files[0] if files else None return fx_files diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index e2e7df79ba..4e870f41fa 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -501,10 +501,17 @@ def _get_input_files(variable, config_user): """Get the input files for a single dataset.""" # Find input files locally. if 'fxvar' not in variable.keys(): - input_files = get_input_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + if 'is_fx_variable' not in variable.keys(): + input_files = get_input_filelist( + variable=variable, + rootpath=config_user['rootpath'], + drs=config_user['drs']) + else: + input_files = get_input_fx_filelist( + variable=variable, + rootpath=config_user['rootpath'], + drs=config_user['drs'])[variable['short_name']] + input_files = [input_files] else: input_files = get_input_fx_filelist( variable=variable, @@ -906,6 +913,11 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): logger.info("Creating preprocessor '%s' task for variable '%s'", regular_variables[0]['preprocessor'], regular_variables[0]['short_name']) + + # don't do time gating for fx variables + if 'is_fx_variable' in regular_variables[0].keys(): + profile['extract_time'] = False + # add all regular variable task task = _get_single_preprocessor_task( regular_variables, diff --git a/esmvaltool/_recipe_checks.py b/esmvaltool/_recipe_checks.py index 3cdceb563f..e04c299039 100644 --- a/esmvaltool/_recipe_checks.py +++ b/esmvaltool/_recipe_checks.py @@ -98,16 +98,17 @@ def data_availability(input_files, var): required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() if 'fxvar' not in var.keys(): - for filename in input_files: - start, end = get_start_end_year(filename) - available_years.update(range(start, end + 1)) + if 'is_fx_variable' not in var.keys(): + for filename in input_files: + start, end = get_start_end_year(filename) + available_years.update(range(start, end + 1)) - missing_years = required_years - available_years - if missing_years: - raise RecipeError( - "No input data available for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), - input_files)) + missing_years = required_years - available_years + if missing_years: + raise RecipeError( + "No input data available for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files)) def tasks_valid(tasks): From adb2551e182201557e7ccbcba08aac24fc92ce67 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 9 May 2019 14:10:15 +0100 Subject: [PATCH 25/54] fixed data finder test --- tests/integration/test_data_finder.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 484fdc99b7..565cb314b1 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -103,7 +103,12 @@ def test_get_input_fx_filelist(root, cfg): # Find files rootpath = {cfg['variable']['project']: [root]} drs = {cfg['variable']['project']: cfg['drs']} - fx_files = get_input_fx_filelist(cfg['variable'], rootpath, drs) + fx_variable = dict(cfg['variable']) + fx_files = {} + for fxvar in cfg['variable']['fx_files']: + fx_variable['short_name'] = fxvar + fx_files[fxvar] = get_input_fx_filelist(fx_variable, + rootpath, drs)[fxvar] # Test result reference = { From 438bc0424577d9e3eb71d5013f1674fc0d260797 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 14:25:19 +0100 Subject: [PATCH 26/54] removed all references to fx files and dirs --- esmvaltool/config-developer.yml | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/esmvaltool/config-developer.yml b/esmvaltool/config-developer.yml index e52aa33c39..80b28157eb 100644 --- a/esmvaltool/config-developer.yml +++ b/esmvaltool/config-developer.yml @@ -24,13 +24,7 @@ CMIP6: BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc' - input_fx_dir: - default: '/' - BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' - input_fx_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid].nc' - fx_mip_change: - 'areacello': 'Ofx' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' institutes: @@ -153,26 +147,7 @@ CMIP5: ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/' SMHI: '[dataset]/[ensemble]/[exp]/[frequency]' BSC: '[type]/[project]/[exp]/[dataset.lower]' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_*.nc' - input_fx_dir: - default: '/' - BADC: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - CP4CDS: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[fx_var]/latest/' - DKRZ: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - ETHZ: '[exp]/fx/[fx_var]/[dataset]/r0i0p0' - input_fx_file: '[fx_var]_fx_[dataset]_[exp]_r0i0p0.nc' - fx_mip_change: - 'areacella': 'Amon' - 'areacello': 'Omon' - 'basin': 'Omon' - 'deptho': 'Omon' - 'mrsofc': 'Lmon' - 'orog': 'Amon' - 'rootd': 'Lmon' - 'sftgif': 'Lmon' - 'sftlf': 'Amon' - 'sftof': 'Omon' - 'volcello': 'Omon' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' institutes: 'ACCESS1-0': ['CSIRO-BOM'] From 5a5d61712198082659bae8602e7c576b5582f641 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 14:28:10 +0100 Subject: [PATCH 27/54] removed fx related functions --- esmvaltool/_data_finder.py | 57 ++++++++++---------------------------- 1 file changed, 14 insertions(+), 43 deletions(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index 34cfb6a7f4..098fd9c568 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -11,8 +11,7 @@ import six -from ._config import _get_cmip6_fx_mip, get_project_config, replace_mip_fx -from .cmor.table import CMOR_TABLES +from ._config import get_project_config logger = logging.getLogger(__name__) @@ -95,7 +94,7 @@ def select_files(filenames, start_year, end_year): return selection -def _replace_tags(path, variable, fx_var=None): +def _replace_tags(path, variable): """Replace tags in the config-developer's file with actual values.""" path = path.strip('/') @@ -106,9 +105,7 @@ def _replace_tags(path, variable, fx_var=None): original_tag = tag tag, _, _ = _get_caps_options(tag) - if tag == 'fx_var': - replacewith = fx_var - elif tag == 'latestversion': # handled separately later + if tag == 'latestversion': # handled separately later continue elif tag in variable: replacewith = variable[tag] @@ -198,16 +195,16 @@ def get_rootpath(rootpath, project): raise KeyError('default rootpath must be specified in config-user file') -def _find_input_dirs(variable, rootpath, drs, fx_var=None): +def _find_input_dirs(variable, rootpath, drs): """Return a the full paths to input directories.""" project = variable['project'] root = get_rootpath(rootpath, project) - input_type = 'input_{}dir'.format('fx_' if fx_var else '') + input_type = 'input_dir' path_template = _select_drs(input_type, drs, project) dirnames = [] - for dirname_template in _replace_tags(path_template, variable, fx_var): + for dirname_template in _replace_tags(path_template, variable): for base_path in root: dirname = os.path.join(base_path, dirname_template) dirname = _resolve_latestversion(dirname) @@ -220,21 +217,17 @@ def _find_input_dirs(variable, rootpath, drs, fx_var=None): return dirnames -def _get_filenames_glob(variable, drs, fx_var=None): +def _get_filenames_glob(variable, drs): """Return patterns that can be used to look for input files.""" - input_type = 'input_{}file'.format('fx_' if fx_var else '') + input_type = 'input_file' path_template = _select_drs(input_type, drs, variable['project']) - filenames_glob = _replace_tags(path_template, variable, fx_var) + filenames_glob = _replace_tags(path_template, variable) return filenames_glob -def _find_input_files(variable, rootpath, drs, fx_var=None): - logger.info("Looking for input %sfiles for variable %s of dataset %s", - fx_var + ' fx ' if fx_var else '', variable['short_name'], - variable['dataset']) - - input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var) - filenames_glob = _get_filenames_glob(variable, drs, fx_var) +def _find_input_files(variable, rootpath, drs): + input_dirs = _find_input_dirs(variable, rootpath, drs) + filenames_glob = _get_filenames_glob(variable, drs) files = find_files(input_dirs, filenames_glob) return files @@ -243,35 +236,13 @@ def _find_input_files(variable, rootpath, drs, fx_var=None): def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" files = _find_input_files(variable, rootpath, drs) - if 'fxvar' not in variable.keys(): + # do time gating only for non-fx variables + if variable['frequency'] != 'fx': files = select_files(files, variable['start_year'], variable['end_year']) return files -def get_input_fx_filelist(variable, rootpath, drs): - """Return a dict with the full path to fx input files.""" - fx_files = {} - fx_var = variable['short_name'] - var = dict(variable) - if var['project'] == 'CMIP5': - var['mip'] = replace_mip_fx(fx_var) - var['frequency'] = 'fx' - var['modeling_realm'] = 'fx' - var['ensemble'] = 'r0i0p0' - elif var['project'] == 'CMIP6': - var['mip'] = _get_cmip6_fx_mip(var['short_name']) - table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) - var['frequency'] = table.frequency - realm = getattr(table.get(var['short_name']), 'modeling_realm', None) - var['modeling_realm'] = realm if realm else table.realm - - files = _find_input_files(var, rootpath, drs, fx_var) - fx_files[fx_var] = files[0] if files else None - - return fx_files - - def get_output_file(variable, preproc_dir, parent_var=None): """Return the full path to the output (preprocessed) file.""" cfg = get_project_config(variable['project']) From 8db5f55d02a39206c155f08a3c18de211b2baca6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 14:29:36 +0100 Subject: [PATCH 28/54] time chacks to be done only if the variable is not fx --- esmvaltool/_recipe_checks.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/esmvaltool/_recipe_checks.py b/esmvaltool/_recipe_checks.py index e04c299039..ee0e569304 100644 --- a/esmvaltool/_recipe_checks.py +++ b/esmvaltool/_recipe_checks.py @@ -97,18 +97,18 @@ def data_availability(input_files, var): required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() - if 'fxvar' not in var.keys(): - if 'is_fx_variable' not in var.keys(): - for filename in input_files: - start, end = get_start_end_year(filename) - available_years.update(range(start, end + 1)) - - missing_years = required_years - available_years - if missing_years: - raise RecipeError( - "No input data available for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), - input_files)) + # check time avail only for non-fx variables + if var['frequency'] != 'fx': + for filename in input_files: + start, end = get_start_end_year(filename) + available_years.update(range(start, end + 1)) + + missing_years = required_years - available_years + if missing_years: + raise RecipeError( + "No input data available for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files)) def tasks_valid(tasks): From e16afb5c47dcf09665b167a45dd30a2e4e2beb50 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 14:38:39 +0100 Subject: [PATCH 29/54] inline changes for removal of fx files finding and a bit of cleanup --- esmvaltool/_recipe.py | 90 +++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 54 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 4e870f41fa..37b546582b 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -11,9 +11,9 @@ from . import _recipe_checks as check from . import __version__ -from ._config import TAGS, _get_cmip6_fx_mip, get_institutes, replace_tags -from ._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file, get_statistic_output_file) +from ._config import TAGS, get_institutes, replace_tags +from ._data_finder import (get_input_filelist, get_output_file, + get_statistic_output_file) from ._provenance import TrackedFile, get_recipe_provenance from ._recipe_checks import RecipeError from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks, @@ -366,27 +366,23 @@ def _get_default_settings(variable, config_user, derive=False): return settings -def _convert_fxvar_to_cmor(fx_var, variable): - """ - Conversion from a string name to full variable dict. - - Convert from string fx_var to full cmor fx variable - with variable as parent. - """ +def _add_fxvar_keys(fx_var_dict, variable): + """Add a couple keys specific to fx variable.""" fx_variable = deepcopy(variable) # add internal recognition flag fx_variable['fxvar'] = True - fx_variable['variable_group'] = fx_var - fx_variable['short_name'] = fx_var + fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable['short_name'] = fx_var_dict['short_name'] # specificities of project if fx_variable['project'] == 'CMIP5': fx_variable['mip'] = 'fx' elif fx_variable['project'] == 'CMIP6': fx_variable['grid'] = variable['grid'] - fx_variable['mip'] = _get_cmip6_fx_mip(fx_var) + if 'mip' in fx_var_dict: + fx_variable['mip'] = fx_var_dict['mip'] return fx_variable @@ -412,7 +408,7 @@ def _update_fx_settings(settings, variable, config_user): _augment(var, variable) # first convert the fx_file strings to real variables fx_varlist = [ - _convert_fxvar_to_cmor(fx_var, variable) + _add_fxvar_keys(fx_var, variable) for fx_var in var['fx_files'] ] # now get the fx files output @@ -435,7 +431,7 @@ def _update_fx_settings(settings, variable, config_user): var['fx_files'] = ['sftlf', 'sftof'] # first convert the fx_file strings to real variables fx_varlist = [ - _convert_fxvar_to_cmor(fx_var, variable) + _add_fxvar_keys(fx_var, variable) for fx_var in var['fx_files'] ] # now get the files @@ -460,7 +456,7 @@ def _update_fx_settings(settings, variable, config_user): var['fx_files'] = ['sftgif'] # first convert the fx_file strings to real variables fx_varlist = [ - _convert_fxvar_to_cmor(fx_var, variable) + _add_fxvar_keys(fx_var, variable) for fx_var in var['fx_files'] ] # now get the files @@ -475,7 +471,7 @@ def _update_fx_settings(settings, variable, config_user): if settings.get(step, {}).get('fx_files'): # first convert the fx_file strings to real variables fx_varlist = [ - _convert_fxvar_to_cmor(fx_var, variable) + _add_fxvar_keys(fx_var, variable) for fx_var in var['fx_files'] ] # now get the files @@ -500,36 +496,26 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset.""" # Find input files locally. - if 'fxvar' not in variable.keys(): - if 'is_fx_variable' not in variable.keys(): - input_files = get_input_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs']) - else: - input_files = get_input_fx_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs'])[variable['short_name']] - input_files = [input_files] - else: - input_files = get_input_fx_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs'])[variable['short_name']] - input_files = [input_files] - + var = dict(variable) + # change ensemble to fixed r0i0p0 + if var['project'] == 'CMIP5': + if var['frequency'] == 'fx': + var['ensemble'] = 'r0i0p0' + input_files = get_input_filelist( + variable=var, + rootpath=config_user['rootpath'], + drs=config_user['drs']) # Set up downloading using synda if requested. # Do not download if files are already available locally. if config_user['synda_download'] and not input_files: - input_files = synda_search(variable) + input_files = synda_search(var) logger.info("Using input files for variable %s of dataset %s:\n%s", - variable['short_name'], variable['dataset'], + var['short_name'], var['dataset'], '\n'.join(input_files)) if (not config_user.get('skip-nonexistent') - or variable['dataset'] == variable.get('reference_dataset')): - check.data_availability(input_files, variable) + or var['dataset'] == var.get('reference_dataset')): + check.data_availability(input_files, var) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -915,7 +901,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): regular_variables[0]['short_name']) # don't do time gating for fx variables - if 'is_fx_variable' in regular_variables[0].keys(): + if regular_variables[0]['frequency'] == 'fx': profile['extract_time'] = False # add all regular variable task @@ -1012,25 +998,23 @@ def _initialize_datasets(raw_datasets): check.duplicate_datasets(datasets) return datasets - def _get_required_var_keys(self, variable, dataset, index, - is_fxvar=False, fx_var_name=None): + def _get_required_var_keys(self, variable, dataset, + index, fx_var_dict=None): """Assemble correct variable attributes.""" - if is_fxvar and fx_var_name: - variable['short_name'] = fx_var_name + if fx_var_dict: + variable['short_name'] = fx_var_dict['short_name'] variable.update(dataset) variable['recipe_dataset_index'] = index if ('cmor_table' not in variable and variable.get('project') in CMOR_TABLES): variable['cmor_table'] = variable['project'] - if not is_fxvar: + if not fx_var_dict: if 'end_year' in variable and 'max_years' in self._cfg: variable['end_year'] = min( variable['end_year'], variable['start_year'] + self._cfg['max_years'] - 1) else: - variable = _convert_fxvar_to_cmor(fx_var_name, variable) - if 'fx_files' in variable.keys(): - del variable['fx_files'] + variable = _add_fxvar_keys(fx_var_dict, variable) return variable def _assemble_varlist(self, raw_variable, datasets): @@ -1046,13 +1030,11 @@ def _assemble_varlist(self, raw_variable, datasets): if 'fx_files' not in raw_variable.keys(): return variables else: - for fx_var_name in raw_variable['fx_files']: + for fx_var_dict in raw_variable['fx_files']: for index, dataset in enumerate(datasets): fx_variable = deepcopy(raw_variable) fx_variable = self._get_required_var_keys( - fx_variable, dataset, index, - is_fxvar=True, fx_var_name=fx_var_name - ) + fx_variable, dataset, index, fx_var_dict=fx_var_dict) variables.append(fx_variable) return variables @@ -1086,7 +1068,7 @@ def _initialize_variables(self, raw_variable, raw_datasets): # add fx files information if 'fx_files' in variable: for fx_file in variable['fx_files']: - DATASET_KEYS.add(fx_file) + DATASET_KEYS.add(fx_file['short_name']) fx_varlist = [ var for var in variables if 'fxvar' in var.keys() ] From 67aab02f373135f322ef1d6627d34142bc3f79e2 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 15:18:21 +0100 Subject: [PATCH 30/54] removed data finder test for fx files --- tests/integration/test_data_finder.py | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 565cb314b1..f0d6628f8d 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -7,8 +7,7 @@ import yaml import esmvaltool._config -from esmvaltool._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file) +from esmvaltool._data_finder import get_input_filelist, get_output_file from esmvaltool.cmor.table import read_cmor_tables # Initialize with standard config developer file @@ -92,27 +91,3 @@ def test_get_input_filelist(root, cfg): # Test result reference = [os.path.join(root, file) for file in cfg['found_files']] assert sorted(input_filelist) == sorted(reference) - - -@pytest.mark.parametrize('cfg', CONFIG['get_input_fx_filelist']) -def test_get_input_fx_filelist(root, cfg): - """Test retrieving fx filelist.""" - create_tree(root, cfg.get('available_files'), - cfg.get('available_symlinks')) - - # Find files - rootpath = {cfg['variable']['project']: [root]} - drs = {cfg['variable']['project']: cfg['drs']} - fx_variable = dict(cfg['variable']) - fx_files = {} - for fxvar in cfg['variable']['fx_files']: - fx_variable['short_name'] = fxvar - fx_files[fxvar] = get_input_fx_filelist(fx_variable, - rootpath, drs)[fxvar] - - # Test result - reference = { - fx_var: os.path.join(root, filename) if filename else None - for fx_var, filename in cfg['found_files'].items() - } - assert fx_files == reference From 7f08001931155291fc188c2061083b03d13c59c7 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 15:20:38 +0100 Subject: [PATCH 31/54] removed fx files functionalities --- esmvaltool/_config.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/esmvaltool/_config.py b/esmvaltool/_config.py index dc12fc9ce5..cc4ed79e4e 100644 --- a/esmvaltool/_config.py +++ b/esmvaltool/_config.py @@ -175,26 +175,6 @@ def get_institutes(variable): return CFG.get(project, {}).get('institutes', {}).get(dataset, []) -def replace_mip_fx(fx_file): - """Replace MIP so to retrieve correct fx files.""" - default_mip = 'Amon' - if fx_file not in CFG['CMIP5']['fx_mip_change']: - logger.warning( - 'mip for fx variable %s is not specified in ' - 'config_developer.yml, using default (%s)', fx_file, default_mip) - new_mip = CFG['CMIP5']['fx_mip_change'].get(fx_file, default_mip) - logger.debug("Switching mip for fx file finding to %s", new_mip) - return new_mip - - -def _get_cmip6_fx_mip(fx_var_name): - """Set the correct mip for CMIP6 fx variables.""" - new_mip = CFG['CMIP6']['fx_mip_change'][fx_var_name] - logger.debug("Switching mip for fx variable %s to %s", - fx_var_name, new_mip) - return new_mip - - TAGS_CONFIG_FILE = os.path.join( os.path.dirname(__file__), 'config-references.yml') From 10723da86f3022fbdf4d0196d2542b739b89ace2 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 16:45:19 +0100 Subject: [PATCH 32/54] more cleanup and migration of fx specific funcs to new module --- esmvaltool/_recipe.py | 117 +++++++++--------------------------------- 1 file changed, 23 insertions(+), 94 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 37b546582b..c89a69b9fa 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -14,6 +14,7 @@ from ._config import TAGS, get_institutes, replace_tags from ._data_finder import (get_input_filelist, get_output_file, get_statistic_output_file) +from ._fxvar import _add_fxvar_keys, _update_fx_files from ._provenance import TrackedFile, get_recipe_provenance from ._recipe_checks import RecipeError from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks, @@ -107,7 +108,6 @@ def _add_cmor_info(variable, override=False): logger.warning("Unknown CMOR table %s", variable['cmor_table']) derive = variable.get('derive', False) - fx_att = variable.get('fxvar', False) # Copy the following keys from CMOR table cmor_keys = [ 'standard_name', 'long_name', 'units', 'modeling_realm', 'frequency' @@ -120,9 +120,6 @@ def _add_cmor_info(variable, override=False): if derive and table_entry is None: custom_table = CMOR_TABLES['custom'] table_entry = custom_table.get_variable(mip, short_name) - if fx_att and table_entry is None: - custom_table = CMOR_TABLES['custom'] - table_entry = custom_table.get_variable(mip, short_name) if table_entry is None: raise RecipeError( @@ -366,118 +363,51 @@ def _get_default_settings(variable, config_user, derive=False): return settings -def _add_fxvar_keys(fx_var_dict, variable): - """Add a couple keys specific to fx variable.""" - fx_variable = deepcopy(variable) - - # add internal recognition flag - fx_variable['fxvar'] = True - - fx_variable['variable_group'] = fx_var_dict['short_name'] - fx_variable['short_name'] = fx_var_dict['short_name'] - - # specificities of project - if fx_variable['project'] == 'CMIP5': - fx_variable['mip'] = 'fx' - elif fx_variable['project'] == 'CMIP6': - fx_variable['grid'] = variable['grid'] - if 'mip' in fx_var_dict: - fx_variable['mip'] = fx_var_dict['mip'] - - return fx_variable - - -def _update_fx_files(fx_varlist, config_user, parent_variable): - """Get the fx files dict for a list of fx variables.""" - fx_files_dict = {} - for fx_variable in fx_varlist: - fx_files_dict[fx_variable['short_name']] = get_output_file( - fx_variable, - config_user['preproc_dir'], - parent_variable) - return fx_files_dict - - -def _update_fx_settings(settings, variable, config_user): +def _update_fx_settings(settings, variable): """Find and set the FX derive/mask settings.""" # update for derive if 'derive' in settings: - fx_files = {} - for var in get_required(variable['short_name']): - if 'fx_files' in var: - _augment(var, variable) - # first convert the fx_file strings to real variables - fx_varlist = [ - _add_fxvar_keys(fx_var, variable) - for fx_var in var['fx_files'] - ] - # now get the fx files output - fx_files.update( - _update_fx_files(fx_varlist, config_user, var)) - settings['derive']['fx_files'] = fx_files + if 'fx_files' in variable: + settings['derive']['fx_files'] = variable['fx_files'] # update for landsea if 'mask_landsea' in settings: # Configure ingestion of land/sea masks - if 'fx_files' not in variable.keys(): + if 'fx_files' not in variable: fx_sett = "fx_files: ['sftlf', 'sftof']" logger.error("You need to specify %s for variable %s", fx_sett, variable['short_name']) logger.debug('Getting fx mask settings now...') settings['mask_landsea']['fx_files'] = [] - - var = dict(variable) - var['fx_files'] = ['sftlf', 'sftof'] - # first convert the fx_file strings to real variables - fx_varlist = [ - _add_fxvar_keys(fx_var, variable) - for fx_var in var['fx_files'] - ] - # now get the files - fx_files_dict = _update_fx_files(fx_varlist, config_user, var) - - # allow both sftlf and sftof - if fx_files_dict['sftlf']: - settings['mask_landsea']['fx_files'].append(fx_files_dict['sftlf']) - if fx_files_dict['sftof']: - settings['mask_landsea']['fx_files'].append(fx_files_dict['sftof']) + if 'fx_files' in variable: + fx_files = variable['fx_files'] + # allow both sftlf and sftof + if fx_files['sftlf']: + settings['mask_landsea']['fx_files'].append(fx_files['sftlf']) + if fx_files['sftof']: + settings['mask_landsea']['fx_files'].append(fx_files['sftof']) if 'mask_landseaice' in settings: - if 'fx_files' not in variable.keys(): + if 'fx_files' not in variable: fx_sett = "fx_files: ['sftgif']" logger.error("You need to specify %s for variable %s", fx_sett, variable['short_name']) logger.debug('Getting fx mask settings now...') settings['mask_landseaice']['fx_files'] = [] - - var = dict(variable) - var['fx_files'] = ['sftgif'] - # first convert the fx_file strings to real variables - fx_varlist = [ - _add_fxvar_keys(fx_var, variable) - for fx_var in var['fx_files'] - ] - # now get the files - fx_files_dict = _update_fx_files(fx_varlist, config_user, var) - - # allow sftgif (only, for now) - if fx_files_dict['sftgif']: - settings['mask_landseaice']['fx_files'].append( - fx_files_dict['sftgif']) + if 'fx_files' in variable: + fx_files = variable['fx_files'] + # allow sftgif (only, for now) + if fx_files['sftlf']: + settings['mask_landseaice']['fx_files'].append( + fx_files['sftgif']) for step in ('average_region', 'average_volume'): if settings.get(step, {}).get('fx_files'): - # first convert the fx_file strings to real variables - fx_varlist = [ - _add_fxvar_keys(fx_var, variable) - for fx_var in var['fx_files'] - ] - # now get the files - settings[step]['fx_files'] = _update_fx_files(fx_varlist, - config_user, - var) + if 'fx_files' in variable: + fx_files = variable['fx_files'] + settings[step]['fx_files'] = fx_files def _read_attributes(filename): @@ -674,8 +604,7 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, settings=settings, config_user=config_user) _update_fx_settings( - settings=settings, variable=variable, - config_user=config_user) + settings=settings, variable=variable) _update_target_grid( variable=variable, variables=variables, From d025c0a65c1118432db9434fd1a5b5433a76b24e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 16:45:45 +0100 Subject: [PATCH 33/54] created WIP fx stuff module --- esmvaltool/_fxvar.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 esmvaltool/_fxvar.py diff --git a/esmvaltool/_fxvar.py b/esmvaltool/_fxvar.py new file mode 100644 index 0000000000..fdad7069f4 --- /dev/null +++ b/esmvaltool/_fxvar.py @@ -0,0 +1,38 @@ +"""Module to handle fx variables.""" +import logging +from copy import deepcopy + +from ._data_finder import get_output_file + +logger = logging.getLogger(__name__) + + +def _add_fxvar_keys(fx_var_dict, variable): + """Add a couple keys specific to fx variable.""" + fx_variable = deepcopy(variable) + + # add internal recognition flag + fx_variable['fxvar'] = True + fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable['short_name'] = fx_var_dict['short_name'] + + # specificities of project + if fx_variable['project'] == 'CMIP5': + fx_variable['mip'] = 'fx' + elif fx_variable['project'] == 'CMIP6': + fx_variable['grid'] = variable['grid'] + if 'mip' in fx_var_dict: + fx_variable['mip'] = fx_var_dict['mip'] + + return fx_variable + + +def _update_fx_files(fx_varlist, config_user, parent_variable): + """Get the fx files dict for a list of fx variables.""" + fx_files_dict = {} + for fx_variable in fx_varlist: + fx_files_dict[fx_variable['short_name']] = get_output_file( + fx_variable, + config_user['preproc_dir'], + parent_variable) + return fx_files_dict From 2ca8e08c38307a814e1d42bb4a6dc68fcf50287d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 10 May 2019 17:04:39 +0100 Subject: [PATCH 34/54] removed all references to fx structure --- esmvaltool/config-developer.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/esmvaltool/config-developer.yml b/esmvaltool/config-developer.yml index 80b28157eb..eb8e3464f6 100644 --- a/esmvaltool/config-developer.yml +++ b/esmvaltool/config-developer.yml @@ -220,10 +220,6 @@ OBS: input_file: default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc' BSC: '[short_name]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP5' @@ -232,10 +228,6 @@ obs4mips: input_dir: default: 'Tier[tier]/[dataset]' input_file: '[short_name]_[dataset]_[level]_[version]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' cmor_path: 'obs4mips' From 506859440197bb5b84ac55202cd63c0998e8887d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Sun, 12 May 2019 13:16:53 +0100 Subject: [PATCH 35/54] removed all traces of specific fx file retrieval --- esmvaltool/_data_finder.py | 53 ++++++++++++-------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index ba6e21d532..1f9702dcbd 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -11,8 +11,7 @@ import six -from ._config import get_project_config, replace_mip_fx -from .cmor.table import CMOR_TABLES +from ._config import get_project_config logger = logging.getLogger(__name__) @@ -95,7 +94,7 @@ def select_files(filenames, start_year, end_year): return selection -def _replace_tags(path, variable, fx_var=None): +def _replace_tags(path, variable): """Replace tags in the config-developer's file with actual values.""" path = path.strip('/') @@ -106,9 +105,7 @@ def _replace_tags(path, variable, fx_var=None): original_tag = tag tag, _, _ = _get_caps_options(tag) - if tag == 'fx_var': - replacewith = fx_var - elif tag == 'latestversion': # handled separately later + if tag == 'latestversion': # handled separately later continue elif tag in variable: replacewith = variable[tag] @@ -198,16 +195,16 @@ def get_rootpath(rootpath, project): raise KeyError('default rootpath must be specified in config-user file') -def _find_input_dirs(variable, rootpath, drs, fx_var=None): +def _find_input_dirs(variable, rootpath, drs): """Return a the full paths to input directories.""" project = variable['project'] root = get_rootpath(rootpath, project) - input_type = 'input_{}dir'.format('fx_' if fx_var else '') + input_type = 'input_dir' path_template = _select_drs(input_type, drs, project) dirnames = [] - for dirname_template in _replace_tags(path_template, variable, fx_var): + for dirname_template in _replace_tags(path_template, variable): for base_path in root: dirname = os.path.join(base_path, dirname_template) dirname = _resolve_latestversion(dirname) @@ -220,21 +217,17 @@ def _find_input_dirs(variable, rootpath, drs, fx_var=None): return dirnames -def _get_filenames_glob(variable, drs, fx_var=None): +def _get_filenames_glob(variable, drs): """Return patterns that can be used to look for input files.""" - input_type = 'input_{}file'.format('fx_' if fx_var else '') + input_type = 'input_file' path_template = _select_drs(input_type, drs, variable['project']) - filenames_glob = _replace_tags(path_template, variable, fx_var) + filenames_glob = _replace_tags(path_template, variable) return filenames_glob -def _find_input_files(variable, rootpath, drs, fx_var=None): - logger.debug("Looking for input %sfiles for variable %s of dataset %s", - fx_var + ' fx ' if fx_var else '', variable['short_name'], - variable['dataset']) - - input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var) - filenames_glob = _get_filenames_glob(variable, drs, fx_var) +def _find_input_files(variable, rootpath, drs): + input_dirs = _find_input_dirs(variable, rootpath, drs) + filenames_glob = _get_filenames_glob(variable, drs) files = find_files(input_dirs, filenames_glob) return files @@ -243,27 +236,13 @@ def _find_input_files(variable, rootpath, drs, fx_var=None): def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" files = _find_input_files(variable, rootpath, drs) - files = select_files(files, variable['start_year'], variable['end_year']) + # do time gating only for non-fx variables + if variable['frequency'] != 'fx': + files = select_files(files, variable['start_year'], + variable['end_year']) return files -def get_input_fx_filelist(variable, rootpath, drs): - """Return a dict with the full path to fx input files.""" - fx_files = {} - for fx_var in variable['fx_files']: - var = dict(variable) - var['mip'] = replace_mip_fx(fx_var) - table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) - var['frequency'] = table.frequency - realm = getattr(table.get(var['short_name']), 'modeling_realm', None) - var['modeling_realm'] = realm if realm else table.realm - - files = _find_input_files(var, rootpath, drs, fx_var) - fx_files[fx_var] = files[0] if files else None - - return fx_files - - def get_output_file(variable, preproc_dir): """Return the full path to the output (preprocessed) file.""" cfg = get_project_config(variable['project']) From 909e34f6495734080387a5fffaee1aac3a53311b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Sun, 12 May 2019 13:17:50 +0100 Subject: [PATCH 36/54] do time checks only if not fx variable --- esmvaltool/cmor/check.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/esmvaltool/cmor/check.py b/esmvaltool/cmor/check.py index 7f9399d545..8a9a9eec35 100644 --- a/esmvaltool/cmor/check.py +++ b/esmvaltool/cmor/check.py @@ -95,13 +95,15 @@ def check_metadata(self, logger=None): self._check_fill_value() self._check_dim_names() self._check_coords() - self._check_time_coord() + if self.frequency != 'fx': + self._check_time_coord() self._check_rank() self.report_warnings(logger) self.report_errors() - self._add_auxiliar_time_coordinates() + if self.frequency != 'fx': + self._add_auxiliar_time_coordinates() return self._cube def report_errors(self): From 1bbf364ff4490ee6544662de1b3bd942566c7219 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Sun, 12 May 2019 13:18:43 +0100 Subject: [PATCH 37/54] removed references to fx files and dirs --- esmvaltool/config-developer.yml | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/esmvaltool/config-developer.yml b/esmvaltool/config-developer.yml index 06433da1bf..eb8e3464f6 100644 --- a/esmvaltool/config-developer.yml +++ b/esmvaltool/config-developer.yml @@ -24,7 +24,7 @@ CMIP6: BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' institutes: @@ -147,26 +147,7 @@ CMIP5: ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/' SMHI: '[dataset]/[ensemble]/[exp]/[frequency]' BSC: '[type]/[project]/[exp]/[dataset.lower]' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_*.nc' - input_fx_dir: - default: '/' - BADC: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - CP4CDS: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[fx_var]/latest/' - DKRZ: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - ETHZ: '[exp]/fx/[fx_var]/[dataset]/r0i0p0' - input_fx_file: '[fx_var]_fx_[dataset]_[exp]_r0i0p0.nc' - fx_mip_change: - 'areacella': 'Amon' - 'areacello': 'Omon' - 'basin': 'Omon' - 'deptho': 'Omon' - 'mrsofc': 'Lmon' - 'orog': 'Amon' - 'rootd': 'Lmon' - 'sftgif': 'Lmon' - 'sftlf': 'Amon' - 'sftof': 'Omon' - 'volcello': 'Omon' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' institutes: 'ACCESS1-0': ['CSIRO-BOM'] @@ -239,10 +220,6 @@ OBS: input_file: default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc' BSC: '[short_name]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP5' @@ -251,10 +228,6 @@ obs4mips: input_dir: default: 'Tier[tier]/[dataset]' input_file: '[short_name]_[dataset]_[level]_[version]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' cmor_path: 'obs4mips' From 3e6342442ae95dc5b3bdb9fc0f769a94cbb1321e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Sun, 12 May 2019 13:19:32 +0100 Subject: [PATCH 38/54] removed fx files function --- esmvaltool/_config.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/esmvaltool/_config.py b/esmvaltool/_config.py index 221c06d026..cc4ed79e4e 100644 --- a/esmvaltool/_config.py +++ b/esmvaltool/_config.py @@ -175,18 +175,6 @@ def get_institutes(variable): return CFG.get(project, {}).get('institutes', {}).get(dataset, []) -def replace_mip_fx(fx_file): - """Replace MIP so to retrieve correct fx files.""" - default_mip = 'Amon' - if fx_file not in CFG['CMIP5']['fx_mip_change']: - logger.warning( - 'mip for fx variable %s is not specified in ' - 'config_developer.yml, using default (%s)', fx_file, default_mip) - new_mip = CFG['CMIP5']['fx_mip_change'].get(fx_file, default_mip) - logger.debug("Switching mip for fx file finding to %s", new_mip) - return new_mip - - TAGS_CONFIG_FILE = os.path.join( os.path.dirname(__file__), 'config-references.yml') From 33b906604a3e743b033b21bec8cdb32dda516b90 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Sun, 12 May 2019 13:21:15 +0100 Subject: [PATCH 39/54] restructuring to allow for fx vars treated like any other var --- esmvaltool/_recipe.py | 113 ++++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 33 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 6ab0236f55..c0a7c26e12 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -11,8 +11,8 @@ from . import __version__ from . import _recipe_checks as check from ._config import TAGS, get_institutes, replace_tags -from ._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file, get_statistic_output_file) +from ._data_finder import (get_input_filelist, get_output_file, + get_statistic_output_file) from ._provenance import TrackedFile, get_recipe_provenance from ._recipe_checks import RecipeError from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks, @@ -361,6 +361,26 @@ def _get_default_settings(variable, config_user, derive=False): return settings +def _add_fxvar_keys(fx_var_dict, variable): + """Add a couple keys specific to fx variable.""" + fx_variable = deepcopy(variable) + + # add internal recognition flag + fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable['short_name'] = fx_var_dict['short_name'] + + # specificities of project + if fx_variable['project'] == 'CMIP5': + fx_variable['mip'] = 'fx' + fx_variable['ensemble'] = 'r0i0p0' + elif fx_variable['project'] == 'CMIP6': + fx_variable['grid'] = variable['grid'] + if 'mip' in fx_var_dict: + fx_variable['mip'] = fx_var_dict['mip'] + + return fx_variable + + def _update_fx_settings(settings, variable, config_user): """Find and set the FX derive/mask settings.""" # update for derive @@ -369,11 +389,13 @@ def _update_fx_settings(settings, variable, config_user): for var in get_required(variable['short_name']): if 'fx_files' in var: _augment(var, variable) - fx_files.update( - get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs'])) + for fx_var_dict in var['fx_files']: + fx_var = _add_fxvar_keys(fx_var_dict, variable) + fx_files.update( + get_input_filelist( + variable=fx_var, + rootpath=config_user['rootpath'], + drs=config_user['drs'])) settings['derive']['fx_files'] = fx_files # update for landsea @@ -384,11 +406,14 @@ def _update_fx_settings(settings, variable, config_user): settings['mask_landsea']['fx_files'] = [] var = dict(variable) - var['fx_files'] = ['sftlf', 'sftof'] - fx_files_dict = get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + var['fx_files'] = [{'short_name': 'sftlf'}, {'short_name': 'sftof'}] + fx_files_dict = {} + for fx_var_dict in var['fx_files']: + fx_var = _add_fxvar_keys(fx_var_dict, var) + fx_files_dict[fx_var['short_name']] = get_input_filelist( + variable=fx_var, + rootpath=config_user['rootpath'], + drs=config_user['drs']) # allow both sftlf and sftof if fx_files_dict['sftlf']: @@ -402,11 +427,14 @@ def _update_fx_settings(settings, variable, config_user): settings['mask_landseaice']['fx_files'] = [] var = dict(variable) - var['fx_files'] = ['sftgif'] - fx_files_dict = get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + var['fx_files'] = [{'short_name': 'sftgif'}] + fx_files_dict = {} + for fx_var_dict in var['fx_files']: + fx_var = _add_fxvar_keys(fx_var_dict, var) + fx_files_dict[fx_var['short_name']] = get_input_filelist( + variable=fx_var, + rootpath=config_user['rootpath'], + drs=config_user['drs']) # allow sftgif (only, for now) if fx_files_dict['sftgif']: @@ -415,11 +443,16 @@ def _update_fx_settings(settings, variable, config_user): for step in ('average_region', 'average_volume'): if settings.get(step, {}).get('fx_files'): - settings[step]['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs'], - ) + var = dict(variable) + var['fx_files'] = settings.get(step, {}).get('fx_files') + fx_files_dict = {} + for fx_var_dict in var['fx_files']: + fx_var = _add_fxvar_keys(fx_var_dict, var) + fx_files_dict[fx_var['short_name']] = get_input_filelist( + variable=fx_var, + rootpath=config_user['rootpath'], + drs=config_user['drs']) + settings[step]['fx_files'] = fx_files_dict def _read_attributes(filename): @@ -438,22 +471,27 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset.""" # Find input files locally. + var = dict(variable) + # change ensemble to fixed r0i0p0 + if var['project'] == 'CMIP5': + if var['frequency'] == 'fx': + var['ensemble'] = 'r0i0p0' input_files = get_input_filelist( - variable=variable, + variable=var, rootpath=config_user['rootpath'], drs=config_user['drs']) # Set up downloading using synda if requested. # Do not download if files are already available locally. if config_user['synda_download'] and not input_files: - input_files = synda_search(variable) + input_files = synda_search(var) logger.info("Using input files for variable %s of dataset %s:\n%s", - variable['short_name'], variable['dataset'], + var['short_name'], var['dataset'], '\n'.join(input_files)) if (not config_user.get('skip-nonexistent') - or variable['dataset'] == variable.get('reference_dataset')): - check.data_availability(input_files, variable) + or var['dataset'] == var.get('reference_dataset')): + check.data_availability(input_files, var) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -772,6 +810,10 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): name=derive_name) derive_tasks.append(task) + # don't do time gating for fx variables + if variables[0]['frequency'] == 'fx': + profile['extract_time'] = False + # Create (final) preprocessor task task = _get_single_preprocessor_task( variables, @@ -905,13 +947,18 @@ def _initialize_variables(self, raw_variable, raw_datasets): variable['institute'] = institute check.variable(variable, required_keys) if 'fx_files' in variable: - for fx_file in variable['fx_files']: - DATASET_KEYS.add(fx_file) + for fx_file_dict in variable['fx_files']: + DATASET_KEYS.add(fx_file_dict['short_name']) # Get the fx files - variable['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) + fx_files_dict = {} + for fx_var_dict in variable['fx_files']: + fx_var = _add_fxvar_keys(fx_var_dict, variable) + _add_cmor_info(fx_var) + fx_files_dict[fx_var['short_name']] = get_input_filelist( + variable=fx_var, + rootpath=self._cfg['rootpath'], + drs=self._cfg['drs']) + variable['fx_files'] = fx_files_dict logger.info("Using fx files for var %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], variable['fx_files']) From 5614f7c44c1db4ff355ec52661e62eddc13de8bc Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 15:30:26 +0100 Subject: [PATCH 40/54] cleaning up --- esmvaltool/_data_finder.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esmvaltool/_data_finder.py b/esmvaltool/_data_finder.py index 1f9702dcbd..158272daa7 100644 --- a/esmvaltool/_data_finder.py +++ b/esmvaltool/_data_finder.py @@ -235,6 +235,9 @@ def _find_input_files(variable, rootpath, drs): def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" + # change ensemble to fixed r0i0p0 for fx variables + if variable['project'] == 'CMIP5'and variable['frequency'] == 'fx': + variable['ensemble'] = 'r0i0p0' files = _find_input_files(variable, rootpath, drs) # do time gating only for non-fx variables if variable['frequency'] != 'fx': From 475cd8f9e897b0a4a87ad54cffaf70f84d6cfa20 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 15:31:23 +0100 Subject: [PATCH 41/54] cleaning up --- esmvaltool/_recipe.py | 101 +++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 51 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index c0a7c26e12..79524c7970 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -6,10 +6,11 @@ from copy import deepcopy import yaml + from netCDF4 import Dataset -from . import __version__ from . import _recipe_checks as check +from . import __version__ from ._config import TAGS, get_institutes, replace_tags from ._data_finder import (get_input_filelist, get_output_file, get_statistic_output_file) @@ -361,11 +362,28 @@ def _get_default_settings(variable, config_user, derive=False): return settings +def get_input_fx_filelist(variable, rootpath, drs): + """Return a dict with fx vars keys and full file paths values.""" + fx_files_dict = {} + for fx_var_dict in variable['fx_files']: + fx_var = _add_fxvar_keys(fx_var_dict, variable) + fx_files = get_input_filelist( + variable=fx_var, + rootpath=rootpath, + drs=drs) + if fx_files: + fx_files_dict[fx_var['short_name']] = fx_files[0] + else: + fx_files_dict[fx_var['short_name']] = None + + return fx_files_dict + + def _add_fxvar_keys(fx_var_dict, variable): - """Add a couple keys specific to fx variable.""" - fx_variable = deepcopy(variable) + """Add keys specific to fx variable to use get_input_filelist.""" + fx_variable = dict(variable) - # add internal recognition flag + # set variable names fx_variable['variable_group'] = fx_var_dict['short_name'] fx_variable['short_name'] = fx_var_dict['short_name'] @@ -377,6 +395,8 @@ def _add_fxvar_keys(fx_var_dict, variable): fx_variable['grid'] = variable['grid'] if 'mip' in fx_var_dict: fx_variable['mip'] = fx_var_dict['mip'] + # add missing cmor info + _add_cmor_info(fx_variable, override=True) return fx_variable @@ -389,13 +409,11 @@ def _update_fx_settings(settings, variable, config_user): for var in get_required(variable['short_name']): if 'fx_files' in var: _augment(var, variable) - for fx_var_dict in var['fx_files']: - fx_var = _add_fxvar_keys(fx_var_dict, variable) - fx_files.update( - get_input_filelist( - variable=fx_var, - rootpath=config_user['rootpath'], - drs=config_user['drs'])) + fx_files.update( + get_input_fx_filelist( + variable=var, + rootpath=config_user['rootpath'], + drs=config_user['drs'])) settings['derive']['fx_files'] = fx_files # update for landsea @@ -407,13 +425,10 @@ def _update_fx_settings(settings, variable, config_user): var = dict(variable) var['fx_files'] = [{'short_name': 'sftlf'}, {'short_name': 'sftof'}] - fx_files_dict = {} - for fx_var_dict in var['fx_files']: - fx_var = _add_fxvar_keys(fx_var_dict, var) - fx_files_dict[fx_var['short_name']] = get_input_filelist( - variable=fx_var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + fx_files_dict = get_input_fx_filelist( + variable=var, + rootpath=config_user['rootpath'], + drs=config_user['drs']) # allow both sftlf and sftof if fx_files_dict['sftlf']: @@ -428,13 +443,10 @@ def _update_fx_settings(settings, variable, config_user): var = dict(variable) var['fx_files'] = [{'short_name': 'sftgif'}] - fx_files_dict = {} - for fx_var_dict in var['fx_files']: - fx_var = _add_fxvar_keys(fx_var_dict, var) - fx_files_dict[fx_var['short_name']] = get_input_filelist( - variable=fx_var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + fx_files_dict = get_input_fx_filelist( + variable=var, + rootpath=config_user['rootpath'], + drs=config_user['drs']) # allow sftgif (only, for now) if fx_files_dict['sftgif']: @@ -445,13 +457,10 @@ def _update_fx_settings(settings, variable, config_user): if settings.get(step, {}).get('fx_files'): var = dict(variable) var['fx_files'] = settings.get(step, {}).get('fx_files') - fx_files_dict = {} - for fx_var_dict in var['fx_files']: - fx_var = _add_fxvar_keys(fx_var_dict, var) - fx_files_dict[fx_var['short_name']] = get_input_filelist( - variable=fx_var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + fx_files_dict = get_input_fx_filelist( + variable=var, + rootpath=config_user['rootpath'], + drs=config_user['drs']) settings[step]['fx_files'] = fx_files_dict @@ -471,27 +480,22 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset.""" # Find input files locally. - var = dict(variable) - # change ensemble to fixed r0i0p0 - if var['project'] == 'CMIP5': - if var['frequency'] == 'fx': - var['ensemble'] = 'r0i0p0' input_files = get_input_filelist( - variable=var, + variable=variable, rootpath=config_user['rootpath'], drs=config_user['drs']) # Set up downloading using synda if requested. # Do not download if files are already available locally. if config_user['synda_download'] and not input_files: - input_files = synda_search(var) + input_files = synda_search(variable) logger.info("Using input files for variable %s of dataset %s:\n%s", - var['short_name'], var['dataset'], + variable['short_name'], variable['dataset'], '\n'.join(input_files)) if (not config_user.get('skip-nonexistent') - or var['dataset'] == var.get('reference_dataset')): - check.data_availability(input_files, var) + or var['dataset'] == variable.get('reference_dataset')): + check.data_availability(input_files, variable) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -950,15 +954,10 @@ def _initialize_variables(self, raw_variable, raw_datasets): for fx_file_dict in variable['fx_files']: DATASET_KEYS.add(fx_file_dict['short_name']) # Get the fx files - fx_files_dict = {} - for fx_var_dict in variable['fx_files']: - fx_var = _add_fxvar_keys(fx_var_dict, variable) - _add_cmor_info(fx_var) - fx_files_dict[fx_var['short_name']] = get_input_filelist( - variable=fx_var, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) - variable['fx_files'] = fx_files_dict + variable['fx_files'] = get_input_fx_filelist( + variable=variable, + rootpath=self._cfg['rootpath'], + drs=self._cfg['drs']) logger.info("Using fx files for var %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], variable['fx_files']) From 0ee4a729fb59803f3704bebb7bab1eb1188faa5b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 15:32:36 +0100 Subject: [PATCH 42/54] fixed test for new fx file retrieval --- tests/integration/test_data_finder.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 484fdc99b7..ef8826cb44 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -3,12 +3,11 @@ import shutil import tempfile +import esmvaltool._config import pytest import yaml - -import esmvaltool._config -from esmvaltool._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file) +from esmvaltool._data_finder import get_input_filelist, get_output_file +from esmvaltool._recipe import get_input_fx_filelist from esmvaltool.cmor.table import read_cmor_tables # Initialize with standard config developer file @@ -103,11 +102,18 @@ def test_get_input_fx_filelist(root, cfg): # Find files rootpath = {cfg['variable']['project']: [root]} drs = {cfg['variable']['project']: cfg['drs']} - fx_files = get_input_fx_filelist(cfg['variable'], rootpath, drs) + cfg['variable']['fx_files'] = [ + {'short_name': short_name} for short_name + in cfg['variable']['fx_files'] + ] + fx_files_dict = get_input_fx_filelist( + variable=cfg['variable'], + rootpath=rootpath, + drs=drs) # Test result reference = { fx_var: os.path.join(root, filename) if filename else None for fx_var, filename in cfg['found_files'].items() } - assert fx_files == reference + assert fx_files_dict == reference From 6989951c63f67f9fe66beb7d4f2e475afe91eec1 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 15:33:15 +0100 Subject: [PATCH 43/54] added checks for no time gating --- esmvaltool/_recipe_checks.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/esmvaltool/_recipe_checks.py b/esmvaltool/_recipe_checks.py index a6ab607df0..ee0e569304 100644 --- a/esmvaltool/_recipe_checks.py +++ b/esmvaltool/_recipe_checks.py @@ -97,15 +97,18 @@ def data_availability(input_files, var): required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() - for filename in input_files: - start, end = get_start_end_year(filename) - available_years.update(range(start, end + 1)) - - missing_years = required_years - available_years - if missing_years: - raise RecipeError( - "No input data available for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), input_files)) + # check time avail only for non-fx variables + if var['frequency'] != 'fx': + for filename in input_files: + start, end = get_start_end_year(filename) + available_years.update(range(start, end + 1)) + + missing_years = required_years - available_years + if missing_years: + raise RecipeError( + "No input data available for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files)) def tasks_valid(tasks): From 988d09cdba42e75fa62a03fc47baf01d1a64362a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 16:23:30 +0100 Subject: [PATCH 44/54] fixed the test to reflect the new standards --- tests/integration/test_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index aeaaedabd5..885eecc5d3 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -100,7 +100,7 @@ def find_files(_, filenames): filename = str(tmp_path / 'input' / filename) filenames = [] if filename.endswith('*.nc'): - filename = filename[:-len('*.nc')] + filename = filename[:-len('*.nc')] + '_' intervals = [ '1990_1999', '2000_2009', From 775715c801768aeccf65de296e3fc987cab0bb0e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 16:23:51 +0100 Subject: [PATCH 45/54] fixed the test to reflect the new standards --- tests/integration/preprocessor/_derive/test_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/preprocessor/_derive/test_interface.py b/tests/integration/preprocessor/_derive/test_interface.py index cef8ea93a4..8608cccf18 100644 --- a/tests/integration/preprocessor/_derive/test_interface.py +++ b/tests/integration/preprocessor/_derive/test_interface.py @@ -26,7 +26,7 @@ def test_get_required_with_fx(): reference = [{ 'short_name': 'nbp', - 'fx_files': ['sftlf'], + 'fx_files': [{'short_name': 'sftlf'}], }] assert variables == reference From 32b6376fbd1533c7e34f278fc2cc717792908d89 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 16:24:14 +0100 Subject: [PATCH 46/54] fixed the data structure standard to reflect new standards --- esmvaltool/preprocessor/_derive/nbp_grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvaltool/preprocessor/_derive/nbp_grid.py b/esmvaltool/preprocessor/_derive/nbp_grid.py index 5a07fcaed7..44047c4f7f 100644 --- a/esmvaltool/preprocessor/_derive/nbp_grid.py +++ b/esmvaltool/preprocessor/_derive/nbp_grid.py @@ -12,7 +12,7 @@ class DerivedVariable(DerivedVariableBase): # Required variables required = [{ 'short_name': 'nbp', - 'fx_files': ['sftlf'], + 'fx_files': [{'short_name': 'sftlf'}], }] @staticmethod From 26cf5ba88712f2c2055fc1e835df175d9953ee4a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 17:07:02 +0100 Subject: [PATCH 47/54] cleaning up again --- esmvaltool/_recipe.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index d94b98ac0e..e34bca5a28 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -362,23 +362,14 @@ def _get_default_settings(variable, config_user, derive=False): return settings -<<<<<<< HEAD -def _update_fx_settings(settings, variable): -======= -def get_input_fx_filelist(variable, rootpath, drs): - """Return a dict with fx vars keys and full file paths values.""" +def _update_fx_files(fx_varlist, config_user, parent_variable): + """Get the fx files dict for a list of fx variables.""" fx_files_dict = {} - for fx_var_dict in variable['fx_files']: - fx_var = _add_fxvar_keys(fx_var_dict, variable) - fx_files = get_input_filelist( - variable=fx_var, - rootpath=rootpath, - drs=drs) - if fx_files: - fx_files_dict[fx_var['short_name']] = fx_files[0] - else: - fx_files_dict[fx_var['short_name']] = None - + for fx_variable in fx_varlist: + fx_files_dict[fx_variable['short_name']] = get_output_file( + fx_variable, + config_user['preproc_dir'], + parent_variable) return fx_files_dict @@ -404,7 +395,7 @@ def _add_fxvar_keys(fx_var_dict, variable): return fx_variable -def _update_fx_settings(settings, variable, config_user): +def _update_fx_settings(settings, variable): """Find and set the FX derive/mask settings.""" # update for derive if 'derive' in settings: From b0025fdae3d9596e432d61a355eadcf3ac834cdf Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 17:07:21 +0100 Subject: [PATCH 48/54] removed unused module --- esmvaltool/_fxvar.py | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 esmvaltool/_fxvar.py diff --git a/esmvaltool/_fxvar.py b/esmvaltool/_fxvar.py deleted file mode 100644 index fdad7069f4..0000000000 --- a/esmvaltool/_fxvar.py +++ /dev/null @@ -1,38 +0,0 @@ -"""Module to handle fx variables.""" -import logging -from copy import deepcopy - -from ._data_finder import get_output_file - -logger = logging.getLogger(__name__) - - -def _add_fxvar_keys(fx_var_dict, variable): - """Add a couple keys specific to fx variable.""" - fx_variable = deepcopy(variable) - - # add internal recognition flag - fx_variable['fxvar'] = True - fx_variable['variable_group'] = fx_var_dict['short_name'] - fx_variable['short_name'] = fx_var_dict['short_name'] - - # specificities of project - if fx_variable['project'] == 'CMIP5': - fx_variable['mip'] = 'fx' - elif fx_variable['project'] == 'CMIP6': - fx_variable['grid'] = variable['grid'] - if 'mip' in fx_var_dict: - fx_variable['mip'] = fx_var_dict['mip'] - - return fx_variable - - -def _update_fx_files(fx_varlist, config_user, parent_variable): - """Get the fx files dict for a list of fx variables.""" - fx_files_dict = {} - for fx_variable in fx_varlist: - fx_files_dict[fx_variable['short_name']] = get_output_file( - fx_variable, - config_user['preproc_dir'], - parent_variable) - return fx_files_dict From 87c5494a47cdeb2c7f4951cc2d32a0d245415db6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 May 2019 18:55:44 +0100 Subject: [PATCH 49/54] fixed almost all tests, bar a derive one that I need to look into more carefully --- esmvaltool/_recipe.py | 7 ++++--- tests/integration/test_data_finder.py | 30 --------------------------- tests/integration/test_recipe.py | 16 +++++++------- 3 files changed, 13 insertions(+), 40 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index e34bca5a28..81c044f90e 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -772,9 +772,10 @@ def append(group_prefix, var): append(group_prefix, var) else: # Process input data needed to derive variable - for var in get_required(variable['short_name']): - _augment(var, variable) - append(group_prefix, var) + if variable['frequency'] != 'fx': + for var in get_required(variable['short_name']): + _augment(var, variable) + append(group_prefix, var) return derive_input diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 3b84a9f295..69f362b722 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -8,7 +8,6 @@ import yaml from esmvaltool._data_finder import get_input_filelist, get_output_file -from esmvaltool._recipe import get_input_fx_filelist from esmvaltool.cmor.table import read_cmor_tables # Initialize with standard config developer file @@ -92,32 +91,3 @@ def test_get_input_filelist(root, cfg): # Test result reference = [os.path.join(root, file) for file in cfg['found_files']] assert sorted(input_filelist) == sorted(reference) -<<<<<<< HEAD -======= - - -@pytest.mark.parametrize('cfg', CONFIG['get_input_fx_filelist']) -def test_get_input_fx_filelist(root, cfg): - """Test retrieving fx filelist.""" - create_tree(root, cfg.get('available_files'), - cfg.get('available_symlinks')) - - # Find files - rootpath = {cfg['variable']['project']: [root]} - drs = {cfg['variable']['project']: cfg['drs']} - cfg['variable']['fx_files'] = [ - {'short_name': short_name} for short_name - in cfg['variable']['fx_files'] - ] - fx_files_dict = get_input_fx_filelist( - variable=cfg['variable'], - rootpath=rootpath, - drs=drs) - - # Test result - reference = { - fx_var: os.path.join(root, filename) if filename else None - for fx_var, filename in cfg['found_files'].items() - } - assert fx_files_dict == reference ->>>>>>> version2_development_fx_RESTRUCTURED diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 885eecc5d3..e04324ee2d 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -602,6 +602,7 @@ def test_derive_with_fx(tmp_path, patched_datafinder, config_user): end_year: 2005 derive: true force_derivation: true + fx_files: [{'short_name': 'sftlf'}] additional_datasets: - {dataset: GFDL-CM3, ensemble: r1i1p1} scripts: null @@ -619,13 +620,14 @@ def test_derive_with_fx(tmp_path, patched_datafinder, config_user): assert ancestor.name == 'diagnostic_name/nbp_grid_derive_input_nbp' # Check product content of tasks - assert len(task.products) == 1 - product = task.products.pop() - assert 'derive' in product.settings - assert product.attributes['short_name'] == 'nbp_grid' - assert 'fx_files' in product.settings['derive'] - assert 'sftlf' in product.settings['derive']['fx_files'] - assert product.settings['derive']['fx_files']['sftlf'] is not None + # this will be 2 since it works out the fx var as product + assert len(task.products) == 2 + for product in task.products: + if product.attributes['short_name'] == 'nbp_grid': + assert 'derive' in product.settings + assert 'fx_files' in product.settings['derive'] + assert 'sftlf' in product.settings['derive']['fx_files'] + assert product.settings['derive']['fx_files']['sftlf'] is not None assert len(ancestor.products) == 1 ancestor_product = ancestor.products.pop() From 7325751642e1121eae1a9bd3865733a610b84b53 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 15 May 2019 16:33:32 +0100 Subject: [PATCH 50/54] fixed test properly to account for full functionality of fx ancestry and processing --- tests/integration/test_recipe.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index e04324ee2d..242bea7263 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -615,19 +615,20 @@ def test_derive_with_fx(tmp_path, patched_datafinder, config_user): task = recipe.tasks.pop() assert task.name == 'diagnostic_name' + TASKSEP + 'nbp_grid' - assert len(task.ancestors) == 1 - ancestor = [t for t in task.ancestors][0] + # fx_sftlf and nbp_grid so 2 + assert len(task.ancestors) == 2 + fx_ancestor = [t for t in task.ancestors][0] + assert fx_ancestor.name == 'diagnostic_name/fx_sftlf' + ancestor = [t for t in task.ancestors][1] assert ancestor.name == 'diagnostic_name/nbp_grid_derive_input_nbp' # Check product content of tasks - # this will be 2 since it works out the fx var as product - assert len(task.products) == 2 - for product in task.products: - if product.attributes['short_name'] == 'nbp_grid': - assert 'derive' in product.settings - assert 'fx_files' in product.settings['derive'] - assert 'sftlf' in product.settings['derive']['fx_files'] - assert product.settings['derive']['fx_files']['sftlf'] is not None + assert len(task.products) == 1 + product = task.products.pop() + assert 'derive' in product.settings + assert 'fx_files' in product.settings['derive'] + assert 'sftlf' in product.settings['derive']['fx_files'] + assert product.settings['derive']['fx_files']['sftlf'] is not None assert len(ancestor.products) == 1 ancestor_product = ancestor.products.pop() From 7cc6c091228414c839988cf3bcb2bca261eebe96 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 15 May 2019 16:44:01 +0100 Subject: [PATCH 51/54] final bug fixes and error raising --- esmvaltool/_recipe.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/esmvaltool/_recipe.py b/esmvaltool/_recipe.py index 81c044f90e..4a6d432d69 100644 --- a/esmvaltool/_recipe.py +++ b/esmvaltool/_recipe.py @@ -366,10 +366,12 @@ def _update_fx_files(fx_varlist, config_user, parent_variable): """Get the fx files dict for a list of fx variables.""" fx_files_dict = {} for fx_variable in fx_varlist: - fx_files_dict[fx_variable['short_name']] = get_output_file( - fx_variable, - config_user['preproc_dir'], - parent_variable) + if fx_variable['dataset'] == parent_variable['dataset']: + fx_files_dict[fx_variable['short_name']] = get_output_file( + fx_variable, + config_user['preproc_dir'], + parent_variable) + return fx_files_dict @@ -377,7 +379,14 @@ def _add_fxvar_keys(fx_var_dict, variable): """Add keys specific to fx variable to use get_input_filelist.""" fx_variable = dict(variable) - # set variable names + # remove keys that dont belong to fx var analysis + if 'fx_files' in fx_variable: + del fx_variable['fx_files'] + if 'force_derivation' in fx_variable: + del fx_variable['force_derivation'] + + # set fx recognition flag and variable names + fx_variable['fxvar'] = True fx_variable['variable_group'] = fx_var_dict['short_name'] fx_variable['short_name'] = fx_var_dict['short_name'] @@ -406,9 +415,11 @@ def _update_fx_settings(settings, variable): if 'mask_landsea' in settings: # Configure ingestion of land/sea masks if 'fx_files' not in variable: - fx_sett = "fx_files: ['sftlf', 'sftof']" - logger.error("You need to specify %s for variable %s", - fx_sett, variable['short_name']) + fx_set = "fx_files: ['sftlf', 'sftof']" + raise RecipeError( + "Masking: {} missing from {}".format(fx_set, + variable['short_name'])) + logger.debug('Getting fx mask settings now...') settings['mask_landsea']['fx_files'] = [] if 'fx_files' in variable: @@ -421,9 +432,10 @@ def _update_fx_settings(settings, variable): if 'mask_landseaice' in settings: if 'fx_files' not in variable: - fx_sett = "fx_files: ['sftgif']" - logger.error("You need to specify %s for variable %s", - fx_sett, variable['short_name']) + fx_set = "fx_files: ['sftgif']" + raise RecipeError( + "Masking: {} missing from {}".format(fx_set, + variable['short_name'])) logger.debug('Getting fx mask settings now...') settings['mask_landseaice']['fx_files'] = [] @@ -988,7 +1000,7 @@ def _assemble_varlist(self, raw_variable, datasets): index) variables.append(variable) - if 'fx_files' not in raw_variable.keys(): + if 'fx_files' not in raw_variable: return variables else: for fx_var_dict in raw_variable['fx_files']: @@ -1007,7 +1019,6 @@ def _initialize_variables(self, raw_variable, raw_datasets): # get full list of variables variables = self._assemble_varlist(raw_variable, datasets) - required_keys = { 'short_name', 'mip', @@ -1031,7 +1042,7 @@ def _initialize_variables(self, raw_variable, raw_datasets): for fx_file in variable['fx_files']: DATASET_KEYS.add(fx_file['short_name']) fx_varlist = [ - var for var in variables if 'fxvar' in var.keys() + var for var in variables if 'fxvar' in var ] variable['fx_files'] = _update_fx_files(fx_varlist, self._cfg, From d4ad12f2a0b9bdb91158b464ba5a8e104e8d2eb8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 10 Jun 2019 17:09:42 +0100 Subject: [PATCH 52/54] removed stray import introduced by merge --- tests/integration/test_data_finder.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 99e49480ca..d931f5730c 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -7,8 +7,7 @@ import yaml import esmvalcore._config -from esmvalcore._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file) +from esmvalcore._data_finder import get_input_filelist, get_output_file from esmvalcore.cmor.table import read_cmor_tables # Initialize with standard config developer file From ef631a9ffae3026168a82e9de88622307fc44699 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 10 Jun 2019 17:10:30 +0100 Subject: [PATCH 53/54] removed unused import --- esmvalcore/preprocessor/_mask.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index 491d537c56..efba23d2e0 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -12,7 +12,6 @@ import numpy as np -import dask.array as da import cartopy.io.shapereader as shpreader import iris import shapely.vectorized as shp_vect From fefc7158333cae026d90813e16c4882cdd6b6a31 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 14 Jun 2019 15:04:12 +0100 Subject: [PATCH 54/54] removed unused import --- esmvalcore/_data_finder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 432c37da82..66e38a8753 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -10,7 +10,6 @@ import re from ._config import get_project_config -from .cmor.table import CMOR_TABLES logger = logging.getLogger(__name__)