diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 0efc7bc571..be081d343f 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -179,18 +179,6 @@ def get_institutes(variable): return CFG.get(project, {}).get('institutes', {}).get(dataset, []) -def replace_mip_fx(fx_file): - """Replace MIP so to retrieve correct fx files.""" - default_mip = 'Amon' - if fx_file not in CFG['CMIP5']['fx_mip_change']: - logger.warning( - 'mip for fx variable %s is not specified in ' - 'config_developer.yml, using default (%s)', fx_file, default_mip) - new_mip = CFG['CMIP5']['fx_mip_change'].get(fx_file, default_mip) - logger.debug("Switching mip for fx file finding to %s", new_mip) - return new_mip - - TAGS_CONFIG_FILE = os.path.join( DIAGNOSTICS_PATH, 'config-references.yml') diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 258b315c7d..0bc9450f70 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -9,8 +9,7 @@ import os import re -from ._config import get_project_config, replace_mip_fx -from .cmor.table import CMOR_TABLES +from ._config import get_project_config logger = logging.getLogger(__name__) @@ -93,7 +92,7 @@ def select_files(filenames, start_year, end_year): return selection -def _replace_tags(path, variable, fx_var=None): +def _replace_tags(path, variable): """Replace tags in the config-developer's file with actual values.""" path = path.strip('/') @@ -104,9 +103,7 @@ def _replace_tags(path, variable, fx_var=None): original_tag = tag tag, _, _ = _get_caps_options(tag) - if tag == 'fx_var': - replacewith = fx_var - elif tag == 'latestversion': # handled separately later + if tag == 'latestversion': # handled separately later continue elif tag in variable: replacewith = variable[tag] @@ -196,16 +193,15 @@ def get_rootpath(rootpath, project): raise KeyError('default rootpath must be specified in config-user file') -def _find_input_dirs(variable, rootpath, drs, fx_var=None): +def _find_input_dirs(variable, rootpath, drs): """Return a the full paths to input directories.""" project = variable['project'] root = get_rootpath(rootpath, project) - input_type = 'input_{}dir'.format('fx_' if fx_var else '') - path_template = _select_drs(input_type, drs, project) + path_template = _select_drs('input_dir', drs, project) dirnames = [] - for dirname_template in _replace_tags(path_template, variable, fx_var): + for dirname_template in _replace_tags(path_template, variable): for base_path in root: dirname = os.path.join(base_path, dirname_template) dirname = _resolve_latestversion(dirname) @@ -218,21 +214,16 @@ def _find_input_dirs(variable, rootpath, drs, fx_var=None): return dirnames -def _get_filenames_glob(variable, drs, fx_var=None): +def _get_filenames_glob(variable, drs): """Return patterns that can be used to look for input files.""" - input_type = 'input_{}file'.format('fx_' if fx_var else '') - path_template = _select_drs(input_type, drs, variable['project']) - filenames_glob = _replace_tags(path_template, variable, fx_var) + path_template = _select_drs('input_file', drs, variable['project']) + filenames_glob = _replace_tags(path_template, variable) return filenames_glob -def _find_input_files(variable, rootpath, drs, fx_var=None): - logger.debug("Looking for input %sfiles for variable %s of dataset %s", - fx_var + ' fx ' if fx_var else '', variable['short_name'], - variable['dataset']) - - input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var) - filenames_glob = _get_filenames_glob(variable, drs, fx_var) +def _find_input_files(variable, rootpath, drs): + input_dirs = _find_input_dirs(variable, rootpath, drs) + filenames_glob = _get_filenames_glob(variable, drs) files = find_files(input_dirs, filenames_glob) return files @@ -240,28 +231,17 @@ def _find_input_files(variable, rootpath, drs, fx_var=None): def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" + # change ensemble to fixed r0i0p0 for fx variables + if variable['project'] == 'CMIP5'and variable['frequency'] == 'fx': + variable['ensemble'] = 'r0i0p0' files = _find_input_files(variable, rootpath, drs) - files = select_files(files, variable['start_year'], variable['end_year']) + # do time gating only for non-fx variables + if variable['frequency'] != 'fx': + files = select_files(files, variable['start_year'], + variable['end_year']) return files -def get_input_fx_filelist(variable, rootpath, drs): - """Return a dict with the full path to fx input files.""" - fx_files = {} - for fx_var in variable['fx_files']: - var = dict(variable) - var['mip'] = replace_mip_fx(fx_var) - table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) - var['frequency'] = table.frequency - realm = getattr(table.get(var['short_name']), 'modeling_realm', None) - var['modeling_realm'] = realm if realm else table.realm - - files = _find_input_files(var, rootpath, drs, fx_var) - fx_files[fx_var] = files[0] if files else None - - return fx_files - - def get_output_file(variable, preproc_dir): """Return the full path to the output (preprocessed) file.""" cfg = get_project_config(variable['project']) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 3dc6cdf48c..a5b8d81495 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -6,13 +6,14 @@ from copy import deepcopy import yaml + from netCDF4 import Dataset -from . import __version__ from . import _recipe_checks as check +from . import __version__ from ._config import TAGS, get_institutes, replace_tags -from ._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file, get_statistic_output_file) +from ._data_finder import (get_input_filelist, get_output_file, + get_statistic_output_file) from ._provenance import TrackedFile, get_recipe_provenance from ._recipe_checks import RecipeError from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks, @@ -361,6 +362,45 @@ def _get_default_settings(variable, config_user, derive=False): return settings +def get_input_fx_filelist(variable, rootpath, drs): + """Return a dict with fx vars keys and full file paths values.""" + fx_files_dict = {} + for fx_var_dict in variable['fx_files']: + fx_var = _add_fxvar_keys(fx_var_dict, variable) + fx_files = get_input_filelist( + variable=fx_var, + rootpath=rootpath, + drs=drs) + if fx_files: + fx_files_dict[fx_var['short_name']] = fx_files[0] + else: + fx_files_dict[fx_var['short_name']] = None + + return fx_files_dict + + +def _add_fxvar_keys(fx_var_dict, variable): + """Add keys specific to fx variable to use get_input_filelist.""" + fx_variable = dict(variable) + + # set variable names + fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable['short_name'] = fx_var_dict['short_name'] + + # specificities of project + if fx_variable['project'] == 'CMIP5': + fx_variable['mip'] = 'fx' + fx_variable['ensemble'] = 'r0i0p0' + elif fx_variable['project'] == 'CMIP6': + fx_variable['grid'] = variable['grid'] + if 'mip' in fx_var_dict: + fx_variable['mip'] = fx_var_dict['mip'] + # add missing cmor info + _add_cmor_info(fx_variable, override=True) + + return fx_variable + + def _update_fx_settings(settings, variable, config_user): """Find and set the FX derive/mask settings.""" # update for derive @@ -384,7 +424,7 @@ def _update_fx_settings(settings, variable, config_user): settings['mask_landsea']['fx_files'] = [] var = dict(variable) - var['fx_files'] = ['sftlf', 'sftof'] + var['fx_files'] = [{'short_name': 'sftlf'}, {'short_name': 'sftof'}] fx_files_dict = get_input_fx_filelist( variable=var, rootpath=config_user['rootpath'], @@ -402,7 +442,7 @@ def _update_fx_settings(settings, variable, config_user): settings['mask_landseaice']['fx_files'] = [] var = dict(variable) - var['fx_files'] = ['sftgif'] + var['fx_files'] = [{'short_name': 'sftgif'}] fx_files_dict = get_input_fx_filelist( variable=var, rootpath=config_user['rootpath'], @@ -415,11 +455,13 @@ def _update_fx_settings(settings, variable, config_user): for step in ('area_statistics', 'volume_statistics'): if settings.get(step, {}).get('fx_files'): - settings[step]['fx_files'] = get_input_fx_filelist( - variable=variable, + var = dict(variable) + var['fx_files'] = settings.get(step, {}).get('fx_files') + fx_files_dict = get_input_fx_filelist( + variable=var, rootpath=config_user['rootpath'], - drs=config_user['drs'], - ) + drs=config_user['drs']) + settings[step]['fx_files'] = fx_files_dict def _read_attributes(filename): @@ -772,6 +814,10 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): name=derive_name) derive_tasks.append(task) + # don't do time gating for fx variables + if variables[0]['frequency'] == 'fx': + profile['extract_time'] = False + # Create (final) preprocessor task task = _get_single_preprocessor_task( variables, @@ -905,8 +951,8 @@ def _initialize_variables(self, raw_variable, raw_datasets): variable['institute'] = institute check.variable(variable, required_keys) if 'fx_files' in variable: - for fx_file in variable['fx_files']: - DATASET_KEYS.add(fx_file) + for fx_file_dict in variable['fx_files']: + DATASET_KEYS.add(fx_file_dict['short_name']) # Get the fx files variable['fx_files'] = get_input_fx_filelist( variable=variable, diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index a6ab607df0..ee0e569304 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -97,15 +97,18 @@ def data_availability(input_files, var): required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() - for filename in input_files: - start, end = get_start_end_year(filename) - available_years.update(range(start, end + 1)) - - missing_years = required_years - available_years - if missing_years: - raise RecipeError( - "No input data available for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), input_files)) + # check time avail only for non-fx variables + if var['frequency'] != 'fx': + for filename in input_files: + start, end = get_start_end_year(filename) + available_years.update(range(start, end + 1)) + + missing_years = required_years - available_years + if missing_years: + raise RecipeError( + "No input data available for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files)) def tasks_valid(tasks): diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 178cc64f76..5cac1b31cd 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -97,13 +97,15 @@ def check_metadata(self, logger=None): self._check_fill_value() self._check_dim_names() self._check_coords() - self._check_time_coord() + if self.frequency != 'fx': + self._check_time_coord() self._check_rank() self.report_warnings(logger) self.report_errors() - self._add_auxiliar_time_coordinates() + if self.frequency != 'fx': + self._add_auxiliar_time_coordinates() return self._cube def report_errors(self): diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml index 06433da1bf..eb8e3464f6 100644 --- a/esmvalcore/config-developer.yml +++ b/esmvalcore/config-developer.yml @@ -24,7 +24,7 @@ CMIP6: BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' institutes: @@ -147,26 +147,7 @@ CMIP5: ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/' SMHI: '[dataset]/[ensemble]/[exp]/[frequency]' BSC: '[type]/[project]/[exp]/[dataset.lower]' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_*.nc' - input_fx_dir: - default: '/' - BADC: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - CP4CDS: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[fx_var]/latest/' - DKRZ: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - ETHZ: '[exp]/fx/[fx_var]/[dataset]/r0i0p0' - input_fx_file: '[fx_var]_fx_[dataset]_[exp]_r0i0p0.nc' - fx_mip_change: - 'areacella': 'Amon' - 'areacello': 'Omon' - 'basin': 'Omon' - 'deptho': 'Omon' - 'mrsofc': 'Lmon' - 'orog': 'Amon' - 'rootd': 'Lmon' - 'sftgif': 'Lmon' - 'sftlf': 'Amon' - 'sftof': 'Omon' - 'volcello': 'Omon' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' institutes: 'ACCESS1-0': ['CSIRO-BOM'] @@ -239,10 +220,6 @@ OBS: input_file: default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc' BSC: '[short_name]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP5' @@ -251,10 +228,6 @@ obs4mips: input_dir: default: 'Tier[tier]/[dataset]' input_file: '[short_name]_[dataset]_[level]_[version]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' cmor_path: 'obs4mips' diff --git a/esmvalcore/preprocessor/_derive/nbp_grid.py b/esmvalcore/preprocessor/_derive/nbp_grid.py index 54a6b95ce6..ff6169fda6 100644 --- a/esmvalcore/preprocessor/_derive/nbp_grid.py +++ b/esmvalcore/preprocessor/_derive/nbp_grid.py @@ -9,7 +9,7 @@ class DerivedVariable(DerivedVariableBase): # Required variables required = [{ 'short_name': 'nbp', - 'fx_files': ['sftlf'], + 'fx_files': [{'short_name': 'sftlf'}], }] @staticmethod diff --git a/tests/integration/preprocessor/_derive/test_interface.py b/tests/integration/preprocessor/_derive/test_interface.py index 460d3f5341..c0854ed529 100644 --- a/tests/integration/preprocessor/_derive/test_interface.py +++ b/tests/integration/preprocessor/_derive/test_interface.py @@ -26,7 +26,7 @@ def test_get_required_with_fx(): reference = [{ 'short_name': 'nbp', - 'fx_files': ['sftlf'], + 'fx_files': [{'short_name': 'sftlf'}], }] assert variables == reference diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 791b54453d..71e49ae53d 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -7,8 +7,8 @@ import yaml import esmvalcore._config -from esmvalcore._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file) +from esmvalcore._data_finder import get_input_filelist, get_output_file +from esmvalcore._recipe import get_input_fx_filelist from esmvalcore.cmor.table import read_cmor_tables # Initialize with standard config developer file @@ -103,11 +103,18 @@ def test_get_input_fx_filelist(root, cfg): # Find files rootpath = {cfg['variable']['project']: [root]} drs = {cfg['variable']['project']: cfg['drs']} - fx_files = get_input_fx_filelist(cfg['variable'], rootpath, drs) + cfg['variable']['fx_files'] = [ + {'short_name': short_name} for short_name + in cfg['variable']['fx_files'] + ] + fx_files_dict = get_input_fx_filelist( + variable=cfg['variable'], + rootpath=rootpath, + drs=drs) # Test result reference = { fx_var: os.path.join(root, filename) if filename else None for fx_var, filename in cfg['found_files'].items() } - assert fx_files == reference + assert fx_files_dict == reference diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 90730ad990..279955336b 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -97,7 +97,7 @@ def find_files(_, filenames): filename = str(tmp_path / 'input' / filename) filenames = [] if filename.endswith('*.nc'): - filename = filename[:-len('*.nc')] + filename = filename[:-len('*.nc')] + '_' intervals = [ '1990_1999', '2000_2009',