diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index c3710141fe..a79a80b0a4 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -196,18 +196,6 @@ def get_activity(variable): return None -def replace_mip_fx(fx_file): - """Replace MIP so to retrieve correct fx files.""" - default_mip = 'Amon' - if fx_file not in CFG['CMIP5']['fx_mip_change']: - logger.warning( - 'mip for fx variable %s is not specified in ' - 'config_developer.yml, using default (%s)', fx_file, default_mip) - new_mip = CFG['CMIP5']['fx_mip_change'].get(fx_file, default_mip) - logger.debug("Switching mip for fx file finding to %s", new_mip) - return new_mip - - TAGS_CONFIG_FILE = os.path.join( DIAGNOSTICS_PATH, 'config-references.yml') diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 258b315c7d..8610f9a0a4 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -9,8 +9,7 @@ import os import re -from ._config import get_project_config, replace_mip_fx -from .cmor.table import CMOR_TABLES +from ._config import get_project_config logger = logging.getLogger(__name__) @@ -93,7 +92,7 @@ def select_files(filenames, start_year, end_year): return selection -def _replace_tags(path, variable, fx_var=None): +def _replace_tags(path, variable): """Replace tags in the config-developer's file with actual values.""" path = path.strip('/') @@ -104,9 +103,7 @@ def _replace_tags(path, variable, fx_var=None): original_tag = tag tag, _, _ = _get_caps_options(tag) - if tag == 'fx_var': - replacewith = fx_var - elif tag == 'latestversion': # handled separately later + if tag == 'latestversion': # handled separately later continue elif tag in variable: replacewith = variable[tag] @@ -196,16 +193,15 @@ def get_rootpath(rootpath, project): raise KeyError('default rootpath must be specified in config-user file') -def _find_input_dirs(variable, rootpath, drs, fx_var=None): +def _find_input_dirs(variable, rootpath, drs): """Return a the full paths to input directories.""" project = variable['project'] root = get_rootpath(rootpath, project) - input_type = 'input_{}dir'.format('fx_' if fx_var else '') - path_template = _select_drs(input_type, drs, project) + path_template = _select_drs('input_dir', drs, project) dirnames = [] - for dirname_template in _replace_tags(path_template, variable, fx_var): + for dirname_template in _replace_tags(path_template, variable): for base_path in root: dirname = os.path.join(base_path, dirname_template) dirname = _resolve_latestversion(dirname) @@ -218,21 +214,16 @@ def _find_input_dirs(variable, rootpath, drs, fx_var=None): return dirnames -def _get_filenames_glob(variable, drs, fx_var=None): +def _get_filenames_glob(variable, drs): """Return patterns that can be used to look for input files.""" - input_type = 'input_{}file'.format('fx_' if fx_var else '') - path_template = _select_drs(input_type, drs, variable['project']) - filenames_glob = _replace_tags(path_template, variable, fx_var) + path_template = _select_drs('input_file', drs, variable['project']) + filenames_glob = _replace_tags(path_template, variable) return filenames_glob -def _find_input_files(variable, rootpath, drs, fx_var=None): - logger.debug("Looking for input %sfiles for variable %s of dataset %s", - fx_var + ' fx ' if fx_var else '', variable['short_name'], - variable['dataset']) - - input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var) - filenames_glob = _get_filenames_glob(variable, drs, fx_var) +def _find_input_files(variable, rootpath, drs): + input_dirs = _find_input_dirs(variable, rootpath, drs) + filenames_glob = _get_filenames_glob(variable, drs) files = find_files(input_dirs, filenames_glob) return files @@ -240,28 +231,18 @@ def _find_input_files(variable, rootpath, drs, fx_var=None): def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" + # change ensemble to fixed r0i0p0 for fx variables + # this is needed and is not a duplicate effort + if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx': + variable['ensemble'] = 'r0i0p0' files = _find_input_files(variable, rootpath, drs) - files = select_files(files, variable['start_year'], variable['end_year']) + # do time gating only for non-fx variables + if variable['frequency'] != 'fx': + files = select_files(files, variable['start_year'], + variable['end_year']) return files -def get_input_fx_filelist(variable, rootpath, drs): - """Return a dict with the full path to fx input files.""" - fx_files = {} - for fx_var in variable['fx_files']: - var = dict(variable) - var['mip'] = replace_mip_fx(fx_var) - table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) - var['frequency'] = table.frequency - realm = getattr(table.get(var['short_name']), 'modeling_realm', None) - var['modeling_realm'] = realm if realm else table.realm - - files = _find_input_files(var, rootpath, drs, fx_var) - fx_files[fx_var] = files[0] if files else None - - return fx_files - - def get_output_file(variable, preproc_dir): """Return the full path to the output (preprocessed) file.""" cfg = get_project_config(variable['project']) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ee0acc2a0f..ef2f3b0238 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -11,8 +11,8 @@ from . import __version__ from . import _recipe_checks as check from ._config import TAGS, get_activity, get_institutes, replace_tags -from ._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file, get_statistic_output_file) +from ._data_finder import (get_input_filelist, get_output_file, + get_statistic_output_file) from ._provenance import TrackedFile, get_recipe_provenance from ._recipe_checks import RecipeError from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks, @@ -360,6 +360,51 @@ def _get_default_settings(variable, config_user, derive=False): return settings +def _add_fxvar_keys(fx_var_dict, variable): + """Add keys specific to fx variable to use get_input_filelist.""" + fx_variable = dict(variable) + + # set variable names + fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable['short_name'] = fx_var_dict['short_name'] + + # specificities of project + if fx_variable['project'] == 'CMIP5': + fx_variable['mip'] = 'fx' + fx_variable['ensemble'] = 'r0i0p0' + elif fx_variable['project'] == 'CMIP6': + fx_variable['grid'] = variable['grid'] + if 'mip' in fx_var_dict: + fx_variable['mip'] = fx_var_dict['mip'] + # add missing cmor info + _add_cmor_info(fx_variable, override=True) + + return fx_variable + + +def _get_correct_fx_file(variable, fx_varname, config_user): + """Wrapper to standard file getter to recover the correct fx file.""" + var = dict(variable) + if var['project'] == 'CMIP5': + fx_var = _add_fxvar_keys({'short_name': fx_varname, 'mip': 'fx'}, var) + elif var['project'] == 'CMIP6': + if fx_varname == 'sftlf': + fx_var = _add_fxvar_keys({'short_name': fx_varname, 'mip': 'fx'}, + var) + elif fx_varname == 'sftof': + fx_var = _add_fxvar_keys({'short_name': fx_varname, 'mip': 'Ofx'}, + var) + # TODO allow availability for multiple mip's for sftgif + elif fx_varname == 'sftgif': + fx_var = _add_fxvar_keys({'short_name': fx_varname, 'mip': 'fx'}, + var) + fx_files = get_input_filelist(variable=fx_var, + rootpath=config_user['rootpath'], + drs=config_user['drs'])[0] + + return fx_files + + def _update_fx_settings(settings, variable, config_user): """Find and set the FX derive/mask settings.""" # update for derive @@ -368,28 +413,20 @@ def _update_fx_settings(settings, variable, config_user): for var in get_required(variable['short_name']): if 'fx_files' in var: _augment(var, variable) - fx_files.update( - get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs'], - )) + for fxvar in var['fx_files']: + fx_files[fxvar] = _get_correct_fx_file(var, fxvar, + config_user) settings['derive']['fx_files'] = fx_files # update for landsea if 'mask_landsea' in settings: + fx_files_dict = {} # Configure ingestion of land/sea masks logger.debug('Getting fx mask settings now...') - settings['mask_landsea']['fx_files'] = [] - - var = dict(variable) - var['fx_files'] = ['sftlf', 'sftof'] - fx_files_dict = get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs'], - ) + fx_files_dict = { + 'sftlf': _get_correct_fx_file(variable, 'sftlf', config_user), + 'sftof': _get_correct_fx_file(variable, 'sftof', config_user)} # allow both sftlf and sftof if fx_files_dict['sftlf']: @@ -399,29 +436,21 @@ def _update_fx_settings(settings, variable, config_user): if 'mask_landseaice' in settings: logger.debug('Getting fx mask settings now...') - settings['mask_landseaice']['fx_files'] = [] - - var = dict(variable) - var['fx_files'] = ['sftgif'] - fx_files_dict = get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs'], - ) - - # allow sftgif (only, for now) + fx_files_dict = { + 'sftgif': _get_correct_fx_file(variable, 'sftgif', config_user)} if fx_files_dict['sftgif']: settings['mask_landseaice']['fx_files'].append( fx_files_dict['sftgif']) for step in ('area_statistics', 'volume_statistics'): if settings.get(step, {}).get('fx_files'): - settings[step]['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs'], - ) + var = dict(variable) + var['fx_files'] = settings.get(step, {}).get('fx_files') + fx_files_dict = { + fxvar: _get_correct_fx_file(variable, fxvar, config_user) + for fxvar in var['fx_files']} + settings[step]['fx_files'] = fx_files_dict def _read_attributes(filename): @@ -628,10 +657,8 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, ) _update_extract_shape(settings, config_user) _update_fx_settings( - settings=settings, - variable=variable, - config_user=config_user, - ) + settings=settings, variable=variable, + config_user=config_user) _update_target_grid( variable=variable, variables=variables, @@ -665,11 +692,17 @@ def _get_single_preprocessor_task(variables, config_user, name, ancestor_tasks=None): - """Create preprocessor tasks for a set of datasets.""" + """Create preprocessor tasks for a set of datasets w/ special case fx.""" if ancestor_tasks is None: ancestor_tasks = [] order = _extract_preprocessor_order(profile) ancestor_products = [p for task in ancestor_tasks for p in task.products] + + if variables[0]['frequency'] == 'fx': + profile['extract_time'] = False + check.check_for_temporal_preprocs(profile) + ancestor_products = None + products = _get_preprocessor_products( variables=variables, profile=profile, @@ -939,17 +972,6 @@ def _initialize_variables(self, raw_variable, raw_datasets): if activity: variable['activity'] = activity check.variable(variable, required_keys) - if 'fx_files' in variable: - for fx_file in variable['fx_files']: - DATASET_KEYS.add(fx_file) - # Get the fx files - variable['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) - logger.info("Using fx files for var %s of dataset %s:\n%s", - variable['short_name'], variable['dataset'], - variable['fx_files']) return variables @@ -999,7 +1021,7 @@ def _set_alias(self, preprocessor_output): but it will use the dataset info to compute the others Examples: - + -------- - {project: CMIP5, model: EC-Earth, ensemble: r1i1p1} - {project: CMIP6, model: EC-Earth, ensemble: r1i1p1f1} will generate alias 'CMIP5' and 'CMIP6' @@ -1022,7 +1044,7 @@ def _set_alias(self, preprocessor_output): - {project: CMIP5, model: EC-Earth, experiment: historical} will generate alias 'EC-Earth' - Parameters + Parameters: ---------- preprocessor_output : dict preprocessor output dictionary diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 794ae46268..b45d912d86 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -97,15 +97,18 @@ def data_availability(input_files, var): required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() - for filename in input_files: - start, end = get_start_end_year(filename) - available_years.update(range(start, end + 1)) - - missing_years = required_years - available_years - if missing_years: - raise RecipeError( - "No input data available for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), input_files)) + # check time avail only for non-fx variables + if var['frequency'] != 'fx': + for filename in input_files: + start, end = get_start_end_year(filename) + available_years.update(range(start, end + 1)) + + missing_years = required_years - available_years + if missing_years: + raise RecipeError( + "No input data available for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files)) def tasks_valid(tasks): @@ -120,6 +123,24 @@ def tasks_valid(tasks): filenames.add(product.filename) +def check_for_temporal_preprocs(profile): + """Check for temporal operations on fx variables.""" + temporal_preprocs = [ + 'extract_season', + 'extract_month', + 'annual_mean', + 'seasonal_mean', + 'time_average', + 'regrid_time', + ] + temp_preprocs = [ + preproc for preproc in profile if preproc in temporal_preprocs] + if temp_preprocs: + raise RecipeError( + "Time coordinate preprocessor step {} not permitted on fx vars \ + please remove them from recipe.".format(", ".join(temp_preprocs))) + + def extract_shape(settings): """Check that `extract_shape` arguments are valid.""" shapefile = settings.get('shapefile', '') diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 635abdbc00..a5bba85cf8 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -98,14 +98,16 @@ def check_metadata(self, logger=None): self._check_fill_value() self._check_dim_names() self._check_coords() - self._check_time_coord() + if self.frequency != 'fx': + self._check_time_coord() self._check_rank() self.report_debug_messages(logger) self.report_warnings(logger) self.report_errors() - self._add_auxiliar_time_coordinates() + if self.frequency != 'fx': + self._add_auxiliar_time_coordinates() return self._cube def report_errors(self): diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml index d0279abf1d..1b3f8c975a 100644 --- a/esmvalcore/config-developer.yml +++ b/esmvalcore/config-developer.yml @@ -25,7 +25,7 @@ CMIP6: BADC: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' DKRZ: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' @@ -40,26 +40,7 @@ CMIP5: SMHI: '[dataset]/[ensemble]/[exp]/[frequency]' RCAST: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/' BSC: '[type]/[project]/[exp]/[dataset.lower]' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_*.nc' - input_fx_dir: - default: '/' - BADC: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - CP4CDS: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[fx_var]/latest/' - DKRZ: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - ETHZ: '[exp]/fx/[fx_var]/[dataset]/r0i0p0' - input_fx_file: '[fx_var]_fx_[dataset]_[exp]_r0i0p0.nc' - fx_mip_change: - 'areacella': 'Amon' - 'areacello': 'Omon' - 'basin': 'Omon' - 'deptho': 'Omon' - 'mrsofc': 'Lmon' - 'orog': 'Amon' - 'rootd': 'Lmon' - 'sftgif': 'Lmon' - 'sftlf': 'Amon' - 'sftof': 'Omon' - 'volcello': 'Omon' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' institutes: 'ACCESS1-0': ['CSIRO-BOM'] @@ -134,10 +115,6 @@ OBS: default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc' BSC: '[short_name]_*.nc' RCAST: '[short_name]_[mip]_[type]_[dataset]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP5' @@ -162,10 +139,6 @@ obs4mips: default: 'Tier[tier]/[dataset]' RCAST: '/' input_file: '[short_name]_[dataset]_[level]_[version]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' cmor_path: 'obs4mips' diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index f8c1bbbd9b..22dc6b1c58 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -244,11 +244,6 @@ def _write_ncl_metadata(output_dir, metadata): """Write NCL metadata files to output_dir.""" variables = [copy.deepcopy(v) for v in metadata.values()] - for variable in variables: - fx_files = variable.pop('fx_files', {}) - for fx_type in fx_files: - variable[fx_type] = fx_files[fx_type] - info = {'input_file_info': variables} # Split input_file_info into dataset and variable properties diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 791b54453d..d931f5730c 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -7,8 +7,7 @@ import yaml import esmvalcore._config -from esmvalcore._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file) +from esmvalcore._data_finder import get_input_filelist, get_output_file from esmvalcore.cmor.table import read_cmor_tables # Initialize with standard config developer file @@ -92,22 +91,3 @@ def test_get_input_filelist(root, cfg): # Test result reference = [os.path.join(root, file) for file in cfg['found_files']] assert sorted(input_filelist) == sorted(reference) - - -@pytest.mark.parametrize('cfg', CONFIG['get_input_fx_filelist']) -def test_get_input_fx_filelist(root, cfg): - """Test retrieving fx filelist.""" - create_tree(root, cfg.get('available_files'), - cfg.get('available_symlinks')) - - # Find files - rootpath = {cfg['variable']['project']: [root]} - drs = {cfg['variable']['project']: cfg['drs']} - fx_files = get_input_fx_filelist(cfg['variable'], rootpath, drs) - - # Test result - reference = { - fx_var: os.path.join(root, filename) if filename else None - for fx_var, filename in cfg['found_files'].items() - } - assert fx_files == reference diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 31d2d78538..6cbb43e97d 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -99,7 +99,7 @@ def find_files(_, filenames): filename = str(tmp_path / 'input' / filename) filenames = [] if filename.endswith('*.nc'): - filename = filename[:-len('*.nc')] + filename = filename[:-len('*.nc')] + '_' intervals = [ '1990_1999', '2000_2009', @@ -226,6 +226,41 @@ def test_simple_recipe(tmp_path, patched_datafinder, config_user): assert task.settings['custom_setting'] == 1 +def test_fx_preproc_error(tmp_path, patched_datafinder, config_user): + script = tmp_path / 'diagnostic.py' + script.write_text('') + content = dedent(""" + datasets: + - dataset: bcc-csm1-1 + + preprocessors: + preprocessor_name: + extract_season: + season: MAM + + diagnostics: + diagnostic_name: + variables: + sftlf: + preprocessor: preprocessor_name + project: CMIP5 + mip: fx + exp: historical + ensemble: r0i0p0 + start_year: 1999 + end_year: 2002 + additional_datasets: + - dataset: MPI-ESM-LR + scripts: null + """) + rec_err = "Time coordinate preprocessor step extract_season \ + not permitted on fx vars \ + please remove them from recipe." + with pytest.raises(Exception) as rec_err_exp: + get_recipe(tmp_path, content, config_user) + assert rec_err == rec_err_exp + + def test_default_preprocessor(tmp_path, patched_datafinder, config_user): content = dedent(""" @@ -313,6 +348,87 @@ def test_default_preprocessor(tmp_path, patched_datafinder, config_user): assert product.settings == defaults +def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): + + content = dedent(""" + diagnostics: + diagnostic_name: + variables: + sftlf: + project: CMIP5 + mip: fx + exp: historical + ensemble: r0i0p0 + start_year: 2000 + end_year: 2005 + additional_datasets: + - {dataset: CanESM2} + scripts: null + """) + + recipe = get_recipe(tmp_path, content, config_user) + + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert len(task.products) == 1 + product = task.products.pop() + preproc_dir = os.path.dirname(product.filename) + assert preproc_dir.startswith(str(tmp_path)) + + fix_dir = os.path.join( + preproc_dir, + 'CMIP5_CanESM2_fx_historical_r0i0p0_sftlf_2000-2005_fixed') + + defaults = { + 'load': { + 'callback': concatenate_callback, + }, + 'concatenate': {}, + 'fix_file': { + 'project': 'CMIP5', + 'dataset': 'CanESM2', + 'short_name': 'sftlf', + 'output_dir': fix_dir, + }, + 'fix_data': { + 'project': 'CMIP5', + 'dataset': 'CanESM2', + 'short_name': 'sftlf', + 'cmor_table': 'CMIP5', + 'mip': 'fx', + 'frequency': 'fx', + }, + 'fix_metadata': { + 'project': 'CMIP5', + 'dataset': 'CanESM2', + 'short_name': 'sftlf', + 'cmor_table': 'CMIP5', + 'mip': 'fx', + 'frequency': 'fx', + }, + 'cmor_check_metadata': { + 'cmor_table': 'CMIP5', + 'mip': 'fx', + 'short_name': 'sftlf', + 'frequency': 'fx', + }, + 'cmor_check_data': { + 'cmor_table': 'CMIP5', + 'mip': 'fx', + 'short_name': 'sftlf', + 'frequency': 'fx', + }, + 'cleanup': { + 'remove': [fix_dir] + }, + 'save': { + 'compress': False, + 'filename': product.filename, + } + } + assert product.settings == defaults + + def test_empty_variable(tmp_path, patched_datafinder, config_user): """Test that it is possible to specify all information in the dataset.""" content = dedent("""