diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 0efc7bc571..be081d343f 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -179,18 +179,6 @@ def get_institutes(variable): return CFG.get(project, {}).get('institutes', {}).get(dataset, []) -def replace_mip_fx(fx_file): - """Replace MIP so to retrieve correct fx files.""" - default_mip = 'Amon' - if fx_file not in CFG['CMIP5']['fx_mip_change']: - logger.warning( - 'mip for fx variable %s is not specified in ' - 'config_developer.yml, using default (%s)', fx_file, default_mip) - new_mip = CFG['CMIP5']['fx_mip_change'].get(fx_file, default_mip) - logger.debug("Switching mip for fx file finding to %s", new_mip) - return new_mip - - TAGS_CONFIG_FILE = os.path.join( DIAGNOSTICS_PATH, 'config-references.yml') diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 258b315c7d..66e38a8753 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -9,15 +9,14 @@ import os import re -from ._config import get_project_config, replace_mip_fx -from .cmor.table import CMOR_TABLES +from ._config import get_project_config logger = logging.getLogger(__name__) def find_files(dirnames, filenames): """Find files matching filenames in dirnames.""" - logger.debug("Looking for files matching %s in %s", filenames, dirnames) + logger.info("Looking for files matching %s in %s", filenames, dirnames) result = [] for dirname in dirnames: @@ -93,7 +92,7 @@ def select_files(filenames, start_year, end_year): return selection -def _replace_tags(path, variable, fx_var=None): +def _replace_tags(path, variable): """Replace tags in the config-developer's file with actual values.""" path = path.strip('/') @@ -104,9 +103,7 @@ def _replace_tags(path, variable, fx_var=None): original_tag = tag tag, _, _ = _get_caps_options(tag) - if tag == 'fx_var': - replacewith = fx_var - elif tag == 'latestversion': # handled separately later + if tag == 'latestversion': # handled separately later continue elif tag in variable: replacewith = variable[tag] @@ -196,16 +193,16 @@ def get_rootpath(rootpath, project): raise KeyError('default rootpath must be specified in config-user file') -def _find_input_dirs(variable, rootpath, drs, fx_var=None): +def _find_input_dirs(variable, rootpath, drs): """Return a the full paths to input directories.""" project = variable['project'] root = get_rootpath(rootpath, project) - input_type = 'input_{}dir'.format('fx_' if fx_var else '') + input_type = 'input_dir' path_template = _select_drs(input_type, drs, project) dirnames = [] - for dirname_template in _replace_tags(path_template, variable, fx_var): + for dirname_template in _replace_tags(path_template, variable): for base_path in root: dirname = os.path.join(base_path, dirname_template) dirname = _resolve_latestversion(dirname) @@ -218,21 +215,17 @@ def _find_input_dirs(variable, rootpath, drs, fx_var=None): return dirnames -def _get_filenames_glob(variable, drs, fx_var=None): +def _get_filenames_glob(variable, drs): """Return patterns that can be used to look for input files.""" - input_type = 'input_{}file'.format('fx_' if fx_var else '') + input_type = 'input_file' path_template = _select_drs(input_type, drs, variable['project']) - filenames_glob = _replace_tags(path_template, variable, fx_var) + filenames_glob = _replace_tags(path_template, variable) return filenames_glob -def _find_input_files(variable, rootpath, drs, fx_var=None): - logger.debug("Looking for input %sfiles for variable %s of dataset %s", - fx_var + ' fx ' if fx_var else '', variable['short_name'], - variable['dataset']) - - input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var) - filenames_glob = _get_filenames_glob(variable, drs, fx_var) +def _find_input_files(variable, rootpath, drs): + input_dirs = _find_input_dirs(variable, rootpath, drs) + filenames_glob = _get_filenames_glob(variable, drs) files = find_files(input_dirs, filenames_glob) return files @@ -240,29 +233,18 @@ def _find_input_files(variable, rootpath, drs, fx_var=None): def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" + # change ensemble to fixed r0i0p0 for fx variables + if variable['project'] == 'CMIP5'and variable['frequency'] == 'fx': + variable['ensemble'] = 'r0i0p0' files = _find_input_files(variable, rootpath, drs) - files = select_files(files, variable['start_year'], variable['end_year']) + # do time gating only for non-fx variables + if variable['frequency'] != 'fx': + files = select_files(files, variable['start_year'], + variable['end_year']) return files -def get_input_fx_filelist(variable, rootpath, drs): - """Return a dict with the full path to fx input files.""" - fx_files = {} - for fx_var in variable['fx_files']: - var = dict(variable) - var['mip'] = replace_mip_fx(fx_var) - table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) - var['frequency'] = table.frequency - realm = getattr(table.get(var['short_name']), 'modeling_realm', None) - var['modeling_realm'] = realm if realm else table.realm - - files = _find_input_files(var, rootpath, drs, fx_var) - fx_files[fx_var] = files[0] if files else None - - return fx_files - - -def get_output_file(variable, preproc_dir): +def get_output_file(variable, preproc_dir, parent_var=None): """Return the full path to the output (preprocessed) file.""" cfg = get_project_config(variable['project']) @@ -271,12 +253,20 @@ def get_output_file(variable, preproc_dir): variable = dict(variable) variable['exp'] = '-'.join(variable['exp']) - outfile = os.path.join( - preproc_dir, - variable['diagnostic'], - variable['variable_group'], - _replace_tags(cfg['output_file'], variable)[0] + '.nc', - ) + if not parent_var: + outfile = os.path.join( + preproc_dir, + variable['diagnostic'], + variable['variable_group'], + _replace_tags(cfg['output_file'], variable)[0] + '.nc', + ) + else: + outfile = os.path.join( + preproc_dir, + variable['diagnostic'], + parent_var['variable_group'], + _replace_tags(cfg['output_file'], variable)[0] + '.nc', + ) return outfile diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 3dc6cdf48c..2b607e80ff 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -6,13 +6,14 @@ from copy import deepcopy import yaml + from netCDF4 import Dataset -from . import __version__ from . import _recipe_checks as check +from . import __version__ from ._config import TAGS, get_institutes, replace_tags -from ._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file, get_statistic_output_file) +from ._data_finder import (get_input_filelist, get_output_file, + get_statistic_output_file) from ._provenance import TrackedFile, get_recipe_provenance from ._recipe_checks import RecipeError from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks, @@ -361,65 +362,95 @@ def _get_default_settings(variable, config_user, derive=False): return settings -def _update_fx_settings(settings, variable, config_user): +def _update_fx_files(fx_varlist, config_user, parent_variable): + """Get the fx files dict for a list of fx variables.""" + fx_files_dict = {} + for fx_variable in fx_varlist: + if fx_variable['dataset'] == parent_variable['dataset']: + fx_files_dict[fx_variable['short_name']] = get_output_file( + fx_variable, + config_user['preproc_dir'], + parent_variable) + + return fx_files_dict + + +def _add_fxvar_keys(fx_var_dict, variable): + """Add keys specific to fx variable to use get_input_filelist.""" + fx_variable = dict(variable) + + # remove keys that dont belong to fx var analysis + if 'fx_files' in fx_variable: + del fx_variable['fx_files'] + if 'force_derivation' in fx_variable: + del fx_variable['force_derivation'] + + # set fx recognition flag and variable names + fx_variable['fxvar'] = True + fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable['short_name'] = fx_var_dict['short_name'] + + # specificities of project + if fx_variable['project'] == 'CMIP5': + fx_variable['mip'] = 'fx' + fx_variable['ensemble'] = 'r0i0p0' + elif fx_variable['project'] == 'CMIP6': + fx_variable['grid'] = variable['grid'] + if 'mip' in fx_var_dict: + fx_variable['mip'] = fx_var_dict['mip'] + # add missing cmor info + _add_cmor_info(fx_variable, override=True) + + return fx_variable + + +def _update_fx_settings(settings, variable): """Find and set the FX derive/mask settings.""" # update for derive if 'derive' in settings: - fx_files = {} - for var in get_required(variable['short_name']): - if 'fx_files' in var: - _augment(var, variable) - fx_files.update( - get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs'])) - settings['derive']['fx_files'] = fx_files + if 'fx_files' in variable: + settings['derive']['fx_files'] = variable['fx_files'] # update for landsea if 'mask_landsea' in settings: # Configure ingestion of land/sea masks - logger.debug('Getting fx mask settings now...') + if 'fx_files' not in variable: + fx_set = "fx_files: ['sftlf', 'sftof']" + raise RecipeError( + "Masking: {} missing from {}".format(fx_set, + variable['short_name'])) + logger.debug('Getting fx mask settings now...') settings['mask_landsea']['fx_files'] = [] - - var = dict(variable) - var['fx_files'] = ['sftlf', 'sftof'] - fx_files_dict = get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) - - # allow both sftlf and sftof - if fx_files_dict['sftlf']: - settings['mask_landsea']['fx_files'].append(fx_files_dict['sftlf']) - if fx_files_dict['sftof']: - settings['mask_landsea']['fx_files'].append(fx_files_dict['sftof']) + if 'fx_files' in variable: + fx_files = variable['fx_files'] + # allow both sftlf and sftof + if fx_files['sftlf']: + settings['mask_landsea']['fx_files'].append(fx_files['sftlf']) + if fx_files['sftof']: + settings['mask_landsea']['fx_files'].append(fx_files['sftof']) if 'mask_landseaice' in settings: + if 'fx_files' not in variable: + fx_set = "fx_files: ['sftgif']" + raise RecipeError( + "Masking: {} missing from {}".format(fx_set, + variable['short_name'])) logger.debug('Getting fx mask settings now...') settings['mask_landseaice']['fx_files'] = [] - - var = dict(variable) - var['fx_files'] = ['sftgif'] - fx_files_dict = get_input_fx_filelist( - variable=var, - rootpath=config_user['rootpath'], - drs=config_user['drs']) - - # allow sftgif (only, for now) - if fx_files_dict['sftgif']: - settings['mask_landseaice']['fx_files'].append( - fx_files_dict['sftgif']) + if 'fx_files' in variable: + fx_files = variable['fx_files'] + # allow sftgif (only, for now) + if fx_files['sftlf']: + settings['mask_landseaice']['fx_files'].append( + fx_files['sftgif']) for step in ('area_statistics', 'volume_statistics'): if settings.get(step, {}).get('fx_files'): - settings[step]['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs'], - ) + if 'fx_files' in variable: + fx_files = variable['fx_files'] + settings[step]['fx_files'] = fx_files def _read_attributes(filename): @@ -438,22 +469,26 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset.""" # Find input files locally. + var = dict(variable) + # change ensemble to fixed r0i0p0 + if var['project'] == 'CMIP5': + if var['frequency'] == 'fx': + var['ensemble'] = 'r0i0p0' input_files = get_input_filelist( - variable=variable, + variable=var, rootpath=config_user['rootpath'], drs=config_user['drs']) - # Set up downloading using synda if requested. # Do not download if files are already available locally. if config_user['synda_download'] and not input_files: - input_files = synda_search(variable) + input_files = synda_search(var) logger.info("Using input files for variable %s of dataset %s:\n%s", - variable['short_name'], variable['dataset'], + var['short_name'], var['dataset'], '\n'.join(input_files)) if (not config_user.get('skip-nonexistent') - or variable['dataset'] == variable.get('reference_dataset')): - check.data_availability(input_files, variable) + or var['dataset'] == var.get('reference_dataset')): + check.data_availability(input_files, var) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -583,13 +618,18 @@ def get_matching(attributes): def _get_preprocessor_products(variables, profile, order, ancestor_products, - config_user): + config_user, parent_variable=None): """Get preprocessor product definitions for a set of datasets.""" products = set() for variable in variables: - variable['filename'] = get_output_file(variable, - config_user['preproc_dir']) + if not parent_variable: + variable['filename'] = get_output_file(variable, + config_user['preproc_dir']) + else: + variable['filename'] = get_output_file(variable, + config_user['preproc_dir'], + parent_variable) if ancestor_products: grouped_ancestors = _match_products(ancestor_products, variables) @@ -607,7 +647,7 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, settings=settings, config_user=config_user) _update_fx_settings( - settings=settings, variable=variable, config_user=config_user) + settings=settings, variable=variable) _update_target_grid( variable=variable, variables=variables, @@ -636,19 +676,33 @@ def _get_single_preprocessor_task(variables, profile, config_user, name, - ancestor_tasks=None): + ancestor_tasks=None, + parent_variable=None): """Create preprocessor tasks for a set of datasets.""" if ancestor_tasks is None: ancestor_tasks = [] order = _extract_preprocessor_order(profile) - ancestor_products = [p for task in ancestor_tasks for p in task.products] - products = _get_preprocessor_products( - variables=variables, - profile=profile, - order=order, - ancestor_products=ancestor_products, - config_user=config_user, - ) + ancestor_products = [ + p for task in ancestor_tasks for p in task.products + if not task.name.split('/')[1].startswith('fx_') + ] + if not parent_variable: + products = _get_preprocessor_products( + variables=variables, + profile=profile, + order=order, + ancestor_products=ancestor_products, + config_user=config_user, + ) + else: + products = _get_preprocessor_products( + variables=variables, + profile=profile, + order=order, + ancestor_products=ancestor_products, + config_user=config_user, + parent_variable=parent_variable + ) if not products: raise RecipeError( @@ -730,13 +784,40 @@ def append(group_prefix, var): append(group_prefix, var) else: # Process input data needed to derive variable - for var in get_required(variable['short_name']): - _augment(var, variable) - append(group_prefix, var) + if variable['frequency'] != 'fx': + for var in get_required(variable['short_name']): + _augment(var, variable) + append(group_prefix, var) return derive_input +def _get_fx_tasks(fx_variable, config_user, task_name, parent_variable): + """Create a list of fx variables tasks.""" + fx_profile = {'fix_file': True, + 'load': True, + 'fix_metadata': True, + 'extract_time': False, + 'cmor_check_metadata': True, + 'save': True, + 'cleanup': True} + + fx_task_name = task_name.split( + TASKSEP)[0] + TASKSEP + 'fx_' + fx_variable['short_name'] + logger.info("Creating preprocessor fx-default task for variable '%s'", + fx_variable['short_name']) + + # Create (final) preprocessor task + fx_task = _get_single_preprocessor_task( + [fx_variable], + fx_profile, + config_user, + name=fx_task_name, + parent_variable=parent_variable) + + return fx_task + + def _get_preprocessor_task(variables, profiles, config_user, task_name): """Create preprocessor task(s) for a set of datasets.""" # First set up the preprocessor profile @@ -747,18 +828,30 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): "Unknown preprocessor {} in variable {} of diagnostic {}".format( preproc_name, variable['short_name'], variable['diagnostic'])) profile = deepcopy(profiles[variable['preprocessor']]) - logger.info("Creating preprocessor '%s' task for variable '%s'", - variable['preprocessor'], variable['short_name']) variables = _limit_datasets(variables, profile, config_user.get('max_datasets')) for variable in variables: _add_cmor_info(variable) - # Create preprocessor task(s) + + # manage variable lists for regular and fx variables + regular_variables = [var for var in variables if 'fxvar' not in var.keys()] + fx_variables = [var for var in variables if 'fxvar' in var.keys()] + + # create tasks for fx variables (first ones) + fx_tasks = [] + if fx_variables: + fx_tasks = [ + _get_fx_tasks(fx_var, config_user, + task_name, regular_variables[0]) + for fx_var in fx_variables] + + # create derive tasks derive_tasks = [] if variable.get('derive'): # Create tasks to prepare the input data for the derive step derive_profile, profile = _split_derive_profile(profile) - derive_input = _get_derive_input_variables(variables, config_user) + derive_input = _get_derive_input_variables(regular_variables, + config_user) for derive_variables in derive_input.values(): for derive_variable in derive_variables: @@ -769,15 +862,27 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): derive_variables, derive_profile, config_user, + ancestor_tasks=fx_tasks, name=derive_name) derive_tasks.append(task) - # Create (final) preprocessor task + # cumulative ancestors + all_ancestor_tasks = fx_tasks + derive_tasks + + logger.info("Creating preprocessor '%s' task for variable '%s'", + regular_variables[0]['preprocessor'], + regular_variables[0]['short_name']) + + # don't do time gating for fx variables + if regular_variables[0]['frequency'] == 'fx': + profile['extract_time'] = False + + # add all regular variable task task = _get_single_preprocessor_task( - variables, + regular_variables, profile, config_user, - ancestor_tasks=derive_tasks, + ancestor_tasks=all_ancestor_tasks, name=task_name) return task @@ -866,27 +971,54 @@ def _initialize_datasets(raw_datasets): check.duplicate_datasets(datasets) return datasets - def _initialize_variables(self, raw_variable, raw_datasets): - """Define variables for all datasets.""" - variables = [] - - raw_variable = deepcopy(raw_variable) - datasets = self._initialize_datasets( - raw_datasets + raw_variable.pop('additional_datasets', [])) - - for index, dataset in enumerate(datasets): - variable = deepcopy(raw_variable) - variable.update(dataset) - variable['recipe_dataset_index'] = index - if ('cmor_table' not in variable - and variable.get('project') in CMOR_TABLES): - variable['cmor_table'] = variable['project'] + def _get_required_var_keys(self, variable, dataset, + index, fx_var_dict=None): + """Assemble correct variable attributes.""" + if fx_var_dict: + variable['short_name'] = fx_var_dict['short_name'] + variable.update(dataset) + variable['recipe_dataset_index'] = index + if ('cmor_table' not in variable + and variable.get('project') in CMOR_TABLES): + variable['cmor_table'] = variable['project'] + if not fx_var_dict: if 'end_year' in variable and 'max_years' in self._cfg: variable['end_year'] = min( variable['end_year'], variable['start_year'] + self._cfg['max_years'] - 1) + else: + variable = _add_fxvar_keys(fx_var_dict, variable) + return variable + + def _assemble_varlist(self, raw_variable, datasets): + """Assemble complete list of variables.""" + variables = [] + + for index, dataset in enumerate(datasets): + variable = deepcopy(raw_variable) + variable = self._get_required_var_keys(variable, dataset, + index) variables.append(variable) + if 'fx_files' not in raw_variable: + return variables + else: + for fx_var_dict in raw_variable['fx_files']: + for index, dataset in enumerate(datasets): + fx_variable = deepcopy(raw_variable) + fx_variable = self._get_required_var_keys( + fx_variable, dataset, index, fx_var_dict=fx_var_dict) + variables.append(fx_variable) + return variables + + def _initialize_variables(self, raw_variable, raw_datasets): + """Define variables for all datasets.""" + raw_variable = deepcopy(raw_variable) + datasets = self._initialize_datasets( + raw_datasets + raw_variable.pop('additional_datasets', [])) + + # get full list of variables + variables = self._assemble_varlist(raw_variable, datasets) required_keys = { 'short_name', 'mip', @@ -904,18 +1036,20 @@ def _initialize_variables(self, raw_variable, raw_datasets): if institute: variable['institute'] = institute check.variable(variable, required_keys) + + # add fx files information if 'fx_files' in variable: for fx_file in variable['fx_files']: - DATASET_KEYS.add(fx_file) - # Get the fx files - variable['fx_files'] = get_input_fx_filelist( - variable=variable, - rootpath=self._cfg['rootpath'], - drs=self._cfg['drs']) + DATASET_KEYS.add(fx_file['short_name']) + fx_varlist = [ + var for var in variables if 'fxvar' in var + ] + variable['fx_files'] = _update_fx_files(fx_varlist, + self._cfg, + variable) logger.info("Using fx files for var %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], variable['fx_files']) - return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, @@ -925,7 +1059,6 @@ def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, diagnostic_name) preprocessor_output = {} - for variable_group, raw_variable in raw_variables.items(): if raw_variable is None: raw_variable = {} @@ -1042,6 +1175,7 @@ def initialize_tasks(self): name=task_name) tasks.add(task) + # check tasks check.tasks_valid(tasks) # Resolve diagnostic ancestors diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index a6ab607df0..ee0e569304 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -97,15 +97,18 @@ def data_availability(input_files, var): required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() - for filename in input_files: - start, end = get_start_end_year(filename) - available_years.update(range(start, end + 1)) - - missing_years = required_years - available_years - if missing_years: - raise RecipeError( - "No input data available for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), input_files)) + # check time avail only for non-fx variables + if var['frequency'] != 'fx': + for filename in input_files: + start, end = get_start_end_year(filename) + available_years.update(range(start, end + 1)) + + missing_years = required_years - available_years + if missing_years: + raise RecipeError( + "No input data available for years {} in files {}".format( + ", ".join(str(year) for year in missing_years), + input_files)) def tasks_valid(tasks): diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 81682ac6bd..5f4dcd5d3e 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -97,13 +97,15 @@ def check_metadata(self, logger=None): self._check_fill_value() self._check_dim_names() self._check_coords() - self._check_time_coord() + if self.frequency != 'fx': + self._check_time_coord() self._check_rank() self.report_warnings(logger) self.report_errors() - self._add_auxiliar_time_coordinates() + if self.frequency != 'fx': + self._add_auxiliar_time_coordinates() return self._cube def report_errors(self): diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml index 06433da1bf..eb8e3464f6 100644 --- a/esmvalcore/config-developer.yml +++ b/esmvalcore/config-developer.yml @@ -24,7 +24,7 @@ CMIP6: BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' institutes: @@ -147,26 +147,7 @@ CMIP5: ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/' SMHI: '[dataset]/[ensemble]/[exp]/[frequency]' BSC: '[type]/[project]/[exp]/[dataset.lower]' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_*.nc' - input_fx_dir: - default: '/' - BADC: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - CP4CDS: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[fx_var]/latest/' - DKRZ: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]' - ETHZ: '[exp]/fx/[fx_var]/[dataset]/r0i0p0' - input_fx_file: '[fx_var]_fx_[dataset]_[exp]_r0i0p0.nc' - fx_mip_change: - 'areacella': 'Amon' - 'areacello': 'Omon' - 'basin': 'Omon' - 'deptho': 'Omon' - 'mrsofc': 'Lmon' - 'orog': 'Amon' - 'rootd': 'Lmon' - 'sftgif': 'Lmon' - 'sftlf': 'Amon' - 'sftof': 'Omon' - 'volcello': 'Omon' + input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc' output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' institutes: 'ACCESS1-0': ['CSIRO-BOM'] @@ -239,10 +220,6 @@ OBS: input_file: default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc' BSC: '[short_name]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP5' @@ -251,10 +228,6 @@ obs4mips: input_dir: default: 'Tier[tier]/[dataset]' input_file: '[short_name]_[dataset]_[level]_[version]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_fx_[fx_var].nc' output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]' cmor_type: 'CMIP6' cmor_path: 'obs4mips' diff --git a/esmvalcore/preprocessor/_derive/nbp_grid.py b/esmvalcore/preprocessor/_derive/nbp_grid.py index 54a6b95ce6..ff6169fda6 100644 --- a/esmvalcore/preprocessor/_derive/nbp_grid.py +++ b/esmvalcore/preprocessor/_derive/nbp_grid.py @@ -9,7 +9,7 @@ class DerivedVariable(DerivedVariableBase): # Required variables required = [{ 'short_name': 'nbp', - 'fx_files': ['sftlf'], + 'fx_files': [{'short_name': 'sftlf'}], }] @staticmethod diff --git a/tests/integration/preprocessor/_derive/test_interface.py b/tests/integration/preprocessor/_derive/test_interface.py index 460d3f5341..c0854ed529 100644 --- a/tests/integration/preprocessor/_derive/test_interface.py +++ b/tests/integration/preprocessor/_derive/test_interface.py @@ -26,7 +26,7 @@ def test_get_required_with_fx(): reference = [{ 'short_name': 'nbp', - 'fx_files': ['sftlf'], + 'fx_files': [{'short_name': 'sftlf'}], }] assert variables == reference diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 791b54453d..d931f5730c 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -7,8 +7,7 @@ import yaml import esmvalcore._config -from esmvalcore._data_finder import (get_input_filelist, get_input_fx_filelist, - get_output_file) +from esmvalcore._data_finder import get_input_filelist, get_output_file from esmvalcore.cmor.table import read_cmor_tables # Initialize with standard config developer file @@ -92,22 +91,3 @@ def test_get_input_filelist(root, cfg): # Test result reference = [os.path.join(root, file) for file in cfg['found_files']] assert sorted(input_filelist) == sorted(reference) - - -@pytest.mark.parametrize('cfg', CONFIG['get_input_fx_filelist']) -def test_get_input_fx_filelist(root, cfg): - """Test retrieving fx filelist.""" - create_tree(root, cfg.get('available_files'), - cfg.get('available_symlinks')) - - # Find files - rootpath = {cfg['variable']['project']: [root]} - drs = {cfg['variable']['project']: cfg['drs']} - fx_files = get_input_fx_filelist(cfg['variable'], rootpath, drs) - - # Test result - reference = { - fx_var: os.path.join(root, filename) if filename else None - for fx_var, filename in cfg['found_files'].items() - } - assert fx_files == reference diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 90730ad990..f318f17614 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -97,7 +97,7 @@ def find_files(_, filenames): filename = str(tmp_path / 'input' / filename) filenames = [] if filename.endswith('*.nc'): - filename = filename[:-len('*.nc')] + filename = filename[:-len('*.nc')] + '_' intervals = [ '1990_1999', '2000_2009', @@ -599,6 +599,7 @@ def test_derive_with_fx(tmp_path, patched_datafinder, config_user): end_year: 2005 derive: true force_derivation: true + fx_files: [{'short_name': 'sftlf'}] additional_datasets: - {dataset: GFDL-CM3, ensemble: r1i1p1} scripts: null @@ -611,15 +612,17 @@ def test_derive_with_fx(tmp_path, patched_datafinder, config_user): task = recipe.tasks.pop() assert task.name == 'diagnostic_name' + TASKSEP + 'nbp_grid' - assert len(task.ancestors) == 1 - ancestor = [t for t in task.ancestors][0] + # fx_sftlf and nbp_grid so 2 + assert len(task.ancestors) == 2 + fx_ancestor = [t for t in task.ancestors][0] + assert fx_ancestor.name == 'diagnostic_name/fx_sftlf' + ancestor = [t for t in task.ancestors][1] assert ancestor.name == 'diagnostic_name/nbp_grid_derive_input_nbp' # Check product content of tasks assert len(task.products) == 1 product = task.products.pop() assert 'derive' in product.settings - assert product.attributes['short_name'] == 'nbp_grid' assert 'fx_files' in product.settings['derive'] assert 'sftlf' in product.settings['derive']['fx_files'] assert product.settings['derive']['fx_files']['sftlf'] is not None