diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 2724a91fac..f30543612a 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -303,6 +303,19 @@ is for example useful for climate models which do not offer land/sea fraction files. This arguments also accepts the special dataset specifiers ``reference_dataset`` and ``alternative_dataset``. +Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain +experiment is preferred for fx data retrieval: + +.. code-block:: yaml + + preprocessors: + preproc_weighting: + weighting_landsea_fraction: + area_type: land + exclude: ['CanESM2', 'reference_dataset'] + fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] + + See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. @@ -351,7 +364,21 @@ and requires only one argument: ``mask_out``: either ``land`` or ``sea``. The preprocessor automatically retrieves the corresponding mask (``fx: stfof`` in this case) and applies it so that sea-covered grid cells are set to missing. Conversely, it retrieves the ``fx: sftlf`` mask when land needs to be -masked out, respectively. If the corresponding fx file is not found (which is +masked out, respectively. + +Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain +experiment is preferred for fx data retrieval: + + +.. code-block:: yaml + + preprocessors: + landmask: + mask_landsea: + mask_out: sea + fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] + +If the corresponding fx file is not found (which is the case for some models and almost all observational datasets), the preprocessor attempts to mask the data using Natural Earth mask files (that are vectorized rasters). As mentioned above, the spatial resolution of the the @@ -377,7 +404,20 @@ losing generality. To mask ice out, ``mask_landseaice`` can be used: and requires only one argument: ``mask_out``: either ``landsea`` or ``ice``. As in the case of ``mask_landsea``, the preprocessor automatically retrieves -the ``fx_files: [sftgif]`` mask. +the ``fx_variables: [sftgif]`` mask. + +Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain +experiment is preferred for fx data retrieval: + + +.. code-block:: yaml + + preprocessors: + landseaicemask: + mask_landseaice: + mask_out: sea + fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + See also :func:`esmvalcore.preprocessor.mask_landseaice`. @@ -398,37 +438,6 @@ and it requires only one argument: ``mask_out``: only ``glaciated``. See also :func:`esmvalcore.preprocessor.mask_landseaice`. -Mask files ----------- - -At the core of the land/sea/ice masking in the preprocessor are the mask files -(whether it be fx type or Natural Earth type of files); these files (bar -Natural Earth) can be retrieved and used in the diagnostic phase as well. By -specifying the ``fx_files:`` key in the variable in diagnostic in the recipe, -and populating it with a list of desired files e.g.: - -.. code-block:: yaml - - variables: - ta: - preprocessor: my_masking_preprocessor - fx_files: [sftlf, sftof, sftgif, areacello, areacella] - -Such a recipe will automatically retrieve all the ``fx_files: [sftlf, sftof, -sftgif, areacello, areacella]``-type fx files for each of the variables they -are needed for and then, in the diagnostic phase, these mask files will be -available for the developer to use them as they need to. The `fx_files` -attribute of the big `variable` nested dictionary that gets passed to the -diagnostic is, in turn, a dictionary on its own, and members of it can be -accessed in the diagnostic through a simple loop over the ``config`` diagnostic -variable items e.g.: - -.. code-block:: python - - for filename, attributes in config['input_data'].items(): - sftlf_file = attributes['fx_files']['sftlf'] - areacello_file = attributes['fx_files']['areacello'] - .. _masking of missing values: Missing values masks @@ -1132,6 +1141,23 @@ Note that this function is applied over the entire dataset. If only a specific region, depth layer or time period is required, then those regions need to be removed using other preprocessor operations in advance. +The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; +the user may specify it as a list of variables e.g. + +.. code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as list of dictionaries, with specific variable parameters (they key-value pair may be as specific +as a CMOR variable can permit): + +.. code-block:: yaml + + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello, mip': 'fx'}] + +The recipe parser wil automatically find the data files that are associated with these +variables and pass them to the function for loading and processing. + See also :func:`esmvalcore.preprocessor.area_statistics`. @@ -1173,7 +1199,24 @@ This function takes the argument: ``operator``, which defines the operation to apply over the volume. No depth coordinate is required as this is determined by Iris. This function -works best when the ``fx_files`` provide the cell volume. +works best when the ``fx_variables`` provide the cell volume. + +The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; +the user may specify it as a list of variables e.g. + +.. code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as list of dictionaries, with specific variable parameters (they key-value pair may be as specific +as a CMOR variable can permit): + +.. code-block:: yaml + + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello, mip': 'fx'}] + +The recipe parser wil automatically find the data files that are associated with these +variables and pass them to the function for loading and processing. See also :func:`esmvalcore.preprocessor.volume_statistics`. diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ac68be8c40..27e988d514 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -12,7 +12,8 @@ from . import __version__ from . import _recipe_checks as check -from ._config import TAGS, get_activity, get_institutes, replace_tags +from ._config import (TAGS, get_activity, get_institutes, get_project_config, + replace_tags) from ._data_finder import (get_input_filelist, get_output_file, get_statistic_output_file) from ._provenance import TrackedFile, get_recipe_provenance @@ -128,13 +129,13 @@ def _special_name_to_dataset(variable, special_name): if special_name in ('reference_dataset', 'alternative_dataset'): if special_name not in variable: raise RecipeError( - "Preprocessor {} uses {}, but {} is not defined for " - "variable {} of diagnostic {}".format( - variable['preprocessor'], - special_name, - special_name, - variable['short_name'], - variable['diagnostic'], + "Preprocessor {preproc} uses {name}, but {name} is not " + "defined for variable {short_name} of diagnostic " + "{diagnostic}".format( + preproc=variable['preprocessor'], + name=special_name, + short_name=variable['short_name'], + diagnostic=variable['diagnostic'], )) special_name = variable[special_name] @@ -174,8 +175,8 @@ def _update_target_levels(variable, variables, settings, config_user): short_name=variable_data['short_name'], mip=variable_data['mip'], frequency=variable_data['frequency'], - fix_dir=os.path.splitext( - variable_data['filename'])[0] + '_fixed', + fix_dir=os.path.splitext(variable_data['filename'])[0] + + '_fixed', ) @@ -231,8 +232,8 @@ def _dataset_to_file(variable, config_user): for required_var in required_vars: _augment(required_var, variable) _add_cmor_info(required_var, override=True) - (files, dirnames, filenames) = _get_input_files(required_var, - config_user) + (files, dirnames, + filenames) = _get_input_files(required_var, config_user) if files: variable = required_var break @@ -261,8 +262,7 @@ def _limit_datasets(variables, profile, max_datasets=0): if variable not in limited: limited.append(variable) - logger.info("Only considering %s", - ', '.join(v['alias'] for v in limited)) + logger.info("Only considering %s", ', '.join(v['alias'] for v in limited)) return limited @@ -355,59 +355,74 @@ def _get_default_settings(variable, config_user, derive=False): def _add_fxvar_keys(fx_var_dict, variable): """Add keys specific to fx variable to use get_input_filelist.""" fx_variable = dict(variable) + fx_variable.update(fx_var_dict) # set variable names fx_variable['variable_group'] = fx_var_dict['short_name'] - fx_variable['short_name'] = fx_var_dict['short_name'] - # specificities of project + # add special ensemble for CMIP5 only if fx_variable['project'] == 'CMIP5': - fx_variable['mip'] = 'fx' fx_variable['ensemble'] = 'r0i0p0' - elif fx_variable['project'] == 'CMIP6': - fx_variable['grid'] = variable['grid'] - if 'mip' in fx_var_dict: - fx_variable['mip'] = fx_var_dict['mip'] - elif fx_variable['project'] in ['OBS', 'OBS6', 'obs4mips']: - fx_variable['mip'] = 'fx' + # add missing cmor info _add_cmor_info(fx_variable, override=True) + return fx_variable -def _get_correct_fx_file(variable, fx_varname, config_user): +def _get_fx_file(variable, fx_variable, config_user): """Get fx files (searching all possible mips).""" - # TODO: allow user to specify certain mip if desired + # make it a dict + if isinstance(fx_variable, str): + fx_varname = fx_variable + fx_variable = {'short_name': fx_varname} + else: + fx_varname = fx_variable['short_name'] + + # assemble info from master variable var = dict(variable) var_project = variable['project'] + # check if project in config-developer + try: + get_project_config(var_project) + except ValueError: + raise RecipeError( + f"Requested fx variable '{fx_varname}' with parent variable" + f"'{variable}' does not have a '{var_project}' project" + f"in config-developer.") cmor_table = CMOR_TABLES[var_project] + valid_fx_vars = [] - # Get all fx-related mips ('fx' always first, original mip last) - fx_mips = ['fx'] - fx_mips.extend( - [key for key in cmor_table.tables if 'fx' in key and key != 'fx']) - fx_mips.append(variable['mip']) + # force only the mip declared by user + if 'mip' in fx_variable: + fx_mips = [fx_variable['mip']] + else: + # Get all fx-related mips (original var mip, + # 'fx' and extend from cmor tables) + fx_mips = [variable['mip']] + fx_mips.extend(mip for mip in cmor_table.tables if 'fx' in mip) # Search all mips for available variables + # priority goes to user specified mip if available searched_mips = [] + fx_files = [] for fx_mip in fx_mips: - fx_variable = cmor_table.get_variable(fx_mip, fx_varname) - if fx_variable is not None: + fx_cmor_variable = cmor_table.get_variable(fx_mip, fx_varname) + if fx_cmor_variable is not None: + fx_var_dict = dict(fx_variable) searched_mips.append(fx_mip) - fx_var = _add_fxvar_keys( - {'short_name': fx_varname, 'mip': fx_mip}, var) - logger.debug("For CMIP6 fx variable '%s', found table '%s'", - fx_varname, fx_mip) - fx_files = _get_input_files(fx_var, config_user)[0] + fx_var_dict['mip'] = fx_mip + fx_var_dict = _add_fxvar_keys(fx_var_dict, var) + valid_fx_vars.append(fx_var_dict) + logger.debug("For fx variable '%s', found table '%s'", fx_varname, + fx_mip) + fx_files = _get_input_files(fx_var_dict, config_user)[0] # If files found, return them if fx_files: - logger.debug("Found CMIP6 fx variables '%s':\n%s", - fx_varname, pformat(fx_files)) + logger.debug("Found fx variables '%s':\n%s", fx_varname, + pformat(fx_files)) break - else: - # No files found - fx_files = [] # If fx variable was not found in any table, raise exception if not searched_mips: @@ -415,22 +430,17 @@ def _get_correct_fx_file(variable, fx_varname, config_user): f"Requested fx variable '{fx_varname}' not available in " f"any 'fx'-related CMOR table ({fx_mips}) for '{var_project}'") + # flag a warning + if not fx_files: + logger.warning("Missing data for fx variable '%s'", fx_varname) + # allow for empty lists corrected for by NE masks if fx_files: fx_files = fx_files[0] + if valid_fx_vars: + valid_fx_vars = valid_fx_vars[0] - return fx_files - - -def _get_landsea_fraction_fx_dict(variable, config_user): - """Get dict of available ``sftlf`` and ``sftof`` variables.""" - fx_dict = {} - fx_vars = ['sftlf'] - if variable['project'] != 'obs4mips': - fx_vars.append('sftof') - for fx_var in fx_vars: - fx_dict[fx_var] = _get_correct_fx_file(variable, fx_var, config_user) - return fx_dict + return fx_files, valid_fx_vars def _exclude_dataset(settings, variable, step): @@ -452,41 +462,51 @@ def _update_weighting_settings(settings, variable): _exclude_dataset(settings, variable, 'weighting_landsea_fraction') +def _update_fx_files(step_name, settings, variable, config_user, fx_vars): + """Update settings with mask fx file list or dict.""" + if not fx_vars: + return + + fx_vars = [ + _get_fx_file(variable, fxvar, config_user) + for fxvar in fx_vars + ] + + fx_dict = {fx_var[1]['short_name']: fx_var[0] for fx_var in fx_vars} + settings['fx_variables'] = fx_dict + logger.info('Using fx_files: %s for variable %s during step %s', + pformat(settings['fx_variables']), + variable['short_name'], + step_name) + + def _update_fx_settings(settings, variable, config_user): - """Find and set the FX mask settings.""" - msg = f"Using fx files for %s of dataset {variable['dataset']}:\n%s" - if 'mask_landsea' in settings: - logger.debug('Getting fx mask settings now...') - fx_dict = _get_landsea_fraction_fx_dict(variable, config_user) - fx_list = [fx_file for fx_file in fx_dict.values() if fx_file] - settings['mask_landsea']['fx_files'] = fx_list - logger.info(msg, 'land/sea masking', pformat(fx_dict)) - - if 'mask_landseaice' in settings: - logger.debug('Getting fx mask settings now...') - settings['mask_landseaice']['fx_files'] = [] - fx_files_dict = { - 'sftgif': _get_correct_fx_file(variable, 'sftgif', config_user)} - if fx_files_dict['sftgif']: - settings['mask_landseaice']['fx_files'].append( - fx_files_dict['sftgif']) - logger.info(msg, 'land/sea ice masking', pformat(fx_files_dict)) - - if 'weighting_landsea_fraction' in settings: - logger.debug("Getting fx files for landsea fraction weighting now...") - fx_dict = _get_landsea_fraction_fx_dict(variable, config_user) - settings['weighting_landsea_fraction']['fx_files'] = fx_dict - logger.info(msg, 'land/sea fraction weighting', pformat(fx_dict)) - - for step in ('area_statistics', 'volume_statistics'): - if settings.get(step, {}).get('fx_files'): - var = dict(variable) - var['fx_files'] = settings.get(step, {}).get('fx_files') - fx_files_dict = { - fxvar: _get_correct_fx_file(variable, fxvar, config_user) - for fxvar in var['fx_files']} - settings[step]['fx_files'] = fx_files_dict - logger.info(msg, step, pformat(fx_files_dict)) + """Update fx settings depending on the needed method.""" + # get fx variables either from user defined attribute or fixed + def _get_fx_vars_from_attribute(step_settings, step_name): + user_fx_vars = step_settings.get('fx_variables') + if not user_fx_vars: + if step_name in ('mask_landsea', 'weighting_landsea_fraction'): + user_fx_vars = ['sftlf'] + if variable['project'] != 'obs4mips': + user_fx_vars.append('sftof') + elif step_name == 'mask_landseaice': + user_fx_vars = ['sftgif'] + elif step_name in ('area_statistics', + 'volume_statistics', 'zonal_statistics'): + user_fx_vars = [] + return user_fx_vars + + fx_steps = [ + 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', + 'area_statistics', 'volume_statistics', 'zonal_statistics' + ] + + for step_name, step_settings in settings.items(): + if step_name in fx_steps: + fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) + _update_fx_files(step_name, step_settings, + variable, config_user, fx_vars) def _read_attributes(filename): @@ -504,10 +524,10 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset (locally and via download).""" - (input_files, dirnames, filenames) = get_input_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + (input_files, dirnames, + filenames) = get_input_filelist(variable=variable, + rootpath=config_user['rootpath'], + drs=config_user['drs']) # Set up downloading using synda if requested. # Do not download if files are already available locally. @@ -521,8 +541,8 @@ def _get_input_files(variable, config_user): def _get_ancestors(variable, config_user): """Get the input files for a single dataset and setup provenance.""" - (input_files, dirnames, filenames) = _get_input_files(variable, - config_user) + (input_files, dirnames, + filenames) = _get_input_files(variable, config_user) logger.info("Using input files for variable %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], @@ -667,11 +687,18 @@ def get_matching(attributes): return grouped_products -def _get_preprocessor_products(variables, profile, order, ancestor_products, +def _get_preprocessor_products(variables, + profile, + order, + ancestor_products, config_user): - """Get preprocessor product definitions for a set of datasets.""" - products = set() + """ + Get preprocessor product definitions for a set of datasets. + It updates recipe settings as needed by various preprocessors + and sets the correct ancestry. + """ + products = set() for variable in variables: variable['filename'] = get_output_file(variable, config_user['preproc_dir']) @@ -697,9 +724,9 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, ) _update_extract_shape(settings, config_user) _update_weighting_settings(settings, variable) - _update_fx_settings( - settings=settings, variable=variable, - config_user=config_user) + _update_fx_settings(settings=settings, + variable=variable, + config_user=config_user) _update_target_grid( variable=variable, variables=variables, @@ -748,8 +775,7 @@ def _get_single_preprocessor_task(variables, profile=profile, order=order, ancestor_products=ancestor_products, - config_user=config_user, - ) + config_user=config_user) if not products: raise RecipeError( @@ -823,8 +849,7 @@ def append(group_prefix, var): for variable in variables: group_prefix = variable['variable_group'] + '_derive_input_' if not variable.get('force_derivation') and _get_input_files( - variable, - config_user)[0]: + variable, config_user)[0]: # No need to derive, just process normally up to derive step var = deepcopy(variable) append(group_prefix, var) @@ -866,6 +891,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): _add_cmor_info(variable) # Create preprocessor task(s) derive_tasks = [] + # set up tasks if variable.get('derive'): # Create tasks to prepare the input data for the derive step derive_profile, profile = _split_derive_profile(profile) @@ -899,11 +925,9 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): class Recipe: """Recipe object.""" - info_keys = ( - 'project', 'activity', 'dataset', 'exp', 'ensemble', 'version' - ) + info_keys = ('project', 'activity', 'dataset', 'exp', 'ensemble', + 'version') """List of keys to be used to compose the alias, ordered by priority.""" - def __init__(self, raw_recipe, config_user, @@ -985,9 +1009,9 @@ def _initialize_datasets(raw_datasets): @staticmethod def _expand_ensemble(variables): """ - Expand ensemble members to multiple datasets + Expand ensemble members to multiple datasets. - Expansion only support ensembles defined as strings, not lists + Expansion only supports ensembles defined as strings, not lists. """ expanded = [] regex = re.compile(r'\(\d+:\d+\)') @@ -1000,7 +1024,7 @@ def _expand_ensemble(variables): if not match: expanded.append(variable) continue - start, end = match.group(0)[1: -1].split(':') + start, end = match.group(0)[1:-1].split(':') for i in range(int(start), int(end) + 1): expand = deepcopy(variable) expand['ensemble'] = regex.sub(str(i), ensemble, 1) @@ -1077,8 +1101,7 @@ def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, return preprocessor_output def _set_alias(self, preprocessor_output): - """ - Add unique alias for datasets. + """Add unique alias for datasets. Generates a unique alias for each dataset that will be shared by all variables. Tries to make it as small as possible to make it useful for @@ -1095,7 +1118,7 @@ def _set_alias(self, preprocessor_output): Function will not modify alias if it is manually added to the recipe but it will use the dataset info to compute the others - Examples: + Examples -------- - {project: CMIP5, model: EC-Earth, ensemble: r1i1p1} - {project: CMIP6, model: EC-Earth, ensemble: r1i1p1f1} @@ -1119,7 +1142,7 @@ def _set_alias(self, preprocessor_output): - {project: CMIP5, model: EC-Earth, experiment: historical} will generate alias 'EC-Earth' - Parameters: + Parameters ---------- preprocessor_output : dict preprocessor output dictionary diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 9f023dd020..2dfcd69b1a 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -196,7 +196,7 @@ def tile_grid_areas(cube, fx_files): # get the area average -def area_statistics(cube, operator, fx_files=None): +def area_statistics(cube, operator, fx_variables=None): """ Apply a statistical operator in the horizontal direction. @@ -232,8 +232,8 @@ def area_statistics(cube, operator, fx_files=None): operator: str The operation, options: mean, median, min, max, std_dev, sum, variance - fx_files: dict - dictionary of field:filename for the fx_files + fx_variables: dict + dictionary of field:filename for the fx_variables Returns ------- @@ -247,9 +247,9 @@ def area_statistics(cube, operator, fx_files=None): ValueError if input data cube has different shape than grid area weights """ - grid_areas = tile_grid_areas(cube, fx_files) + grid_areas = tile_grid_areas(cube, fx_variables) - if not fx_files and cube.coord('latitude').points.ndim == 2: + if not fx_variables and cube.coord('latitude').points.ndim == 2: logger.error( 'fx_file needed to calculate grid cell area for irregular grids.') raise iris.exceptions.CoordinateMultiDimError(cube.coord('latitude')) diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index a1dcf72555..a6173c8360 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -12,10 +12,10 @@ import cartopy.io.shapereader as shpreader import iris -from iris.analysis import Aggregator -from iris.util import rolling_window import numpy as np import shapely.vectorized as shp_vect +from iris.analysis import Aggregator +from iris.util import rolling_window logger = logging.getLogger(__name__) @@ -86,7 +86,7 @@ def _apply_fx_mask(fx_mask, var_data): return var_data -def mask_landsea(cube, fx_files, mask_out, always_use_ne_mask=False): +def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): """ Mask out either land mass or sea (oceans, seas and lakes). @@ -100,8 +100,8 @@ def mask_landsea(cube, fx_files, mask_out, always_use_ne_mask=False): cube: iris.cube.Cube data cube to be masked. - fx_files: list - list holding the full paths to fx files. + fx_variables: dict + dict: keys: fx variables, values: full paths to fx files. mask_out: str either "land" to mask out land mass or "sea" to mask out seas. @@ -131,7 +131,8 @@ def mask_landsea(cube, fx_files, mask_out, always_use_ne_mask=False): 'sea': os.path.join(cwd, 'ne_masks/ne_50m_ocean.shp') } - if fx_files and not always_use_ne_mask: + fx_files = fx_variables.values() + if any(fx_files) and not always_use_ne_mask: fx_cubes = {} for fx_file in fx_files: fxfile_members = os.path.basename(fx_file).split('_') @@ -180,7 +181,7 @@ def mask_landsea(cube, fx_files, mask_out, always_use_ne_mask=False): return cube -def mask_landseaice(cube, fx_files, mask_out): +def mask_landseaice(cube, fx_variables, mask_out): """ Mask out either landsea (combined) or ice. @@ -192,8 +193,8 @@ def mask_landseaice(cube, fx_files, mask_out): cube: iris.cube.Cube data cube to be masked. - fx_files: list - list holding the full paths to fx files. + fx_variables: dict + dict: keys: fx variables, values: full paths to fx files. mask_out: str either "landsea" to mask out landsea or "ice" to mask out ice. @@ -208,11 +209,12 @@ def mask_landseaice(cube, fx_files, mask_out): ValueError Error raised if fx mask and data have different dimensions. ValueError - Error raised if fx_files list is empty. + Error raised if fx files list is empty. """ - # sftgif is the only one so far - if fx_files: + # sftgif is the only one so far but users can set others + fx_files = fx_variables.values() + if any(fx_files): for fx_file in fx_files: fx_cube = iris.load_cube(fx_file) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index b0b0db138d..d7d5f02035 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -135,7 +135,7 @@ def calculate_volume(cube): """ Calculate volume from a cube. - This function is used when the volume netcdf fx_files can't be found. + This function is used when the volume netcdf fx_variables can't be found. Parameters ---------- @@ -170,7 +170,7 @@ def calculate_volume(cube): def volume_statistics( cube, operator, - fx_files=None): + fx_variables=None): """ Apply a statistical operation over a volume. @@ -184,8 +184,8 @@ def volume_statistics( Input cube. operator: str The operation to apply to the cube, options are: 'mean'. - fx_files: dict - dictionary of field:filename for the fx_files + fx_variables: dict + dictionary of field:filename for the fx_variables Returns ------- @@ -206,8 +206,8 @@ def volume_statistics( grid_volume_found = False grid_volume = None - if fx_files: - for key, fx_file in fx_files.items(): + if fx_variables: + for key, fx_file in fx_variables.items(): if fx_file is None: continue logger.info('Attempting to load %s from file: %s', key, fx_file) diff --git a/esmvalcore/preprocessor/_weighting.py b/esmvalcore/preprocessor/_weighting.py index b013581f2b..b786684135 100644 --- a/esmvalcore/preprocessor/_weighting.py +++ b/esmvalcore/preprocessor/_weighting.py @@ -7,14 +7,14 @@ logger = logging.getLogger(__name__) -def _get_land_fraction(cube, fx_files): +def _get_land_fraction(cube, fx_variables): """Extract land fraction as :mod:`dask.array`.""" land_fraction = None errors = [] - if not fx_files: + if not fx_variables: errors.append("No fx files given.") return (land_fraction, errors) - for (fx_var, fx_path) in fx_files.items(): + for (fx_var, fx_path) in fx_variables.items(): if not fx_path: errors.append(f"File for '{fx_var}' not found.") continue @@ -43,7 +43,7 @@ def _shape_is_broadcastable(shape_1, shape_2): for (m, n) in zip(shape_1[::-1], shape_2[::-1])) -def weighting_landsea_fraction(cube, fx_files, area_type): +def weighting_landsea_fraction(cube, fx_variables, area_type): """Weight fields using land or sea fraction. This preprocessor function weights a field with its corresponding land or @@ -58,7 +58,7 @@ def weighting_landsea_fraction(cube, fx_files, area_type): ---------- cube : iris.cube.Cube Data cube to be weighted. - fx_files : dict + fx_variables : dict Dictionary holding ``var_name`` (keys) and full paths (values) to the fx files as ``str`` or empty ``list`` (if not available). area_type : str @@ -81,7 +81,7 @@ def weighting_landsea_fraction(cube, fx_files, area_type): if area_type not in ('land', 'sea'): raise TypeError( f"Expected 'land' or 'sea' for area_type, got '{area_type}'") - (land_fraction, errors) = _get_land_fraction(cube, fx_files) + (land_fraction, errors) = _get_land_fraction(cube, fx_variables) if land_fraction is None: raise ValueError( f"Weighting of '{cube.var_name}' with '{area_type}' fraction " diff --git a/tests/integration/preprocessor/_mask/test_mask.py b/tests/integration/preprocessor/_mask/test_mask.py index c89c091551..afd96ac16f 100644 --- a/tests/integration/preprocessor/_mask/test_mask.py +++ b/tests/integration/preprocessor/_mask/test_mask.py @@ -63,8 +63,10 @@ def test_mask_landsea(self): dim_coords_and_dims=self.coords_spec) # mask with fx files - result_land = mask_landsea(new_cube_land, ['sftlf_test.nc'], 'land') - result_sea = mask_landsea(new_cube_sea, ['sftlf_test.nc'], 'sea') + result_land = mask_landsea(new_cube_land, + {'sftlf': 'sftlf_test.nc'}, 'land') + result_sea = mask_landsea(new_cube_sea, + {'sftlf': 'sftlf_test.nc'}, 'sea') expected = np.ma.empty((3, 3)) expected.data[:] = 200. expected.mask = np.ones((3, 3), bool) @@ -83,10 +85,10 @@ def test_mask_landsea(self): dim_coords_and_dims=self.coords_spec) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_land = mask_landsea(new_cube_land, ['sftlf_test.nc'], + result_land = mask_landsea(new_cube_land, {'sftlf': 'sftlf_test.nc'}, 'land', always_use_ne_mask=True) - result_sea = mask_landsea(new_cube_sea, ['sftlf_test.nc'], + result_sea = mask_landsea(new_cube_sea, {'sftlf': 'sftlf_test.nc'}, 'sea', always_use_ne_mask=True) @@ -106,8 +108,8 @@ def test_mask_landsea(self): dim_coords_and_dims=self.coords_spec) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_land = mask_landsea(new_cube_land, None, 'land') - result_sea = mask_landsea(new_cube_sea, None, 'sea') + result_land = mask_landsea(new_cube_land, {}, 'land') + result_sea = mask_landsea(new_cube_sea, {}, 'sea') # bear in mind all points are in the ocean np.ma.set_fill_value(result_land.data, 1e+20) @@ -122,7 +124,8 @@ def test_mask_landseaice(self): iris.save(self.fx_mask, 'sftgif_test.nc') new_cube_ice = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_ice = mask_landseaice(new_cube_ice, ['sftgif_test.nc'], 'ice') + result_ice = mask_landseaice(new_cube_ice, + {'sftgif': 'sftgif_test.nc'}, 'ice') expected = np.ma.empty((3, 3)) expected.data[:] = 200. expected.mask = np.ones((3, 3), bool) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index e6f64b8ad0..7904c2bfe7 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1539,15 +1539,15 @@ def test_weighting_landsea_fraction(tmp_path, patched_datafinder, config_user): settings = product.settings['weighting_landsea_fraction'] assert len(settings) == 2 assert settings['area_type'] == 'land' - fx_files = settings['fx_files'] - assert isinstance(fx_files, dict) + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) if product.attributes['project'] == 'obs4mips': - assert len(fx_files) == 1 - assert fx_files.get('sftlf') + assert len(fx_variables) == 1 + assert fx_variables.get('sftlf') else: - assert len(fx_files) == 2 - assert fx_files.get('sftlf') - assert fx_files.get('sftof') + assert len(fx_variables) == 2 + assert fx_variables.get('sftlf') + assert fx_variables.get('sftof') def test_weighting_landsea_fraction_no_fx(tmp_path, patched_failing_datafinder, @@ -1591,15 +1591,15 @@ def test_weighting_landsea_fraction_no_fx(tmp_path, patched_failing_datafinder, assert len(settings) == 2 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_files = settings['fx_files'] - assert isinstance(fx_files, dict) + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) if product.attributes['project'] == 'obs4mips': - assert len(fx_files) == 1 - assert fx_files['sftlf'] == [] + assert len(fx_variables) == 1 + assert fx_variables['sftlf'] == [] else: - assert len(fx_files) == 2 - assert fx_files['sftlf'] == [] - assert fx_files['sftof'] == [] + assert len(fx_variables) == 2 + assert fx_variables['sftlf'] == [] + assert fx_variables['sftof'] == [] def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, @@ -1648,10 +1648,10 @@ def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, assert len(settings) == 2 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_files = settings['fx_files'] - assert isinstance(fx_files, dict) - assert len(fx_files) == 1 - assert fx_files.get('sftlf') + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert fx_variables.get('sftlf') def test_weighting_landsea_fraction_exclude_fail(tmp_path, patched_datafinder, @@ -1726,12 +1726,88 @@ def test_landmask(tmp_path, patched_datafinder, config_user): settings = product.settings['mask_landsea'] assert len(settings) == 2 assert settings['mask_out'] == 'sea' - fx_files = settings['fx_files'] - assert isinstance(fx_files, list) + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + fx_variables = fx_variables.values() if product.attributes['project'] == 'obs4mips': - assert len(fx_files) == 1 + assert len(fx_variables) == 1 else: - assert len(fx_files) == 2 + assert len(fx_variables) == 2 + + +def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): + content = dedent(""" + preprocessors: + landmask: + mask_landsea: + mask_out: sea + fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}] + mask_landseaice: + mask_out: sea + fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + volume_statistics: + operator: mean + area_statistics: + operator: mean + fx_variables: [{'short_name': 'areacello', 'mip': 'fx', + 'exp': 'piControl'}] + + diagnostics: + diagnostic_name: + variables: + gpp: + preprocessor: landmask + project: CMIP5 + mip: Lmon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1 + additional_datasets: + - {dataset: CanESM2} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check custom fx variables + task = recipe.tasks.pop() + product = task.products.pop() + + # landsea + settings = product.settings['mask_landsea'] + assert len(settings) == 2 + assert settings['mask_out'] == 'sea' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['sftlf'] + assert '_piControl_' in fx_variables['sftlf'] + + # landseaice + settings = product.settings['mask_landseaice'] + assert len(settings) == 2 + assert settings['mask_out'] == 'sea' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['sftgif'] + assert '_piControl_' in fx_variables['sftgif'] + + # volume statistics + settings = product.settings['volume_statistics'] + assert len(settings) == 1 + assert settings['operator'] == 'mean' + assert 'fx_variables' not in settings + + # area statistics + settings = product.settings['area_statistics'] + assert len(settings) == 2 + assert settings['operator'] == 'mean' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['areacello'] + assert '_piControl_' in fx_variables['areacello'] def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): @@ -1776,9 +1852,10 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): assert len(settings) == 3 assert settings['mask_out'] == 'sea' assert settings['always_use_ne_mask'] is False - fx_files = settings['fx_files'] - assert isinstance(fx_files, list) - assert fx_files == [] + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + fx_variables = fx_variables.values() + assert not any(fx_variables) def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): @@ -1787,7 +1864,7 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): preproc: area_statistics: operator: mean - fx_files: [ + fx_variables: [ 'areacella', 'areacello', 'clayfrac', @@ -1828,25 +1905,26 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): settings = product.settings['area_statistics'] assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_files = settings['fx_files'] - assert isinstance(fx_files, dict) - assert len(fx_files) == 6 - assert '_fx_' in fx_files['areacella'] - assert '_Ofx_' in fx_files['areacello'] - assert '_Efx_' in fx_files['clayfrac'] - assert '_fx_' in fx_files['sftlf'] - assert '_fx_' in fx_files['sftgif'] - assert '_Ofx_' in fx_files['sftof'] + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 6 + assert '_fx_' in fx_variables['areacella'] + assert '_Ofx_' in fx_variables['areacello'] + assert '_Efx_' in fx_variables['clayfrac'] + assert '_fx_' in fx_variables['sftlf'] + assert '_fx_' in fx_variables['sftgif'] + assert '_Ofx_' in fx_variables['sftof'] # Check mask_landsea assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] assert len(settings) == 2 assert settings['mask_out'] == 'sea' - fx_files = settings['fx_files'] - assert isinstance(fx_files, list) - assert len(fx_files) == 2 - for fx_file in fx_files: + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + fx_variables = fx_variables.values() + assert len(fx_variables) == 2 + for fx_file in fx_variables: if 'sftlf' in fx_file: assert '_fx_' in fx_file elif 'sftof' in fx_file: @@ -1862,7 +1940,54 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_files: ['volcello'] + fx_variables: ['volcello'] + + diagnostics: + diagnostic_name: + variables: + tos: + preprocessor: preproc + project: CMIP6 + mip: Omon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tos' + assert len(task.products) == 1 + product = task.products.pop() + + # Check volume_statistics + assert 'volume_statistics' in product.settings + settings = product.settings['volume_statistics'] + assert len(settings) == 2 + assert settings['operator'] == 'mean' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_Omon_' in fx_variables['volcello'] + assert '_Ofx_' not in fx_variables['volcello'] + + +def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + volume_statistics: + operator: mean + fx_variables: [{'short_name': 'volcello', 'mip': 'Oyr', + 'exp': 'piControl'}] diagnostics: diagnostic_name: @@ -1894,11 +2019,64 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, settings = product.settings['volume_statistics'] assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_files = settings['fx_files'] - assert isinstance(fx_files, dict) - assert len(fx_files) == 1 - assert '_Ofx_' in fx_files['volcello'] - assert '_Omon_' not in fx_files['volcello'] + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_Oyr_' in fx_variables['volcello'] + assert '_piControl_' in fx_variables['volcello'] + assert '_Omon_' not in fx_variables['volcello'] + + +def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + regrid: + target_grid: 1x1 + scheme: linear + extract_volume: + z_min: 0 + z_max: 100 + annual_statistics: + operator: mean + convert_units: + units: K + area_statistics: + operator: mean + + diagnostics: + diagnostic_name: + variables: + tos: + preprocessor: preproc + project: CMIP6 + mip: Omon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tos' + assert len(task.ancestors) == 0 + assert len(task.products) == 1 + product = task.products.pop() + assert product.attributes['short_name'] == 'tos' + assert product.files + assert 'area_statistics' in product.settings + settings = product.settings['area_statistics'] + assert len(settings) == 1 + assert settings['operator'] == 'mean' + assert 'fx_variables' not in settings def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, @@ -1908,7 +2086,7 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_files: ['volcello'] + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -1940,11 +2118,11 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, settings = product.settings['volume_statistics'] assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_files = settings['fx_files'] - assert isinstance(fx_files, dict) - assert len(fx_files) == 1 - assert '_Ofx_' not in fx_files['volcello'] - assert '_Omon_' in fx_files['volcello'] + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_Ofx_' not in fx_variables['volcello'] + assert '_Omon_' in fx_variables['volcello'] def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, @@ -1954,7 +2132,7 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_files: ['volcello'] + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -1986,11 +2164,11 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, settings = product.settings['volume_statistics'] assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_files = settings['fx_files'] - assert isinstance(fx_files, dict) - assert len(fx_files) == 1 - assert '_Ofx_' not in fx_files['volcello'] - assert '_Oyr_' in fx_files['volcello'] + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_Ofx_' not in fx_variables['volcello'] + assert '_Oyr_' in fx_variables['volcello'] def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, @@ -2000,7 +2178,7 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_files: ['volcello'] + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -2031,11 +2209,11 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, settings = product.settings['volume_statistics'] assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_files = settings['fx_files'] - assert isinstance(fx_files, dict) - assert len(fx_files) == 1 - assert '_fx_' in fx_files['volcello'] - assert '_Omon_' not in fx_files['volcello'] + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['volcello'] + assert '_Omon_' not in fx_variables['volcello'] def test_wrong_project(tmp_path, patched_datafinder, config_user): @@ -2044,7 +2222,7 @@ def test_wrong_project(tmp_path, patched_datafinder, config_user): preproc: volume_statistics: operator: mean - fx_files: ['volcello'] + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -2075,7 +2253,7 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): preproc: area_statistics: operator: mean - fx_files: [ + fx_variables: [ 'areacella', 'wrong_fx_variable', ] @@ -2101,35 +2279,3 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): with pytest.raises(RecipeError) as rec_err_exp: get_recipe(tmp_path, content, config_user) assert msg in str(rec_err_exp.value) - - -def test_fx_var_invalid_project(tmp_path, patched_datafinder, config_user): - content = dedent(""" - preprocessors: - preproc: - area_statistics: - operator: mean - fx_files: ['areacella'] - - diagnostics: - diagnostic_name: - variables: - tas: - preprocessor: preproc - project: EMAC - mip: Amon - exp: historical - start_year: 2000 - end_year: 2005 - ensemble: r1i1p1f1 - grid: gn - additional_datasets: - - {dataset: CanESM5} - scripts: null - """) - msg = ( - "Unable to load CMOR table (project) 'EMAC' for variable 'areacella' " - "with mip 'Amon'") - with pytest.raises(RecipeError) as rec_err_exp: - get_recipe(tmp_path, content, config_user) - assert str(rec_err_exp.value) == msg