diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 20a126de5c..bf1cc71d78 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -9,6 +9,7 @@ roughly following the default order in which preprocessor functions are applied: * :ref:`Variable derivation` * :ref:`CMOR check and dataset-specific fixes` +* :ref:`Fx variables as cell measures or ancillary variables` * :ref:`Vertical interpolation` * :ref:`Weighting` * :ref:`Land/Sea/Ice masking` @@ -175,6 +176,34 @@ steps: To get an overview on data fixes and how to implement new ones, please go to :ref:`fixing_data`. +.. _Fx variables as cell measures or ancillary variables: + +Fx variables as cell measures or ancillary variables +==================================================== +Preprocessor steps related to spatial statistics or masking may require +the use of ``fx_variables`` to be able to perform the computations. +The preprocessor step ``add_fx_variables`` loads the required ``fx_variables``, +checks them against CMOR standards and adds them either as ``cell_measure`` +or ``ancillary_variable`` inside the cube data. This ensures that the +defined preprocessor chain is applied to both ``variables`` and ``fx_variables``. + +Note that when calling steps that require ``fx_variables`` inside diagnostic +scripts, the variables are expected to contain the required ``cell_measures`` or +``ancillary_variables``. If missing, they can be added using the following functions: + +.. code-block:: + + from esmvalcore.preprocessor import (add_cell_measure, add_ancillary_variable) + + cube_with_area_measure = add_cell_measure(cube, area_cube, 'area') + + cube_with_volume_measure = add_cell_measure(cube, volume_cube, 'volume') + + cube_with_ancillary_sftlf = add_ancillary_variable(cube, sftlf_cube) + + cube_with_ancillary_sftgif = add_ancillary_variable(cube, sftgif_cube) + + Details on the arguments needed for each step can be found in the following sections. .. 
_Vertical interpolation: @@ -325,8 +354,25 @@ experiment is preferred for fx data retrieval: weighting_landsea_fraction: area_type: land exclude: ['CanESM2', 'reference_dataset'] - fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] + fx_variables: + sftlf: + exp: piControl + sftof: + exp: piControl +or alternatively: + +.. code-block:: yaml + + preprocessors: + preproc_weighting: + weighting_landsea_fraction: + area_type: land + exclude: ['CanESM2', 'reference_dataset'] + fx_variables: [ + {'short_name': 'sftlf', 'exp': 'piControl'}, + {'short_name': 'sftof', 'exp': 'piControl'} + ] See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. @@ -379,8 +425,24 @@ missing. Conversely, it retrieves the ``fx: sftlf`` mask when land needs to be masked out, respectively. Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain -experiment is preferred for fx data retrieval: +experiment is preferred for fx data retrieval. Note that it is possible to specify as many tags +for the fx variable as required: + + +.. code-block:: yaml + preprocessors: + landmask: + mask_landsea: + mask_out: sea + fx_variables: + sftlf: + exp: piControl + sftof: + exp: piControl + ensemble: r2i1p1f1 + +or alternatively: .. 
code-block:: yaml @@ -388,7 +450,10 @@ experiment is preferred for fx data retrieval: landmask: mask_landsea: mask_out: sea - fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] + fx_variables: [ + {'short_name': 'sftlf', 'exp': 'piControl'}, + {'short_name': 'sftof', 'exp': 'piControl', 'ensemble': 'r2i1p1f1'} + ] If the corresponding fx file is not found (which is the case for some models and almost all observational datasets), the @@ -428,8 +493,19 @@ experiment is preferred for fx data retrieval: landseaicemask: mask_landseaice: mask_out: sea - fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + fx_variables: + sftgif: + exp: piControl +or alternatively: + +.. code-block:: yaml + + preprocessors: + landseaicemask: + mask_landseaice: + mask_out: sea + fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] See also :func:`esmvalcore.preprocessor.mask_landseaice`. @@ -1332,18 +1408,36 @@ region, depth layer or time period is required, then those regions need to be removed using other preprocessor operations in advance. The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; -the user may specify it as a list of variables e.g. +the user may specify it by naming the variables, e.g. .. code-block:: yaml - fx_variables: ['areacello', 'volcello'] + fx_variables: + areacello: + volcello: -or as list of dictionaries, with specific variable parameters (they key-value pair may be as specific +or by naming the variables and adding specific variable parameters (the key-value pair may be as specific as a CMOR variable can permit): .. code-block:: yaml - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello, mip': 'fx'}] + fx_variables: + areacello: + mip: Omon + volcello: + mip: fx + +Alternatively, the ``fx_variables`` argument can also be specified as a list: + +.. 
code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as a list of dictionaries: + +.. code-block:: yaml + + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. @@ -1392,18 +1486,36 @@ No depth coordinate is required as this is determined by Iris. This function works best when the ``fx_variables`` provide the cell volume. The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; -the user may specify it as a list of variables e.g. +the user may specify it by naming the variables, e.g. .. code-block:: yaml - fx_variables: ['areacello', 'volcello'] + fx_variables: + areacello: + volcello: -or as list of dictionaries, with specific variable parameters (they key-value pair may be as specific +or by naming the variables and adding specific variable parameters (the key-value pair may be as specific as a CMOR variable can permit): .. code-block:: yaml - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello, mip': 'fx'}] + fx_variables: + areacello: + mip: Omon + volcello: + mip: fx + +Alternatively, the ``fx_variables`` argument can also be specified as a list: + +.. code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as a list of dictionaries: + +.. code-block:: yaml + + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. 
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 262964aea1..6db4df455c 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -315,16 +315,21 @@ def _get_default_settings(variable, config_user, derive=False): if variable['short_name'] != variable['original_short_name']: settings['save']['alias'] = variable['short_name'] + # Configure fx settings + settings['add_fx_variables'] = { + 'fx_variables': {}, + 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) + } + settings['remove_fx_variables'] = {} + return settings -def _add_fxvar_keys(fx_var_dict, variable): +def _add_fxvar_keys(fx_info, variable): """Add keys specific to fx variable to use get_input_filelist.""" - fx_variable = dict(variable) - fx_variable.update(fx_var_dict) - - # set variable names - fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable = deepcopy(variable) + fx_variable.update(fx_info) + fx_variable['variable_group'] = fx_info['short_name'] # add special ensemble for CMIP5 only if fx_variable['project'] == 'CMIP5': @@ -336,77 +341,69 @@ def _add_fxvar_keys(fx_var_dict, variable): return fx_variable -def _get_fx_file(variable, fx_variable, config_user): +def _search_fx_mip(tables, found_mip, variable, fx_info, config_user): + fx_files = None + for mip in tables: + fx_cmor = tables[mip].get(fx_info['short_name']) + if fx_cmor: + found_mip = True + fx_info['mip'] = mip + fx_info = _add_fxvar_keys(fx_info, variable) + logger.debug( + "For fx variable '%s', found table '%s'", + fx_info['short_name'], mip) + fx_files = _get_input_files(fx_info, config_user)[0] + if fx_files: + logger.debug( + "Found fx variables '%s':\n%s", + fx_info['short_name'], pformat(fx_files)) + return found_mip, fx_info, fx_files + + +def _get_fx_files(variable, fx_info, config_user): """Get fx files (searching all possible mips).""" - # make it a dict - if isinstance(fx_variable, str): - fx_varname = fx_variable - fx_variable = {'short_name': fx_varname} - else: - 
fx_varname = fx_variable['short_name'] # assemble info from master variable - var = dict(variable) var_project = variable['project'] # check if project in config-developer try: get_project_config(var_project) except ValueError: raise RecipeError( - f"Requested fx variable '{fx_varname}' with parent variable" - f"'{variable}' does not have a '{var_project}' project" - f"in config-developer.") - cmor_table = CMOR_TABLES[var_project] - valid_fx_vars = [] + f"Requested fx variable '{fx_info['short_name']}' " + f"with parent variable '{variable}' does not have " + f"a '{var_project}' project in config-developer.") + project_tables = CMOR_TABLES[var_project].tables # force only the mip declared by user - if 'mip' in fx_variable: - fx_mips = [fx_variable['mip']] + found_mip = False + if not fx_info['mip']: + found_mip, fx_info, fx_files = _search_fx_mip( + project_tables, found_mip, variable, fx_info, config_user) else: - # Get all fx-related mips (original var mip, - # 'fx' and extend from cmor tables) - fx_mips = [variable['mip']] - fx_mips.extend(mip for mip in cmor_table.tables if 'fx' in mip) - - # Search all mips for available variables - # priority goes to user specified mip if available - searched_mips = [] - fx_files = [] - for fx_mip in fx_mips: - fx_cmor_variable = cmor_table.get_variable(fx_mip, fx_varname) - if fx_cmor_variable is not None: - fx_var_dict = dict(fx_variable) - searched_mips.append(fx_mip) - fx_var_dict['mip'] = fx_mip - fx_var_dict = _add_fxvar_keys(fx_var_dict, var) - valid_fx_vars.append(fx_var_dict) - logger.debug("For fx variable '%s', found table '%s'", fx_varname, - fx_mip) - fx_files = _get_input_files(fx_var_dict, config_user)[0] - - # If files found, return them - if fx_files: - logger.debug("Found fx variables '%s':\n%s", fx_varname, - pformat(fx_files)) - break + fx_cmor = project_tables[fx_info['mip']].get(fx_info['short_name']) + if fx_cmor: + found_mip = True + fx_info = _add_fxvar_keys(fx_info, variable) + fx_files = 
_get_input_files(fx_info, config_user)[0] # If fx variable was not found in any table, raise exception - if not searched_mips: + if not found_mip: raise RecipeError( - f"Requested fx variable '{fx_varname}' not available in " - f"any 'fx'-related CMOR table ({fx_mips}) for '{var_project}'") + f"Requested fx variable '{fx_info['short_name']}' " + f"not available in any CMOR table for '{var_project}'") # flag a warning if not fx_files: - logger.warning("Missing data for fx variable '%s'", fx_varname) + logger.warning( + "Missing data for fx variable '%s'", fx_info['short_name']) # allow for empty lists corrected for by NE masks if fx_files: - fx_files = fx_files[0] - if valid_fx_vars: - valid_fx_vars = valid_fx_vars[0] + if fx_info['frequency'] == 'fx': + fx_files = fx_files[0] - return fx_files, valid_fx_vars + return fx_files, fx_info def _exclude_dataset(settings, variable, step): @@ -432,44 +429,69 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): """Update settings with mask fx file list or dict.""" if not fx_vars: return + for fx_var, fx_info in fx_vars.items(): + if not fx_info: + fx_info = {} + if 'mip' not in fx_info: + fx_info.update({'mip': None}) + if 'short_name' not in fx_info: + fx_info.update({'short_name': fx_var}) + fx_files, fx_info = _get_fx_files(variable, fx_info, config_user) + if fx_files: + fx_info['filename'] = fx_files + settings['add_fx_variables']['fx_variables'].update({ + fx_var: fx_info + }) - fx_vars = [_get_fx_file(variable, fxvar, config_user) for fxvar in fx_vars] - - fx_dict = {fx_var[1]['short_name']: fx_var[0] for fx_var in fx_vars} - settings['fx_variables'] = fx_dict logger.info('Using fx_files: %s for variable %s during step %s', - pformat(settings['fx_variables']), variable['short_name'], - step_name) + pformat(settings['add_fx_variables']['fx_variables']), + variable['short_name'], step_name) + + +def _fx_list_to_dict(fx_vars): + """Convert fx list to dictionary. 
To be deprecated at some point.""" + user_fx_vars = {} + for fx_var in fx_vars: + if isinstance(fx_var, dict): + short_name = fx_var['short_name'] + user_fx_vars.update({short_name: fx_var}) + continue + user_fx_vars.update({fx_var: None}) + return user_fx_vars def _update_fx_settings(settings, variable, config_user): """Update fx settings depending on the needed method.""" - # get fx variables either from user defined attribute or fixed def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars = step_settings.get('fx_variables') + if isinstance(user_fx_vars, list): + user_fx_vars = _fx_list_to_dict(user_fx_vars) + step_settings['fx_variables'] = user_fx_vars if not user_fx_vars: if step_name in ('mask_landsea', 'weighting_landsea_fraction'): - user_fx_vars = ['sftlf'] + user_fx_vars = {'sftlf': None} if variable['project'] != 'obs4mips': - user_fx_vars.append('sftof') + user_fx_vars.update({'sftof': None}) elif step_name == 'mask_landseaice': - user_fx_vars = ['sftgif'] - elif step_name in ('area_statistics', 'volume_statistics', - 'zonal_statistics'): - user_fx_vars = [] - return user_fx_vars + user_fx_vars = {'sftgif': None} + elif step_name in ('area_statistics', 'volume_statistics'): + user_fx_vars = {} + step_settings['fx_variables'] = user_fx_vars fx_steps = [ 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', - 'area_statistics', 'volume_statistics', 'zonal_statistics' + 'area_statistics', 'volume_statistics' ] - - for step_name, step_settings in settings.items(): + for step_name in settings: if step_name in fx_steps: - fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) - _update_fx_files(step_name, step_settings, variable, config_user, - fx_vars) + _get_fx_vars_from_attribute(settings[step_name], step_name) + _update_fx_files(step_name, settings, variable, config_user, + settings[step_name]['fx_variables']) + # Remove unused attribute in 'fx_steps' preprocessors. 
+ # The fx_variables information is saved in + # the 'add_fx_variables' step. + settings[step_name].pop('fx_variables', None) def _read_attributes(filename): diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index e9747b2fc1..9fe86d140e 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -10,6 +10,7 @@ from .._task import BaseTask from ..cmor.check import cmor_check_data, cmor_check_metadata from ..cmor.fix import fix_data, fix_file, fix_metadata +from ._ancillary_vars import add_fx_variables, remove_fx_variables from ._area import ( area_statistics, extract_named_regions, @@ -93,6 +94,8 @@ # Data reformatting/CMORization 'fix_data', 'cmor_check_data', + # Load fx_variables in cube + 'add_fx_variables', # Time extraction (as defined in the preprocessor section) 'extract_time', 'extract_season', @@ -156,6 +159,8 @@ 'linear_trend', 'linear_trend_stderr', 'convert_units', + # Remove fx_variables from cube + 'remove_fx_variables', # Save to file 'save', 'cleanup', @@ -179,8 +184,8 @@ DEFAULT_ORDER = tuple(__all__) # The order of initial and final steps cannot be configured -INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('cmor_check_data') + 1] -FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('save'):] +INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('add_fx_variables') + 1] +FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('remove_fx_variables'):] MULTI_MODEL_FUNCTIONS = { 'multi_model_statistics', diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py new file mode 100644 index 0000000000..5d6dbe32d1 --- /dev/null +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -0,0 +1,203 @@ +"""Preprocessor functions for ancillary variables and cell measures.""" + +import logging +import iris + +import dask.array as da + +from esmvalcore.preprocessor._io import load, concatenate_callback, concatenate +from esmvalcore.cmor.fix import fix_metadata, 
fix_data +from esmvalcore.cmor.check import cmor_check_metadata, cmor_check_data + +logger = logging.getLogger(__name__) + + +def _load_fx(fx_info, check_level): + """Load and CMOR-check fx variables.""" + fx_cubes = iris.cube.CubeList() + + for fx_file in fx_info['filename']: + loaded_cube = load(fx_file, callback=concatenate_callback) + short_name = fx_info['short_name'] + project = fx_info['project'] + dataset = fx_info['dataset'] + mip = fx_info['mip'] + freq = fx_info['frequency'] + loaded_cube = fix_metadata(loaded_cube, short_name=short_name, + project=project, dataset=dataset, + mip=mip, frequency=freq, + check_level=check_level) + fx_cubes.append(loaded_cube[0]) + + fx_cube = concatenate(fx_cubes) + + fx_cube = cmor_check_metadata(fx_cube, cmor_table=project, mip=mip, + short_name=short_name, frequency=freq, + check_level=check_level) + + fx_cube = fix_data(fx_cube, short_name=short_name, project=project, + dataset=dataset, mip=mip, frequency=freq, + check_level=check_level) + + fx_cube = cmor_check_data(fx_cube, cmor_table=project, mip=mip, + short_name=fx_cube.var_name, frequency=freq, + check_level=check_level) + + return fx_cube + + +def add_cell_measure(cube, fx_cube, measure): + """ + Broadcast fx_cube and add it as a cell_measure in + the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with input data. + fx_cube: iris.cube.Cube + Iris cube with fx data. + measure: str + Name of the measure, can be 'area' or 'volume'. + + Returns + ------- + iris.cube.Cube + Cube with added ancillary variables + + Raises + ------ + ValueError + If measure name is not 'area' or 'volume'. + ValueError + If fx_cube cannot be broadcast to cube. 
+ """ + if measure not in ['area', 'volume']: + raise ValueError(f"measure name must be 'area' or 'volume', " + f"got {measure} instead") + try: + fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) + except ValueError as exc: + raise ValueError(f"Dimensions of {cube.var_name} and " + f"{fx_cube.var_name} cubes do not match. " + "Cannot broadcast cubes.") from exc + measure = iris.coords.CellMeasure( + fx_data, + standard_name=fx_cube.standard_name, + units=fx_cube.units, + measure=measure, + var_name=fx_cube.var_name, + attributes=fx_cube.attributes) + cube.add_cell_measure(measure, range(0, measure.ndim)) + logger.debug('Added %s as cell measure in cube of %s.', + fx_cube.var_name, cube.var_name) + + +def add_ancillary_variable(cube, fx_cube): + """ + Broadcast fx_cube and add it as an ancillary_variable in + the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with input data. + fx_cube: iris.cube.Cube + Iris cube with fx data. + + Returns + ------- + iris.cube.Cube + Cube with added ancillary variables + + Raises + ------ + ValueError + If fx_cube cannot be broadcast to cube. + """ + try: + fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) + except ValueError as exc: + raise ValueError(f"Dimensions of {cube.var_name} and " + f"{fx_cube.var_name} cubes do not match. " + "Cannot broadcast cubes.") from exc + ancillary_var = iris.coords.AncillaryVariable( + fx_data, + standard_name=fx_cube.standard_name, + units=fx_cube.units, + var_name=fx_cube.var_name, + attributes=fx_cube.attributes) + cube.add_ancillary_variable(ancillary_var, range(0, ancillary_var.ndim)) + logger.debug('Added %s as ancillary variable in cube of %s.', + fx_cube.var_name, cube.var_name) + + +def add_fx_variables(cube, fx_variables, check_level): + """ + Load requested fx files, check with CMOR standards and add the + fx variables as cell measures or ancillary variables in + the cube containing the data. 
+ + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with input data. + fx_variables: dict + Dictionary with fx_variable information. + check_level: CheckLevels + Level of strictness of the checks. + + + Returns + ------- + iris.cube.Cube + Cube with added cell measures or ancillary variables. + """ + + if not fx_variables: + return cube + + for fx_info in fx_variables.values(): + if not fx_info: + continue + if isinstance(fx_info['filename'], str): + fx_info['filename'] = [fx_info['filename']] + fx_cube = _load_fx(fx_info, check_level) + + measure_name = { + 'areacella': 'area', + 'areacello': 'area', + 'volcello': 'volume' + } + + if fx_cube.var_name in measure_name: + add_cell_measure(cube, fx_cube, measure_name[fx_cube.var_name]) + else: + add_ancillary_variable(cube, fx_cube) + return cube + + +def remove_fx_variables(cube): + """ + Remove fx variables present as cell measures or ancillary variables in + the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with data and cell measures or ancillary variables. + + + Returns + ------- + iris.cube.Cube + Cube without cell measures or ancillary variables. + """ + + if cube.cell_measures(): + for measure in cube.cell_measures(): + cube.remove_cell_measure(measure.standard_name) + if cube.ancillary_variables(): + for variable in cube.ancillary_variables(): + cube.remove_ancillary_variable(variable.standard_name) + return cube diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index b1ea609dcb..f3a6a69c79 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -155,46 +155,7 @@ def meridional_statistics(cube, operator): raise ValueError(msg) -def tile_grid_areas(cube, fx_files): - """Tile the grid area data to match the dataset cube. - - Parameters - ---------- - cube: iris.cube.Cube - input cube. 
- fx_files: dict - dictionary of field:filename for the fx_files - - Returns - ------- - iris.cube.Cube - Freshly tiled grid areas cube. - """ - grid_areas = None - if fx_files: - for key, fx_file in fx_files.items(): - if not fx_file: - continue - logger.info('Attempting to load %s from file: %s', key, fx_file) - fx_cube = iris.load_cube(fx_file) - - grid_areas = fx_cube.core_data() - if cube.ndim == 4 and grid_areas.ndim == 2: - grid_areas = da.tile(grid_areas, - [cube.shape[0], cube.shape[1], 1, 1]) - elif cube.ndim == 4 and grid_areas.ndim == 3: - grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1, 1]) - elif cube.ndim == 3 and grid_areas.ndim == 2: - grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1]) - else: - raise ValueError('Grid and dataset number of dimensions not ' - 'recognised: {} and {}.' - ''.format(cube.ndim, grid_areas.ndim)) - return grid_areas - - -# get the area average -def area_statistics(cube, operator, fx_variables=None): +def area_statistics(cube, operator): """Apply a statistical operator in the horizontal direction. The average in the horizontal direction. We assume that the @@ -231,8 +192,6 @@ def area_statistics(cube, operator, fx_variables=None): operator: str The operation, options: mean, median, min, max, std_dev, sum, variance, rms. - fx_variables: dict - dictionary of field:filename for the fx_variables Returns ------- @@ -246,9 +205,17 @@ def area_statistics(cube, operator, fx_variables=None): ValueError if input data cube has different shape than grid area weights """ - grid_areas = tile_grid_areas(cube, fx_variables) + grid_areas = None + try: + grid_areas = cube.cell_measure('cell_area').core_data() + except iris.exceptions.CellMeasureNotFoundError: + logger.info( + 'Cell measure "cell_area" not found in cube %s. 
' + 'Check fx_file availability.', cube + ) + logger.info('Attempting to calculate grid cell area...') - if not fx_variables and cube.coord('latitude').points.ndim == 2: + if grid_areas is None and cube.coord('latitude').points.ndim == 2: coord_names = [coord.standard_name for coord in cube.coords()] if 'grid_latitude' in coord_names and 'grid_longitude' in coord_names: cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) @@ -267,7 +234,7 @@ def area_statistics(cube, operator, fx_variables=None): cube.coord('latitude')) coord_names = ['longitude', 'latitude'] - if grid_areas is None or not grid_areas.any(): + if grid_areas is None: cube = guess_bounds(cube, coord_names) grid_areas = iris.analysis.cartography.area_weights(cube) logger.info('Calculated grid area shape: %s', grid_areas.shape) diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index 08ba463275..db328f97e3 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -2,7 +2,7 @@ Mask module. Module that performs a number of masking -operations that include: masking with fx files, masking with +operations that include: masking with ancillary variables, masking with Natural Earth shapefiles (land or ocean), masking on thresholds, missing values masking. 
""" @@ -21,28 +21,6 @@ logger = logging.getLogger(__name__) -def _check_dims(cube, mask_cube): - """Check for same ndim and x-y dimensions for data and mask cubes.""" - x_dim = cube.coord('longitude').points.ndim - y_dim = cube.coord('latitude').points.ndim - mx_dim = mask_cube.coord('longitude').points.ndim - my_dim = mask_cube.coord('latitude').points.ndim - len_x = len(cube.coord('longitude').points) - len_y = len(cube.coord('latitude').points) - len_mx = len(mask_cube.coord('longitude').points) - len_my = len(mask_cube.coord('latitude').points) - if (x_dim == mx_dim and y_dim == my_dim and len_x == len_mx - and len_y == len_my): - logger.debug('Data cube and fx mask have same dims') - return True - - logger.debug( - 'Data cube and fx mask differ in dims: ' - 'cube: ((%i, %i), grid=(%i, %i)), mask: ((%i, %i), grid=(%i, %i))', - x_dim, y_dim, len_x, len_y, mx_dim, my_dim, len_mx, len_my) - return False - - def _get_fx_mask(fx_data, fx_option, mask_type): """Build a percentage-thresholded mask from an fx file.""" inmask = np.zeros_like(fx_data, bool) @@ -73,37 +51,32 @@ def _get_fx_mask(fx_data, fx_option, mask_type): def _apply_fx_mask(fx_mask, var_data): """Apply the fx data extracted mask on the actual processed data.""" - # Broadcast mask - var_mask = np.zeros_like(var_data, bool) - var_mask = np.broadcast_to(fx_mask, var_mask.shape).copy() - # Apply mask across if np.ma.is_masked(var_data): - var_mask |= var_data.mask + fx_mask |= var_data.mask # Build the new masked data - var_data = np.ma.array(var_data, mask=var_mask, fill_value=1e+20) + var_data = np.ma.array(var_data, mask=fx_mask, fill_value=1e+20) return var_data -def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): +def mask_landsea(cube, mask_out, always_use_ne_mask=False): """ Mask out either land mass or sea (oceans, seas and lakes). - It uses dedicated fx files (sftlf or sftof) or, in their absence, it - applies a Natural Earth mask (land or ocean contours). 
Note that the - Natural Earth masks have different resolutions: 10m for land, and 50m - for seas; these are more than enough for ESMValTool puprpose. + It uses dedicated ancillary variables (sftlf or sftof) or, + in their absence, it applies a + Natural Earth mask (land or ocean contours). + Note that the Natural Earth masks have different resolutions: + 10m for land, and 50m for seas. + These are more than enough for ESMValTool purposes. Parameters ---------- cube: iris.cube.Cube data cube to be masked. - fx_variables: dict - dict: keys: fx variables, values: full paths to fx files. - mask_out: str either "land" to mask out land mass or "sea" to mask out seas. @@ -132,30 +105,24 @@ def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): 'sea': os.path.join(cwd, 'ne_masks/ne_50m_ocean.shp') } - fx_files = fx_variables.values() - if any(fx_files) and not always_use_ne_mask: - fx_cubes = {} - for fx_file in fx_files: - if not fx_file: - continue - fxfile_members = os.path.basename(fx_file).split('_') - for fx_root in ['sftlf', 'sftof']: - if fx_root in fxfile_members: - fx_cubes[fx_root] = iris.load_cube(fx_file) - + if not always_use_ne_mask: # preserve importance order: try stflf first then sftof - if ('sftlf' in fx_cubes.keys() - and _check_dims(cube, fx_cubes['sftlf'])): - landsea_mask = _get_fx_mask(fx_cubes['sftlf'].data, mask_out, - 'sftlf') - cube.data = _apply_fx_mask(landsea_mask, cube.data) - logger.debug("Applying land-sea mask: sftlf") - elif ('sftof' in fx_cubes.keys() - and _check_dims(cube, fx_cubes['sftof'])): - landsea_mask = _get_fx_mask(fx_cubes['sftof'].data, mask_out, - 'sftof') + fx_cube = None + try: + fx_cube = cube.ancillary_variable('land_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + try: + fx_cube = cube.ancillary_variable('sea_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + logger.debug( + 'Ancillary variables land/sea area fraction ' + 'not found in cube. 
Check fx_file availability.') + + if fx_cube: + landsea_mask = _get_fx_mask( + fx_cube.data, mask_out, fx_cube.var_name) cube.data = _apply_fx_mask(landsea_mask, cube.data) - logger.debug("Applying land-sea mask: sftof") + logger.debug("Applying land-sea mask: %s", fx_cube.var_name) else: if cube.coord('longitude').points.ndim < 2: cube = _mask_with_shp(cube, shapefiles[mask_out], [ @@ -184,21 +151,20 @@ def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): return cube -def mask_landseaice(cube, fx_variables, mask_out): +def mask_landseaice(cube, mask_out): """ Mask out either landsea (combined) or ice. Function that masks out either landsea (land and seas) or ice (Antarctica - and Greenland and some wee glaciers). It uses dedicated fx files (sftgif). + and Greenland and some wee glaciers). + + It uses dedicated ancillary variables (sftgif). Parameters ---------- cube: iris.cube.Cube data cube to be masked. - fx_variables: dict - dict: keys: fx variables, values: full paths to fx files. - mask_out: str either "landsea" to mask out landsea or "ice" to mask out ice. @@ -210,26 +176,20 @@ def mask_landseaice(cube, fx_variables, mask_out): Raises ------ ValueError - Error raised if fx mask and data have different dimensions. - ValueError - Error raised if fx files list is empty. + Error raised if landsea-ice mask not found as an ancillary variable. """ # sftgif is the only one so far but users can set others - fx_files = fx_variables.values() - if any(fx_files): - for fx_file in fx_files: - if not fx_file: - continue - fx_cube = iris.load_cube(fx_file) - - if _check_dims(cube, fx_cube): - landice_mask = _get_fx_mask(fx_cube.data, mask_out, 'sftgif') - cube.data = _apply_fx_mask(landice_mask, cube.data) - logger.debug("Applying landsea-ice mask: sftgif") - else: - msg = "Landsea-ice mask and data have different dimensions." 
- raise ValueError(msg) + fx_cube = None + try: + fx_cube = cube.ancillary_variable('land_ice_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + logger.debug('Ancillary variable land ice area fraction ' + 'not found in cube. Check fx_file availability.') + if fx_cube: + landice_mask = _get_fx_mask(fx_cube.data, mask_out, fx_cube.var_name) + cube.data = _apply_fx_mask(landice_mask, cube.data) + logger.debug("Applying landsea-ice mask: sftgif") else: msg = "Landsea-ice mask could not be found. Stopping. " raise ValueError(msg) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 589a6c68e3..27182081be 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -174,10 +174,7 @@ def calculate_volume(cube): return grid_volume -def volume_statistics( - cube, - operator, - fx_variables=None): +def volume_statistics(cube, operator): """ Apply a statistical operation over a volume. @@ -187,12 +184,10 @@ def volume_statistics( Parameters ---------- - cube: iris.cube.Cube - Input cube. - operator: str - The operation to apply to the cube, options are: 'mean'. - fx_variables: dict - dictionary of field:filename for the fx_variables + cube: iris.cube.Cube + Input cube. + operator: str + The operation to apply to the cube, options are: 'mean'. Returns ------- @@ -211,27 +206,16 @@ def volume_statistics( # Load z coordinate field and figure out which dim is which. 
t_dim = cube.coord_dims('time')[0] - grid_volume_found = False - grid_volume = None - if fx_variables: - for key, fx_file in fx_variables.items(): - if fx_file is None: - continue - logger.info('Attempting to load %s from file: %s', key, fx_file) - fx_cube = iris.load_cube(fx_file) - - grid_volume = fx_cube.data - grid_volume_found = True - cube_shape = cube.data.shape - - if not grid_volume_found: + try: + grid_volume = cube.cell_measure('ocean_volume').core_data() + except iris.exceptions.CellMeasureNotFoundError: + logger.info( + 'Cell measure "ocean_volume" not found in cube. ' + 'Check fx_file availability.' + ) + logger.info('Attempting to calculate grid cell volume...') grid_volume = calculate_volume(cube) - # Check whether the dimensions are right. - if cube.data.ndim == 4 and grid_volume.ndim == 3: - grid_volume = np.tile(grid_volume, - [cube_shape[0], 1, 1, 1]) - if cube.data.shape != grid_volume.shape: raise ValueError('Cube shape ({}) doesn`t match grid volume shape ' '({})'.format(cube.data.shape, grid_volume.shape)) diff --git a/esmvalcore/preprocessor/_weighting.py b/esmvalcore/preprocessor/_weighting.py index b786684135..32e6c526a0 100644 --- a/esmvalcore/preprocessor/_weighting.py +++ b/esmvalcore/preprocessor/_weighting.py @@ -7,43 +7,31 @@ logger = logging.getLogger(__name__) -def _get_land_fraction(cube, fx_variables): +def _get_land_fraction(cube): """Extract land fraction as :mod:`dask.array`.""" + fx_cube = None land_fraction = None errors = [] - if not fx_variables: - errors.append("No fx files given.") - return (land_fraction, errors) - for (fx_var, fx_path) in fx_variables.items(): - if not fx_path: - errors.append(f"File for '{fx_var}' not found.") - continue - fx_cube = iris.load_cube(fx_path) - if not _shape_is_broadcastable(fx_cube.shape, cube.shape): + try: + fx_cube = cube.ancillary_variable('land_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + try: + fx_cube = cube.ancillary_variable('sea_area_fraction') + 
except iris.exceptions.AncillaryVariableNotFoundError: errors.append( - f"Cube '{fx_var}' with shape {fx_cube.shape} not " - f"broadcastable to cube '{cube.var_name}' with shape " - f"{cube.shape}.") - continue - if fx_var == 'sftlf': - land_fraction = fx_cube.core_data() / 100.0 - break - if fx_var == 'sftof': - land_fraction = 1.0 - fx_cube.core_data() / 100.0 - break - errors.append( - f"Cannot calculate land fraction from '{fx_var}', expected " - f"'sftlf' or 'sftof'.") - return (land_fraction, errors) + 'Ancillary variables land/sea area fraction ' + 'not found in cube. Check fx_file availability.') + return (land_fraction, errors) + if fx_cube.var_name == 'sftlf': + land_fraction = fx_cube.core_data() / 100.0 + if fx_cube.var_name == 'sftof': + land_fraction = 1.0 - fx_cube.core_data() / 100.0 -def _shape_is_broadcastable(shape_1, shape_2): - """Check if two :mod:`numpy.array' shapes are broadcastable.""" - return all((m == n) or (m == 1) or (n == 1) - for (m, n) in zip(shape_1[::-1], shape_2[::-1])) + return (land_fraction, errors) -def weighting_landsea_fraction(cube, fx_variables, area_type): +def weighting_landsea_fraction(cube, area_type): """Weight fields using land or sea fraction. This preprocessor function weights a field with its corresponding land or @@ -58,9 +46,6 @@ def weighting_landsea_fraction(cube, fx_variables, area_type): ---------- cube : iris.cube.Cube Data cube to be weighted. - fx_variables : dict - Dictionary holding ``var_name`` (keys) and full paths (values) to the - fx files as ``str`` or empty ``list`` (if not available). area_type : str Use land (``'land'``) or sea (``'sea'``) fraction for weighting. @@ -74,14 +59,13 @@ def weighting_landsea_fraction(cube, fx_variables, area_type): TypeError ``area_type`` is not ``'land'`` or ``'sea'``. ValueError - Land/sea fraction variables ``sftlf`` or ``sftof`` not found or shape - of them is not broadcastable to ``cube``. + Land/sea fraction variables ``sftlf`` or ``sftof`` not found. 
""" if area_type not in ('land', 'sea'): raise TypeError( f"Expected 'land' or 'sea' for area_type, got '{area_type}'") - (land_fraction, errors) = _get_land_fraction(cube, fx_variables) + (land_fraction, errors) = _get_land_fraction(cube) if land_fraction is None: raise ValueError( f"Weighting of '{cube.var_name}' with '{area_type}' fraction " diff --git a/tests/integration/preprocessor/_ancillary_vars/__init__.py b/tests/integration/preprocessor/_ancillary_vars/__init__.py new file mode 100644 index 0000000000..88b606fc48 --- /dev/null +++ b/tests/integration/preprocessor/_ancillary_vars/__init__.py @@ -0,0 +1,5 @@ +""" +Test _ancillary_vars.py + +Integration tests for the esmvalcore.preprocessor._ancillary_vars module +""" diff --git a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py new file mode 100644 index 0000000000..7ac756d50a --- /dev/null +++ b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py @@ -0,0 +1,217 @@ +""" +Test add_fx_variables. + +Integration tests for the +:func:`esmvalcore.preprocessor._ancillary_vars` module. + +""" +import logging +import iris +import numpy as np +import pytest + +from esmvalcore.cmor.check import CheckLevels +from esmvalcore.preprocessor._ancillary_vars import (add_fx_variables, + add_ancillary_variable, + add_cell_measure, + remove_fx_variables) + +logger = logging.getLogger(__name__) + + +class Test: + """Test class.""" + @pytest.fixture(autouse=True) + def setUp(self): + """Assemble a stock cube.""" + fx_area_data = np.ones((3, 3)) + fx_volume_data = np.ones((3, 3, 3)) + self.new_cube_data = np.empty((3, 3)) + self.new_cube_data[:] = 200. + self.new_cube_3D_data = np.empty((3, 3, 3)) + self.new_cube_3D_data[:] = 200. 
+ crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) + self.lons = iris.coords.DimCoord([0, 1.5, 3], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_east', + coord_system=crd_sys) + self.lats = iris.coords.DimCoord([0, 1.5, 3], + standard_name='latitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_north', + coord_system=crd_sys) + self.depth = iris.coords.DimCoord([0, 1.5, 3], + standard_name='depth', + bounds=[[0, 1], [1, 2], [2, 3]], + units='m', + long_name='ocean depth coordinate') + self.monthly_times = iris.coords.DimCoord( + [15.5, 45, 74.5, 105, 135.5, 166, + 196.5, 227.5, 258, 288.5, 319, 349.5], + standard_name='time', + bounds=[[0, 31], [31, 59], [59, 90], + [90, 120], [120, 151], [151, 181], + [181, 212], [212, 243], [243, 273], + [273, 304], [304, 334], [334, 365]], + units='days since 1950-01-01 00:00:00') + self.yearly_times = iris.coords.DimCoord( + [182.5, 547.5], + standard_name='time', + bounds=[[0, 365], [365, 730]], + units='days since 1950-01-01 00:00') + self.coords_spec = [(self.lats, 0), (self.lons, 1)] + self.fx_area = iris.cube.Cube(fx_area_data, + dim_coords_and_dims=self.coords_spec) + self.fx_volume = iris.cube.Cube(fx_volume_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2) + ]) + + def test_add_cell_measure_area(self, tmp_path): + """Test add area fx variables as cell measures.""" + fx_vars = { + 'areacella': { + 'short_name': 'areacella', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx'}, + 'areacello': { + 'short_name': 'areacello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Ofx', + 'frequency': 'fx' + } + } + for fx_var in fx_vars: + self.fx_area.var_name = fx_var + self.fx_area.standard_name = 'cell_area' + self.fx_area.units = 'm2' + fx_file = str(tmp_path / f'{fx_var}.nc') + fx_vars[fx_var].update({'filename': fx_file}) + iris.save(self.fx_area, fx_file) + cube = 
iris.cube.Cube(self.new_cube_data, + dim_coords_and_dims=self.coords_spec) + cube = add_fx_variables( + cube, {fx_var: fx_vars[fx_var]}, CheckLevels.IGNORE) + assert cube.cell_measure(self.fx_area.standard_name) is not None + + def test_add_cell_measure_volume(self, tmp_path): + """Test add volume as cell measure.""" + fx_vars = { + 'volcello': { + 'short_name': 'volcello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Ofx', + 'frequency': 'fx'} + } + self.fx_volume.var_name = 'volcello' + self.fx_volume.standard_name = 'ocean_volume' + self.fx_volume.units = 'm3' + fx_file = str(tmp_path / 'volcello.nc') + iris.save(self.fx_volume, fx_file) + fx_vars['volcello'].update({'filename': fx_file}) + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) + assert cube.cell_measure(self.fx_volume.standard_name) is not None + + def test_no_cell_measure(self): + """Test no cell measure is added.""" + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + cube = add_fx_variables(cube, {'areacello': None}, CheckLevels.IGNORE) + assert cube.cell_measures() == [] + + def test_add_ancillary_vars(self, tmp_path): + """Test fx variable is added as ancillary variable.""" + self.fx_area.var_name = 'sftlf' + self.fx_area.standard_name = "land_area_fraction" + self.fx_area.units = '%' + fx_file = str(tmp_path / f'{self.fx_area.var_name}.nc') + iris.save(self.fx_area, fx_file) + fx_vars = { + 'sftlf': { + 'short_name': 'sftlf', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': fx_file} + } + cube = iris.cube.Cube(self.new_cube_data, + dim_coords_and_dims=self.coords_spec) + cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) + assert cube.ancillary_variable(self.fx_area.standard_name) is not None + + def
test_wrong_time_frequency(self, tmp_path): + """ + Test error is raised when cube and fx cube + frequencies do not match. + """ + volume_data = np.ones((2, 3, 3, 3)) + volume_cube = iris.cube.Cube( + volume_data, + dim_coords_and_dims=[(self.yearly_times, 0), + (self.depth, 1), + (self.lats, 2), + (self.lons, 3)]) + volume_cube.standard_name = 'ocean_volume' + volume_cube.var_name = 'volcello' + volume_cube.units = 'm3' + fx_file = str(tmp_path / f'{volume_cube.var_name}.nc') + iris.save(volume_cube, fx_file) + fx_vars = { + 'volcello': { + 'short_name': 'volcello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Oyr', + 'frequency': 'yr', + 'filename': fx_file} + } + data = np.ones((12, 3, 3, 3)) + cube = iris.cube.Cube( + data, + dim_coords_and_dims=[(self.monthly_times, 0), + (self.depth, 1), + (self.lats, 2), + (self.lons, 3)]) + cube.var_name = 'thetao' + with pytest.raises(ValueError) as excinfo: + cube = add_fx_variables( + cube, fx_vars, CheckLevels.IGNORE) + msg = (f"Dimensions of {cube.var_name} and {volume_cube.var_name} " + "cubes do not match. 
Cannot broadcast cubes.") + assert msg in str(excinfo.value) + + def test_remove_fx_vars(self): + """Test fx_variables are removed from cube.""" + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[(self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + self.fx_area.var_name = 'areacella' + self.fx_area.standard_name = 'cell_area' + self.fx_area.units = 'm2' + add_cell_measure(cube, self.fx_area, measure='area') + assert cube.cell_measure(self.fx_area.standard_name) is not None + self.fx_area.var_name = 'sftlf' + self.fx_area.standard_name = "land_area_fraction" + self.fx_area.units = '%' + add_ancillary_variable(cube, self.fx_area) + assert cube.ancillary_variable(self.fx_area.standard_name) is not None + cube = remove_fx_variables(cube) + assert cube.cell_measures() == [] + assert cube.ancillary_variables() == [] diff --git a/tests/integration/preprocessor/_mask/test_mask.py b/tests/integration/preprocessor/_mask/test_mask.py index 5ddb30a228..4e2ef513f8 100644 --- a/tests/integration/preprocessor/_mask/test_mask.py +++ b/tests/integration/preprocessor/_mask/test_mask.py @@ -10,8 +10,10 @@ import numpy as np import pytest +from esmvalcore.cmor.check import CheckLevels from esmvalcore.preprocessor import (PreprocessorFile, mask_fillvalues, - mask_landsea, mask_landseaice) + mask_landsea, mask_landseaice, + add_fx_variables) from tests import assert_array_equal @@ -47,58 +49,90 @@ def setUp(self): units='hours') self.coords_spec = [(self.lats, 0), (self.lons, 1)] self.fx_mask = iris.cube.Cube(fx_data, - dim_coords_and_dims=self.coords_spec) + dim_coords_and_dims=self.coords_spec, + units='%') self.mock_data = np.ma.empty((4, 3, 3)) self.mock_data[:] = 10. 
- def test_components_fx_dict(self, tmp_path): - """Test compatibility of input fx dictionary.""" + def test_components_fx_var(self, tmp_path): + """Test compatibility of ancillary variables.""" + self.fx_mask.var_name = 'sftlf' + self.fx_mask.standard_name = 'land_area_fraction' sftlf_file = str(tmp_path / 'sftlf_mask.nc') iris.save(self.fx_mask, sftlf_file) + fx_vars = { + 'sftlf': { + 'short_name': 'sftlf', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftlf_file} + } new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_land = add_fx_variables(new_cube_land, fx_vars, + CheckLevels.IGNORE) result_land = mask_landsea( new_cube_land, - { - 'sftlf': sftlf_file, - 'sftof': [], - }, 'land', ) assert isinstance(result_land, iris.cube.Cube) + self.fx_mask.var_name = 'sftgif' + self.fx_mask.standard_name = 'land_ice_area_fraction' sftgif_file = str(tmp_path / 'sftgif_mask.nc') iris.save(self.fx_mask, sftgif_file) + fx_vars = { + 'sftgif': { + 'short_name': 'sftgif', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftgif_file} + } new_cube_ice = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_ice = add_fx_variables(new_cube_ice, fx_vars, + CheckLevels.IGNORE) result_ice = mask_landseaice( new_cube_ice, - { - 'sftgif': sftgif_file, - 'sftof': [], - }, 'ice', ) assert isinstance(result_ice, iris.cube.Cube) def test_mask_landsea(self, tmp_path): """Test mask_landsea func.""" + self.fx_mask.var_name = 'sftlf' + self.fx_mask.standard_name = 'land_area_fraction' sftlf_file = str(tmp_path / 'sftlf_mask.nc') iris.save(self.fx_mask, sftlf_file) + fx_vars = { + 'sftlf': { + 'short_name': 'sftlf', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftlf_file} + } new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) +
new_cube_land = add_fx_variables(new_cube_land, fx_vars, + CheckLevels.IGNORE) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_sea = add_fx_variables(new_cube_sea, fx_vars, + CheckLevels.IGNORE) # mask with fx files result_land = mask_landsea( new_cube_land, - {'sftlf': sftlf_file}, 'land', ) result_sea = mask_landsea( new_cube_sea, - {'sftlf': sftlf_file}, 'sea', ) expected = np.ma.empty((3, 3)) @@ -117,17 +151,19 @@ def test_mask_landsea(self, tmp_path): # Mask with shp files although sftlf is available new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_land = add_fx_variables(new_cube_land, fx_vars, + CheckLevels.IGNORE) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_sea = add_fx_variables(new_cube_sea, fx_vars, + CheckLevels.IGNORE) result_land = mask_landsea( new_cube_land, - {'sftlf': sftlf_file}, 'land', always_use_ne_mask=True, ) result_sea = mask_landsea( new_cube_sea, - {'sftlf': sftlf_file}, 'sea', always_use_ne_mask=True, ) @@ -145,8 +181,8 @@ def test_mask_landsea(self, tmp_path): dim_coords_and_dims=self.coords_spec) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_land = mask_landsea(new_cube_land, {}, 'land') - result_sea = mask_landsea(new_cube_sea, {}, 'sea') + result_land = mask_landsea(new_cube_land, 'land') + result_sea = mask_landsea(new_cube_sea, 'sea') # bear in mind all points are in the ocean np.ma.set_fill_value(result_land.data, 1e+20) @@ -158,12 +194,24 @@ def test_mask_landsea(self, tmp_path): def test_mask_landseaice(self, tmp_path): """Test mask_landseaice func.""" + self.fx_mask.var_name = 'sftgif' + self.fx_mask.standard_name = 'land_ice_area_fraction' sftgif_file = str(tmp_path / 'sftgif_mask.nc') iris.save(self.fx_mask, sftgif_file) + fx_vars = { + 'sftgif': { + 'short_name': 'sftgif', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', 
+ 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftgif_file} + } new_cube_ice = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_ice = mask_landseaice(new_cube_ice, {'sftgif': sftgif_file}, - 'ice') + new_cube_ice = add_fx_variables(new_cube_ice, fx_vars, + CheckLevels.IGNORE) + result_ice = mask_landseaice(new_cube_ice, 'ice') expected = np.ma.empty((3, 3)) expected.data[:] = 200. expected.mask = np.ones((3, 3), bool) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 459c242b7e..81e3ef7720 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -76,6 +76,7 @@ ) DEFAULT_PREPROCESSOR_STEPS = ( + 'add_fx_variables', 'cleanup', 'cmor_check_data', 'cmor_check_metadata', @@ -85,6 +86,7 @@ 'fix_file', 'fix_metadata', 'load', + 'remove_fx_variables', 'save', ) @@ -161,6 +163,11 @@ def _get_default_settings_for_chl(fix_dir, save_filename): 'short_name': 'chl', 'frequency': 'yr', }, + 'add_fx_variables': { + 'fx_variables': {}, + 'check_level': CheckLevels.DEFAULT, + }, + 'remove_fx_variables': {}, 'cleanup': { 'remove': [fix_dir] }, @@ -582,6 +589,11 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'short_name': 'sftlf', 'frequency': 'fx', }, + 'add_fx_variables': { + 'fx_variables': {}, + 'check_level': CheckLevels.DEFAULT, + }, + 'remove_fx_variables': {}, 'cleanup': { 'remove': [fix_dir] }, @@ -1655,9 +1667,9 @@ def test_weighting_landsea_fraction(tmp_path, patched_datafinder, config_user): for product in task.products: assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['area_type'] == 'land' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) if product.attributes['project'] == 'obs4mips': assert 
len(fx_variables) == 1 @@ -1706,18 +1718,12 @@ def test_weighting_landsea_fraction_no_fx(tmp_path, patched_failing_datafinder, for product in task.products: assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - if product.attributes['project'] == 'obs4mips': - assert len(fx_variables) == 1 - assert fx_variables['sftlf'] == [] - else: - assert len(fx_variables) == 2 - assert fx_variables['sftlf'] == [] - assert fx_variables['sftof'] == [] + assert len(fx_variables) == 0 def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, @@ -1763,13 +1769,9 @@ def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, continue assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert fx_variables.get('sftlf') def test_weighting_landsea_fraction_exclude_fail(tmp_path, patched_datafinder, @@ -1843,9 +1845,9 @@ def test_landmask(tmp_path, patched_datafinder, config_user): for product in task.products: assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) fx_variables = fx_variables.values() if product.attributes['project'] == 'obs4mips': @@ 
-1855,6 +1857,80 @@ def test_landmask(tmp_path, patched_datafinder, config_user): def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): + content = dedent(""" + preprocessors: + landmask: + mask_landsea: + mask_out: sea + fx_variables: + sftlf: + exp: piControl + mask_landseaice: + mask_out: sea + fx_variables: + sftgif: + exp: piControl + volume_statistics: + operator: mean + area_statistics: + operator: mean + fx_variables: + areacello: + mip: fx + exp: piControl + diagnostics: + diagnostic_name: + variables: + gpp: + preprocessor: landmask + project: CMIP5 + mip: Lmon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1 + additional_datasets: + - {dataset: CanESM2} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check custom fx variables + task = recipe.tasks.pop() + product = task.products.pop() + + # landsea + settings = product.settings['mask_landsea'] + assert len(settings) == 1 + assert settings['mask_out'] == 'sea' + fx_variables = product.settings['add_fx_variables']['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 3 + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_piControl_' in fx_variables['sftlf']['filename'] + + # landseaice + settings = product.settings['mask_landseaice'] + assert len(settings) == 1 + assert settings['mask_out'] == 'sea' + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_piControl_' in fx_variables['sftgif']['filename'] + + # volume statistics + settings = product.settings['volume_statistics'] + assert len(settings) == 1 + assert settings['operator'] == 'mean' + assert 'volcello' not in fx_variables + + # area statistics + settings = product.settings['area_statistics'] + assert len(settings) == 1 + assert settings['operator'] == 'mean' + assert '_fx_' in fx_variables['areacello']['filename'] + assert '_piControl_' in fx_variables['areacello']['filename'] + + +def test_user_defined_fxlist(tmp_path,
patched_datafinder, config_user): content = dedent(""" preprocessors: landmask: @@ -1870,7 +1946,6 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): operator: mean fx_variables: [{'short_name': 'areacello', 'mip': 'fx', 'exp': 'piControl'}] - diagnostics: diagnostic_name: variables: @@ -1894,39 +1969,33 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): # landsea settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['sftlf'] - assert '_piControl_' in fx_variables['sftlf'] + assert len(fx_variables) == 3 + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_piControl_' in fx_variables['sftlf']['filename'] # landseaice settings = product.settings['mask_landseaice'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['sftgif'] - assert '_piControl_' in fx_variables['sftgif'] + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_piControl_' in fx_variables['sftgif']['filename'] # volume statistics settings = product.settings['volume_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' - assert 'fx_variables' not in settings + assert 'volcello' not in fx_variables # area statistics settings = product.settings['area_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['areacello'] - assert '_piControl_' in
fx_variables['areacello'] + assert '_fx_' in fx_variables['areacello']['filename'] + assert '_piControl_' in fx_variables['areacello']['filename'] def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): @@ -1968,10 +2037,10 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): for product in task.products: assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 3 + assert len(settings) == 2 assert settings['mask_out'] == 'sea' assert settings['always_use_ne_mask'] is False - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) fx_variables = fx_variables.values() assert not any(fx_variables) @@ -1980,6 +2049,72 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): TAGS.set_tag_values(TAGS_FOR_TESTING) + content = dedent(""" + preprocessors: + preproc: + area_statistics: + operator: mean + fx_variables: + areacella: + ensemble: r2i1p1f1 + areacello: + clayfrac: + sftlf: + sftgif: + mip: fx + sftof: + mask_landsea: + mask_out: sea + + diagnostics: + diagnostic_name: + variables: + tas: + preprocessor: preproc + project: CMIP6 + mip: Amon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tas' + assert len(task.products) == 1 + product = task.products.pop() + + # Check area_statistics + assert 'area_statistics' in product.settings + settings = product.settings['area_statistics'] + assert len(settings) == 1 + assert settings['operator'] == 'mean' + fx_variables = 
product.settings['add_fx_variables']['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 6 + assert '_fx_' in fx_variables['areacella']['filename'] + assert '_r2i1p1f1_' in fx_variables['areacella']['filename'] + assert '_Ofx_' in fx_variables['areacello']['filename'] + assert '_Efx_' in fx_variables['clayfrac']['filename'] + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_Ofx_' in fx_variables['sftof']['filename'] + + # Check mask_landsea + assert 'mask_landsea' in product.settings + settings = product.settings['mask_landsea'] + assert len(settings) == 1 + assert settings['mask_out'] == 'sea' + + +def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" preprocessors: preproc: @@ -1992,7 +2127,7 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): 'sftlf', 'sftgif', 'sftof', - ] + ] mask_landsea: mask_out: sea @@ -2024,34 +2159,23 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): # Check area_statistics assert 'area_statistics' in product.settings settings = product.settings['area_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 6 - assert '_fx_' in fx_variables['areacella'] - assert '_Ofx_' in fx_variables['areacello'] - assert '_Efx_' in fx_variables['clayfrac'] - assert '_fx_' in fx_variables['sftlf'] - assert '_fx_' in fx_variables['sftgif'] - assert '_Ofx_' in fx_variables['sftof'] + assert '_fx_' in fx_variables['areacella']['filename'] + assert '_Ofx_' in fx_variables['areacello']['filename'] + assert '_Efx_' in fx_variables['clayfrac']['filename'] + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_fx_' in 
fx_variables['sftgif']['filename'] + assert '_Ofx_' in fx_variables['sftof']['filename'] # Check mask_landsea assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - fx_variables = fx_variables.values() - assert len(fx_variables) == 2 - for fx_file in fx_variables: - if 'sftlf' in fx_file: - assert '_fx_' in fx_file - elif 'sftof' in fx_file: - assert '_Ofx_' in fx_file - else: - assert False def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2063,7 +2187,9 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: + mip: Ofx diagnostics: diagnostic_name: @@ -2093,13 +2219,13 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Omon_' in fx_variables['volcello'] - assert '_Ofx_' not in fx_variables['volcello'] + assert '_Omon_' not in fx_variables['volcello']['filename'] + assert '_Ofx_' in fx_variables['volcello']['filename'] def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2109,8 +2235,10 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: [{'short_name': 'volcello', 'mip': 'Oyr', - 'exp': 'piControl'}] + fx_variables: + volcello: + mip: Oyr + exp: piControl diagnostics: diagnostic_name: @@ -2140,14 
+2268,14 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Oyr_' in fx_variables['volcello'] - assert '_piControl_' in fx_variables['volcello'] - assert '_Omon_' not in fx_variables['volcello'] + assert '_Oyr_' in fx_variables['volcello']['filename'][0] + assert '_piControl_' in fx_variables['volcello']['filename'][0] + assert '_Omon_' not in fx_variables['volcello']['filename'][0] def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, @@ -2199,7 +2327,8 @@ def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, settings = product.settings['area_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' - assert 'fx_variables' not in settings + fx_variables = product.settings['add_fx_variables']['fx_variables'] + assert fx_variables == {} def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, @@ -2209,7 +2338,9 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: + mip: Omon diagnostics: diagnostic_name: @@ -2239,13 +2370,13 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert 
isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello'] - assert '_Omon_' in fx_variables['volcello'] + assert '_Ofx_' not in fx_variables['volcello']['filename'][0] + assert '_Omon_' in fx_variables['volcello']['filename'][0] def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, @@ -2255,7 +2386,9 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: + mip: Oyr diagnostics: diagnostic_name: @@ -2285,13 +2418,13 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello'] - assert '_Oyr_' in fx_variables['volcello'] + assert '_Ofx_' not in fx_variables['volcello']['filename'][0] + assert '_Oyr_' in fx_variables['volcello']['filename'][0] def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, @@ -2301,7 +2434,8 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: diagnostics: diagnostic_name: @@ -2330,13 +2464,13 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = 
product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['volcello'] - assert '_Omon_' not in fx_variables['volcello'] + assert '_fx_' in fx_variables['volcello']['filename'] + assert '_Omon_' not in fx_variables['volcello']['filename'] def test_wrong_project(tmp_path, patched_datafinder, config_user): @@ -2345,7 +2479,8 @@ def test_wrong_project(tmp_path, patched_datafinder, config_user): preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: diagnostics: diagnostic_name: @@ -2378,10 +2513,9 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): preproc: area_statistics: operator: mean - fx_variables: [ - 'areacella', - 'wrong_fx_variable', - ] + fx_variables: + areacella: + wrong_fx_variable: diagnostics: diagnostic_name: @@ -2400,7 +2534,7 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): scripts: null """) msg = ("Requested fx variable 'wrong_fx_variable' not available in any " - "'fx'-related CMOR table") + "CMOR table") with pytest.raises(RecipeError) as rec_err_exp: get_recipe(tmp_path, content, config_user) assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index b0563e539a..2a521dc680 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -20,6 +20,7 @@ extract_region, extract_shape, ) +from esmvalcore.preprocessor._shared import guess_bounds class Test(tests.Test): @@ -69,6 +70,22 @@ def test_area_statistics_mean(self): expected = np.array([1.]) self.assert_array_equal(result.data, expected) + def test_area_statistics_cell_measure_mean(self): + """ + Test for area average of a 2D field. 
+ The area measure is pre-loaded in the cube""" + cube = guess_bounds(self.grid, ['longitude', 'latitude']) + grid_areas = iris.analysis.cartography.area_weights(cube) + measure = iris.coords.CellMeasure( + grid_areas, + standard_name='cell_area', + units='m2', + measure='area') + self.grid.add_cell_measure(measure, range(0, measure.ndim)) + result = area_statistics(self.grid, 'mean') + expected = np.array([1.]) + self.assert_array_equal(result.data, expected) + def test_area_statistics_min(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'min') @@ -125,6 +142,27 @@ def test_extract_region(self): expected = np.ones((2, 2)) self.assert_array_equal(result.data, expected) + def test_extract_region_mean(self): + """ + Test for extracting a region and performing + the area mean of a 2D field. + """ + cube = guess_bounds(self.grid, ['longitude', 'latitude']) + grid_areas = iris.analysis.cartography.area_weights(cube) + measure = iris.coords.CellMeasure( + grid_areas, + standard_name='cell_area', + units='m2', + measure='area') + self.grid.add_cell_measure(measure, range(0, measure.ndim)) + region = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5) + # expected outcome + expected = np.ones((2, 2)) + self.assert_array_equal(region.data, expected) + result = area_statistics(region, 'mean') + expected_mean = np.array([1.]) + self.assert_array_equal(result.data, expected_mean) + def test_extract_region_neg_lon(self): """Test for extracting a region with a negative longitude field.""" result = extract_region(self.negative_grid, -0.5, 0.5, -0.5, 0.5) diff --git a/tests/unit/preprocessor/_mask/test_mask.py b/tests/unit/preprocessor/_mask/test_mask.py index 2d4c8948e6..a6b28e2cae 100644 --- a/tests/unit/preprocessor/_mask/test_mask.py +++ b/tests/unit/preprocessor/_mask/test_mask.py @@ -7,7 +7,7 @@ import iris import tests from cf_units import Unit -from esmvalcore.preprocessor._mask import (_apply_fx_mask, _check_dims, +from 
esmvalcore.preprocessor._mask import (_apply_fx_mask, count_spells, _get_fx_mask, mask_above_threshold, mask_below_threshold, @@ -63,12 +63,6 @@ def test_apply_fx_mask_on_masked_data(self): mask=dummy_fx_mask) self.assert_array_equal(fixed_mask, app_mask) - def test_check_dims(self): - """Test _check_dims func.""" - malformed_cube = self.arr[0] - np.testing.assert_equal(True, _check_dims(self.arr, self.arr)) - np.testing.assert_equal(False, _check_dims(self.arr, malformed_cube)) - def test_count_spells(self): """Test count_spells func.""" ref_spells = count_spells(self.time_cube.data, -1000., 0, 1) diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 6335a74f64..08a1ee26c0 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -2,7 +2,6 @@ import unittest -import iris import iris.coord_categorisation import iris.coords import numpy as np diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index 0e07231609..82a755660d 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -10,7 +10,9 @@ from esmvalcore.preprocessor._volume import (volume_statistics, depth_integration, extract_trajectory, - extract_transect, extract_volume) + extract_transect, + extract_volume, + calculate_volume) class Test(tests.Test): @@ -83,12 +85,46 @@ def test_extract_volume(self): print(result.data, expected.data) self.assert_array_equal(result.data, expected) + def test_extract_volume_mean(self): + """ + Test to extract the top two layers and compute the + weighted average of a cube.""" + grid_volume = calculate_volume(self.grid_4d) + measure = iris.coords.CellMeasure( + grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume') + self.grid_4d.add_cell_measure(measure, range(0, measure.ndim)) + result = extract_volume(self.grid_4d, 0., 10.) 
+ expected = np.ma.ones((2, 2, 2, 2)) + self.assert_array_equal(result.data, expected) + result_mean = volume_statistics(result, 'mean') + expected_mean = np.ma.array([1., 1.], mask=False) + self.assert_array_equal(result_mean.data, expected_mean) + def test_volume_statistics(self): """Test to take the volume weighted average of a (2,3,2,2) cube.""" result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) + def test_volume_statistics_cell_measure(self): + """ + Test to take the volume weighted average of a (2,3,2,2) cube. + The volume measure is pre-loaded in the cube. + """ + grid_volume = calculate_volume(self.grid_4d) + measure = iris.coords.CellMeasure( + grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume') + self.grid_4d.add_cell_measure(measure, range(0, measure.ndim)) + result = volume_statistics(self.grid_4d, 'mean') + expected = np.ma.array([1., 1.], mask=False) + self.assert_array_equal(result.data, expected) + def test_volume_statistics_long(self): """ Test to take the volume weighted average of a (4,3,2,2) cube. 
diff --git a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py index ab08737a32..2de0ff3c80 100644 --- a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py +++ b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py @@ -1,6 +1,4 @@ """Unit tests for :mod:`esmvalcore.preprocessor._weighting`.""" -from typing import Dict, List -from unittest import mock import iris import numpy as np @@ -9,95 +7,69 @@ import esmvalcore.preprocessor._weighting as weighting +crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) +LON_3 = iris.coords.DimCoord([0, 1.5, 3], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_east', + coord_system=crd_sys) +LON_4 = iris.coords.DimCoord([0, 1.5, 2.5, 3.5], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3], + [3, 4]], + units='degrees_east', + coord_system=crd_sys) + CUBE_SFTLF = iris.cube.Cube( [10.0, 0.0, 100.0], var_name='sftlf', standard_name='land_area_fraction', units=Unit('%'), + dim_coords_and_dims=[(LON_3, 0), ] ) CUBE_SFTOF = iris.cube.Cube( [100.0, 0.0, 50.0, 70.0], var_name='sftof', standard_name='sea_area_fraction', units=Unit('%'), + dim_coords_and_dims=[(LON_4, 0), ] ) CUBE_3 = iris.cube.Cube( [10.0, 20.0, 0.0], var_name='dim3', + dim_coords_and_dims=[(LON_3, 0), ] ) CUBE_4 = iris.cube.Cube( [1.0, 2.0, -1.0, 2.0], var_name='dim4', + dim_coords_and_dims=[(LON_4, 0), ] ) + +CUBE_ANCILLARY_3 = CUBE_3.copy() +CUBE_ANCILLARY_3.add_ancillary_variable(CUBE_SFTLF, (0)) + +CUBE_ANCILLARY_4 = CUBE_4.copy() +CUBE_ANCILLARY_4.add_ancillary_variable(CUBE_SFTOF, (0)) + FRAC_SFTLF = np.array([0.1, 0.0, 1.0]) FRAC_SFTOF = np.array([0.0, 1.0, 0.5, 0.3]) -EMPTY_FX_FILES: Dict[str, List] = { - 'sftlf': [], - 'sftof': [], -} -L_FX_FILES = { - 'sftlf': 'not/a/real/path', - 'sftof': [], -} -O_FX_FILES = { - 'sftlf': [], - 'sftof': 'not/a/real/path', -} -FX_FILES = { - 
'sftlf': 'not/a/real/path', - 'sftof': 'i/was/mocked', -} -WRONG_FX_FILES = { - 'wrong': 'test', - 'sftlf': 'not/a/real/path', - 'sftof': 'i/was/mocked', -} LAND_FRACTION = [ - (CUBE_3, {}, [], None, ["No fx files given"]), - (CUBE_3, {'sftlf': []}, [], None, ["'sftlf' not found"]), - (CUBE_3, {'sftlf': 'a'}, [CUBE_SFTLF], FRAC_SFTLF, []), - (CUBE_3, {'sftof': 'a'}, [CUBE_SFTOF], None, ["not broadcastable"]), - (CUBE_3, EMPTY_FX_FILES, [], None, - ["'sftlf' not found", "'sftof' not found"]), - (CUBE_3, L_FX_FILES, [CUBE_SFTLF], FRAC_SFTLF, []), - (CUBE_3, O_FX_FILES, [CUBE_SFTOF], None, - ["'sftlf' not found", "not broadcastable"]), - (CUBE_3, FX_FILES, [CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, []), - (CUBE_3, {'wrong': 'a'}, [CUBE_SFTLF], None, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_3, {'wrong': 'a'}, [CUBE_SFTOF], None, ["not broadcastable"]), - (CUBE_3, WRONG_FX_FILES, [CUBE_SFTLF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_3, WRONG_FX_FILES, [CUBE_SFTOF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, - ["not broadcastable"]), - (CUBE_4, {}, [], None, ["No fx files given"]), - (CUBE_4, {'sftlf': []}, [], None, ["'sftlf' not found"]), - (CUBE_4, {'sftlf': 'a'}, [CUBE_SFTLF], None, ["not broadcastable"]), - (CUBE_4, {'sftof': 'a'}, [CUBE_SFTOF], FRAC_SFTOF, []), - (CUBE_4, EMPTY_FX_FILES, [], None, - ["'sftlf' not found", "'sftof' not found"]), - (CUBE_4, L_FX_FILES, [CUBE_SFTLF], None, - ["not broadcastable", "'sftof' not found"]), - (CUBE_4, O_FX_FILES, [CUBE_SFTOF], FRAC_SFTOF, ["'sftlf' not found"]), - (CUBE_4, FX_FILES, [CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, - ["not broadcastable"]), - (CUBE_4, {'wrong': 'a'}, [CUBE_SFTLF], None, ["not broadcastable"]), - (CUBE_4, {'wrong': 'a'}, [CUBE_SFTOF], None, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_4, WRONG_FX_FILES, [CUBE_SFTLF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, - ["not broadcastable", "not broadcastable"]), - (CUBE_4, WRONG_FX_FILES, [CUBE_SFTOF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, 
- ["expected 'sftlf' or 'sftof'", "not broadcastable"]), + (CUBE_3, None, [ + 'Ancillary variables land/sea area fraction not found in cube. ' + 'Check fx_file availability.']), + (CUBE_4, None, [ + 'Ancillary variables land/sea area fraction not found in cube. ' + 'Check fx_file availability.']), + (CUBE_ANCILLARY_3, FRAC_SFTLF, []), + (CUBE_ANCILLARY_4, FRAC_SFTOF, []) ] -@pytest.mark.parametrize('cube,fx_files,fx_cubes,out,err', LAND_FRACTION) -@mock.patch.object(weighting, 'iris', autospec=True) -def test_get_land_fraction(mock_iris, cube, fx_files, fx_cubes, out, err): +@pytest.mark.parametrize('cube,out,err', LAND_FRACTION) +def test_get_land_fraction(cube, out, err): """Test calculation of land fraction.""" - mock_iris.load_cube.side_effect = fx_cubes - (land_fraction, errors) = weighting._get_land_fraction(cube, fx_files) + (land_fraction, errors) = weighting._get_land_fraction(cube) if land_fraction is None: assert land_fraction == out else: @@ -105,46 +77,6 @@ def test_get_land_fraction(mock_iris, cube, fx_files, fx_cubes, out, err): assert len(errors) == len(err) for (idx, error) in enumerate(errors): assert err[idx] in error - mock_iris.reset_mock() - - -SHAPES_TO_BROADCAST = [ - ((), (1, ), True), - ((), (10, 10), True), - ((1, ), (10, ), True), - ((1, ), (10, 10), True), - ((2, ), (10, ), False), - ((10, ), (), True), - ((10, ), (1, ), True), - ((10, ), (10, ), True), - ((10, ), (10, 10), True), - ((10, ), (7, 1), True), - ((10, ), (10, 7), False), - ((10, ), (7, 1, 10), True), - ((10, ), (7, 1, 1), True), - ((10, ), (7, 1, 7), False), - ((10, ), (7, 10, 7), False), - ((10, 1), (1, 1), True), - ((10, 1), (1, 100), True), - ((10, 1), (10, 7), True), - ((10, 12), (10, 1), True), - ((10, 12), (), True), - ((10, 12), (1, ), True), - ((10, 12), (12, ), True), - ((10, 12), (1, 1), True), - ((10, 12), (1, 12), True), - ((10, 12), (10, 10, 1), True), - ((10, 12), (10, 12, 1), False), - ((10, 12), (10, 12, 12), False), - ((10, 12), (10, 10, 12), True), -] - - 
-@pytest.mark.parametrize('shape_1,shape_2,out', SHAPES_TO_BROADCAST) -def test_shape_is_broadcastable(shape_1, shape_2, out): - """Test check if two shapes are broadcastable.""" - is_broadcastable = weighting._shape_is_broadcastable(shape_1, shape_2) - assert is_broadcastable == out CUBE_3_L = CUBE_3.copy([1.0, 0.0, 0.0]) @@ -153,37 +85,20 @@ def test_shape_is_broadcastable(shape_1, shape_2, out): CUBE_4_O = CUBE_4.copy([1.0, 0.0, -0.5, 1.4]) WEIGHTING_LANDSEA_FRACTION = [ - (CUBE_3, {}, 'land', ValueError), - (CUBE_3, {}, 'sea', ValueError), - (CUBE_3, EMPTY_FX_FILES, 'land', ValueError), - (CUBE_3, EMPTY_FX_FILES, 'sea', ValueError), - (CUBE_3, L_FX_FILES, 'land', CUBE_3_L), - (CUBE_3, L_FX_FILES, 'sea', CUBE_3_O), - (CUBE_3, O_FX_FILES, 'land', ValueError), - (CUBE_3, O_FX_FILES, 'sea', ValueError), - (CUBE_3, FX_FILES, 'land', CUBE_3_L), - (CUBE_3, FX_FILES, 'sea', CUBE_3_O), - (CUBE_3, FX_FILES, 'wrong', TypeError), - (CUBE_4, {}, 'land', ValueError), - (CUBE_4, {}, 'sea', ValueError), - (CUBE_4, EMPTY_FX_FILES, 'land', ValueError), - (CUBE_4, EMPTY_FX_FILES, 'sea', ValueError), - (CUBE_4, L_FX_FILES, 'land', ValueError), - (CUBE_4, L_FX_FILES, 'sea', ValueError), - (CUBE_4, O_FX_FILES, 'land', CUBE_4_L), - (CUBE_4, O_FX_FILES, 'sea', CUBE_4_O), - (CUBE_4, FX_FILES, 'land', CUBE_4_L), - (CUBE_4, FX_FILES, 'sea', CUBE_4_O), - (CUBE_4, FX_FILES, 'wrong', TypeError), + (CUBE_3, 'land', ValueError), + (CUBE_3, 'sea', ValueError), + (CUBE_ANCILLARY_3, 'land', CUBE_3_L), + (CUBE_ANCILLARY_3, 'sea', CUBE_3_O), + (CUBE_4, 'land', ValueError), + (CUBE_4, 'sea', ValueError), + (CUBE_ANCILLARY_4, 'land', CUBE_4_L), + (CUBE_ANCILLARY_4, 'sea', CUBE_4_O), ] -@pytest.mark.parametrize('cube,fx_files,area_type,out', +@pytest.mark.parametrize('cube,area_type,out', WEIGHTING_LANDSEA_FRACTION) -@mock.patch.object(weighting, 'iris', autospec=True) -def test_weighting_landsea_fraction(mock_iris, - cube, - fx_files, +def test_weighting_landsea_fraction(cube, area_type, out): 
"""Test landsea fraction weighting preprocessor.""" @@ -191,18 +106,10 @@ def test_weighting_landsea_fraction(mock_iris, if isinstance(out, type): with pytest.raises(out): weighted_cube = weighting.weighting_landsea_fraction( - cube, fx_files, area_type) + cube, area_type) return # Regular cases - fx_cubes = [] - if fx_files.get('sftlf'): - fx_cubes.append(CUBE_SFTLF) - if fx_files.get('sftof'): - fx_cubes.append(CUBE_SFTOF) - mock_iris.load_cube.side_effect = fx_cubes - weighted_cube = weighting.weighting_landsea_fraction( - cube, fx_files, area_type) - assert weighted_cube == cube + weighted_cube = weighting.weighting_landsea_fraction(cube, area_type) + assert np.array_equal(weighted_cube.data, cube.data) assert weighted_cube is cube - mock_iris.reset_mock()