From 77d9075ddf678fb60980e38fb6a31f8a94b7a270 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Wed, 3 Feb 2021 15:49:11 +0100 Subject: [PATCH 01/56] Load areacella/o and volcello as cell measures --- esmvalcore/_recipe.py | 11 ++++++- esmvalcore/preprocessor/_area.py | 51 ++++++------------------------ esmvalcore/preprocessor/_io.py | 20 +++++++++++- esmvalcore/preprocessor/_volume.py | 29 ++++++----------- 4 files changed, 49 insertions(+), 62 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 97fb914bce..294da31160 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -260,6 +260,7 @@ def _get_default_settings(variable, config_user, derive=False): # Configure loading settings['load'] = { 'callback': concatenate_callback, + 'fx_variables': None } # Configure concatenation settings['concatenate'] = {} @@ -469,14 +470,22 @@ def _get_fx_vars_from_attribute(step_settings, step_name): fx_steps = [ 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', - 'area_statistics', 'volume_statistics', 'zonal_statistics' + 'zonal_statistics' ] + cell_measure_steps = [ + 'area_statistics', 'volume_statistics' + ] for step_name, step_settings in settings.items(): if step_name in fx_steps: fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) _update_fx_files(step_name, step_settings, variable, config_user, fx_vars) + elif step_name in cell_measure_steps: + fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) + _update_fx_files('load', settings['load'], + variable, config_user, fx_vars) + def _read_attributes(filename): diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index d035000544..484f9f3b5b 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -157,45 +157,6 @@ def meridional_statistics(cube, operator): raise ValueError(msg) -def tile_grid_areas(cube, fx_files): - """Tile the grid area data to match the dataset cube. 
- - Parameters - ---------- - cube: iris.cube.Cube - input cube. - fx_files: dict - dictionary of field:filename for the fx_files - - Returns - ------- - iris.cube.Cube - Freshly tiled grid areas cube. - """ - grid_areas = None - if fx_files: - for key, fx_file in fx_files.items(): - if not fx_file: - continue - logger.info('Attempting to load %s from file: %s', key, fx_file) - fx_cube = iris.load_cube(fx_file) - - grid_areas = fx_cube.core_data() - if cube.ndim == 4 and grid_areas.ndim == 2: - grid_areas = da.tile(grid_areas, - [cube.shape[0], cube.shape[1], 1, 1]) - elif cube.ndim == 4 and grid_areas.ndim == 3: - grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1, 1]) - elif cube.ndim == 3 and grid_areas.ndim == 2: - grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1]) - else: - raise ValueError('Grid and dataset number of dimensions not ' - 'recognised: {} and {}.' - ''.format(cube.ndim, grid_areas.ndim)) - return grid_areas - - -# get the area average def area_statistics(cube, operator, fx_variables=None): """Apply a statistical operator in the horizontal direction. @@ -248,7 +209,15 @@ def area_statistics(cube, operator, fx_variables=None): ValueError if input data cube has different shape than grid area weights """ - grid_areas = tile_grid_areas(cube, fx_variables) + grid_areas = None + try: + grid_areas = cube.cell_measure('cell_area').core_data() + except iris.exceptions.CellMeasureNotFoundError: + logger.info( + 'Cell measure "cell_area" not found in cube. ' + 'Check fx_file availability.' 
+ ) + logger.info('Attempting to calculate grid cell area...') if not fx_variables and cube.coord('latitude').points.ndim == 2: coord_names = [coord.standard_name for coord in cube.coords()] @@ -269,7 +238,7 @@ def area_statistics(cube, operator, fx_variables=None): cube.coord('latitude')) coord_names = ['longitude', 'latitude'] - if grid_areas is None or not grid_areas.any(): + if grid_areas is None: cube = guess_bounds(cube, coord_names) grid_areas = iris.analysis.cartography.area_weights(cube) logger.info('Calculated grid area shape: %s', grid_areas.shape) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index f508f0fa4f..abdcac8aad 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -6,6 +6,7 @@ from itertools import groupby from warnings import catch_warnings, filterwarnings +import dask.array as da import iris import iris.exceptions import numpy as np @@ -101,7 +102,7 @@ def concatenate_callback(raw_cube, field, _): coord.units = units -def load(file, callback=None): +def load(file, callback=None, fx_variables=None): """Load iris cubes from files.""" logger.debug("Loading:\n%s", file) with catch_warnings(): @@ -122,6 +123,23 @@ def load(file, callback=None): if not raw_cubes: raise Exception('Can not load cubes from {0}'.format(file)) for cube in raw_cubes: + if fx_variables: + for fx_file in fx_variables.values(): + fx_cube = iris.load_raw(fx_file, callback=callback)[0] + measure_name = { + 'areacella': 'area', + 'areacello': 'area', + 'volcello': 'volume' + } + if fx_cube.var_name in measure_name.keys(): + fx_data = da.broadcast_to( + fx_cube.core_data(), raw_cubes[0].shape) + measure = iris.coords.CellMeasure( + fx_data, + standard_name=fx_cube.standard_name, + units=fx_cube.units, + measure=measure_name[fx_cube.var_name]) + cube.add_cell_measure(measure, range(0, measure.ndim)) cube.attributes['source_file'] = file return raw_cubes diff --git a/esmvalcore/preprocessor/_volume.py 
b/esmvalcore/preprocessor/_volume.py index 589a6c68e3..7586ebdf9b 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -211,27 +211,18 @@ def volume_statistics( # Load z coordinate field and figure out which dim is which. t_dim = cube.coord_dims('time')[0] - grid_volume_found = False - grid_volume = None - if fx_variables: - for key, fx_file in fx_variables.items(): - if fx_file is None: - continue - logger.info('Attempting to load %s from file: %s', key, fx_file) - fx_cube = iris.load_cube(fx_file) - - grid_volume = fx_cube.data - grid_volume_found = True - cube_shape = cube.data.shape - - if not grid_volume_found: + try: + grid_volume = cube.cell_measure('ocean_volume').core_data() + except iris.exceptions.CellMeasureNotFoundError: + logger.info( + 'Cell measure "ocean_volume" not found in cube. ' + 'Check fx_file availability.' + ) + logger.info('Attempting to calculate grid cell volume...') + + if not fx_variables: grid_volume = calculate_volume(cube) - # Check whether the dimensions are right. 
- if cube.data.ndim == 4 and grid_volume.ndim == 3: - grid_volume = np.tile(grid_volume, - [cube_shape[0], 1, 1, 1]) - if cube.data.shape != grid_volume.shape: raise ValueError('Cube shape ({}) doesn`t match grid volume shape ' '({})'.format(cube.data.shape, grid_volume.shape)) From 7c47aa597498e901568fc5d49450d17aeeb6b740 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Thu, 4 Feb 2021 18:13:15 +0100 Subject: [PATCH 02/56] Fix tests --- esmvalcore/_recipe.py | 14 ++++---------- tests/integration/test_recipe.py | 2 ++ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 294da31160..c8e6efd655 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -470,22 +470,16 @@ def _get_fx_vars_from_attribute(step_settings, step_name): fx_steps = [ 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', - 'zonal_statistics' - ] - - cell_measure_steps = [ - 'area_statistics', 'volume_statistics' + 'zonal_statistics', 'area_statistics', 'volume_statistics' ] for step_name, step_settings in settings.items(): if step_name in fx_steps: fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) _update_fx_files(step_name, step_settings, variable, config_user, fx_vars) - elif step_name in cell_measure_steps: - fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) - _update_fx_files('load', settings['load'], - variable, config_user, fx_vars) - + if step_name in ['area_statistics', 'volume_statistics']: + _update_fx_files('load', settings['load'], + variable, config_user, fx_vars) def _read_attributes(filename): diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 082b264be5..6044e3b1c5 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -87,6 +87,7 @@ def _get_default_settings_for_chl(fix_dir, save_filename): defaults = { 'load': { 'callback': concatenate_callback, + 'fx_variables': None }, 'concatenate': {}, 'fix_file': { 
@@ -509,6 +510,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): defaults = { 'load': { 'callback': concatenate_callback, + 'fx_variables': None }, 'concatenate': {}, 'fix_file': { From 72f16f82abe87f19d28f07a6fea6908a23384658 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 5 Feb 2021 11:54:22 +0100 Subject: [PATCH 03/56] Add tests --- tests/unit/preprocessor/_area/test_area.py | 36 +++++++++++++++++- .../unit/preprocessor/_volume/test_volume.py | 38 ++++++++++++++++++- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index 196e79bbdc..34400933e3 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -15,6 +15,7 @@ from esmvalcore.preprocessor._area import (_crop_cube, area_statistics, extract_named_regions, extract_region, extract_shape) +from esmvalcore.preprocessor._shared import guess_bounds class Test(tests.Test): @@ -59,8 +60,18 @@ def setUp(self): self.negative_grid = iris.cube.Cube(ndata, dim_coords_and_dims=coords_spec) - def test_area_statistics_mean(self): - """Test for area average of a 2D field.""" + def test_area_statistics_cell_measure_mean(self): + """ + Test for area average of a 2D field. 
+ The area measure is pre-loaded in the cube""" + cube = guess_bounds(self.grid, ['longitude', 'latitude']) + grid_areas = iris.analysis.cartography.area_weights(cube) + measure = iris.coords.CellMeasure( + grid_areas, + standard_name='cell_area', + units='m2', + measure='area') + self.grid.add_cell_measure(measure, range(0, measure.ndim)) result = area_statistics(self.grid, 'mean') expected = np.array([1.]) self.assert_array_equal(result.data, expected) @@ -121,6 +132,27 @@ def test_extract_region(self): expected = np.ones((2, 2)) self.assert_array_equal(result.data, expected) + def test_extract_region_mean(self): + """ + Test for extracting a region and performing + the area mean of a 2D field. + """ + cube = guess_bounds(self.grid, ['longitude', 'latitude']) + grid_areas = iris.analysis.cartography.area_weights(cube) + measure = iris.coords.CellMeasure( + grid_areas, + standard_name='cell_area', + units='m2', + measure='area') + self.grid.add_cell_measure(measure, range(0, measure.ndim)) + region = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5) + # expected outcome + expected = np.ones((2, 2)) + self.assert_array_equal(region.data, expected) + result = area_statistics(region, 'mean') + expected_mean = np.array([1.]) + self.assert_array_equal(result.data, expected_mean) + def test_extract_region_neg_lon(self): """Test for extracting a region with a negative longitude field.""" result = extract_region(self.negative_grid, -0.5, 0.5, -0.5, 0.5) diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index 0e07231609..c5fc10879a 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -10,7 +10,9 @@ from esmvalcore.preprocessor._volume import (volume_statistics, depth_integration, extract_trajectory, - extract_transect, extract_volume) + extract_transect, + extract_volume, + calculate_volume) class Test(tests.Test): @@ -83,12 +85,46 @@ def 
test_extract_volume(self): print(result.data, expected.data) self.assert_array_equal(result.data, expected) + def test_extract_volume_mean(self): + """ + Test to extract the top two layers and compute the + weighted average of a cube.""" + grid_volume = calculate_volume(self.grid_4d) + measure = iris.coords.CellMeasure( + grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume') + self.grid_4d.add_cell_measure(measure, range(0, measure.ndim)) + result = extract_volume(self.grid_4d, 0., 10.) + expected = np.ma.ones((2, 2, 2, 2)) + self.assert_array_equal(result.data, expected) + result_mean = volume_statistics(result, 'mean') + expected_mean = np.ma.array([1., 1.], mask=False) + self.assert_array_equal(result_mean.data, expected_mean) + def test_volume_statistics(self): """Test to take the volume weighted average of a (2,3,2,2) cube.""" result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) + def test_volume_statistics_cell_measure(self): + """ + Test to take the volume weighted average of a (2,3,2,2) cube. + The volume measure is pre-loaded in the cube. + """ + grid_volume = calculate_volume(self.grid_4d) + measure = iris.coords.CellMeasure( + grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume') + self.grid.add_cell_measure(measure, range(0, measure.ndim)) + result = volume_statistics(self.grid_4d, 'mean') + expected = np.ma.array([1., 1.], mask=False) + self.assert_array_equal(result.data, expected) + def test_volume_statistics_long(self): """ Test to take the volume weighted average of a (4,3,2,2) cube. 
From 33476823cac143dfbbf284c6ddf599c13f3b7161 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 5 Feb 2021 12:00:32 +0100 Subject: [PATCH 04/56] Restore original test --- tests/unit/preprocessor/_area/test_area.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index 34400933e3..09aaaeb3ae 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -60,6 +60,12 @@ def setUp(self): self.negative_grid = iris.cube.Cube(ndata, dim_coords_and_dims=coords_spec) + def test_area_statistics_mean(self): + """Test for area average of a 2D field.""" + result = area_statistics(self.grid, 'mean') + expected = np.array([1.]) + self.assert_array_equal(result.data, expected) + def test_area_statistics_cell_measure_mean(self): """ Test for area average of a 2D field. From 6bc7b8a60790b8044b20b00b1eba95a1b81c552a Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 5 Feb 2021 12:17:17 +0100 Subject: [PATCH 05/56] Fix tests --- tests/unit/preprocessor/_volume/test_volume.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index c5fc10879a..82a755660d 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -10,7 +10,7 @@ from esmvalcore.preprocessor._volume import (volume_statistics, depth_integration, extract_trajectory, - extract_transect, + extract_transect, extract_volume, calculate_volume) @@ -87,7 +87,7 @@ def test_extract_volume(self): def test_extract_volume_mean(self): """ - Test to extract the top two layers and compute the + Test to extract the top two layers and compute the weighted average of a cube.""" grid_volume = calculate_volume(self.grid_4d) measure = iris.coords.CellMeasure( @@ -120,7 +120,7 @@ def test_volume_statistics_cell_measure(self): 
standard_name='ocean_volume', units='m3', measure='volume') - self.grid.add_cell_measure(measure, range(0, measure.ndim)) + self.grid_4d.add_cell_measure(measure, range(0, measure.ndim)) result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) From 42e62db454bf1d716be2dd80484c6185e460d8fa Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 8 Feb 2021 12:52:15 +0100 Subject: [PATCH 06/56] Move to separate preproc step --- esmvalcore/_recipe.py | 17 +++-- esmvalcore/preprocessor/__init__.py | 5 +- esmvalcore/preprocessor/_io.py | 20 +----- esmvalcore/preprocessor/_other.py | 97 +++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 25 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index c8e6efd655..adf21d66dc 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -260,7 +260,6 @@ def _get_default_settings(variable, config_user, derive=False): # Configure loading settings['load'] = { 'callback': concatenate_callback, - 'fx_variables': None } # Configure concatenation settings['concatenate'] = {} @@ -320,6 +319,13 @@ def _get_default_settings(variable, config_user, derive=False): if variable['short_name'] != variable['original_short_name']: settings['save']['alias'] = variable['short_name'] + settings['add_cell_measure'] = { + 'fx_variables': None, + 'project': variable['project'], + 'dataset': variable['dataset'], + 'check_level': config_user.get('check_level', CheckLevels.DEFAULT), + } + return settings @@ -393,6 +399,7 @@ def _get_fx_file(variable, fx_variable, config_user): if fx_files: logger.debug("Found fx variables '%s':\n%s", fx_varname, pformat(fx_files)) + fx_variable['mip'] = fx_mip break # If fx variable was not found in any table, raise exception @@ -407,7 +414,8 @@ def _get_fx_file(variable, fx_variable, config_user): # allow for empty lists corrected for by NE masks if fx_files: - fx_files = fx_files[0] + if 'fx' in 
fx_variable['mip']: + fx_files = fx_files[0] if valid_fx_vars: valid_fx_vars = valid_fx_vars[0] @@ -478,8 +486,9 @@ def _get_fx_vars_from_attribute(step_settings, step_name): _update_fx_files(step_name, step_settings, variable, config_user, fx_vars) if step_name in ['area_statistics', 'volume_statistics']: - _update_fx_files('load', settings['load'], - variable, config_user, fx_vars) + _update_fx_files( + 'add_cell_measure', settings['add_cell_measure'], + variable, config_user, fx_vars) def _read_attributes(filename): diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 9aa5804395..c2ef6c5bdf 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -41,7 +41,7 @@ mask_outside_range, ) from ._multimodel import multi_model_statistics -from ._other import clip +from ._other import clip, add_cell_measure from ._regrid import extract_levels, extract_point, regrid from ._time import ( annual_statistics, @@ -98,6 +98,7 @@ # Data reformatting/CMORization 'fix_data', 'cmor_check_data', + 'add_cell_measure', # Time extraction (as defined in the preprocessor section) 'extract_time', 'extract_season', @@ -181,7 +182,7 @@ DEFAULT_ORDER = tuple(__all__) # The order of initial and final steps cannot be configured -INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('cmor_check_data') + 1] +INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('add_cell_measure') + 1] FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('save'):] MULTI_MODEL_FUNCTIONS = { diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index abdcac8aad..f508f0fa4f 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -6,7 +6,6 @@ from itertools import groupby from warnings import catch_warnings, filterwarnings -import dask.array as da import iris import iris.exceptions import numpy as np @@ -102,7 +101,7 @@ def concatenate_callback(raw_cube, field, _): coord.units = units -def load(file, 
callback=None, fx_variables=None): +def load(file, callback=None): """Load iris cubes from files.""" logger.debug("Loading:\n%s", file) with catch_warnings(): @@ -123,23 +122,6 @@ def load(file, callback=None, fx_variables=None): if not raw_cubes: raise Exception('Can not load cubes from {0}'.format(file)) for cube in raw_cubes: - if fx_variables: - for fx_file in fx_variables.values(): - fx_cube = iris.load_raw(fx_file, callback=callback)[0] - measure_name = { - 'areacella': 'area', - 'areacello': 'area', - 'volcello': 'volume' - } - if fx_cube.var_name in measure_name.keys(): - fx_data = da.broadcast_to( - fx_cube.core_data(), raw_cubes[0].shape) - measure = iris.coords.CellMeasure( - fx_data, - standard_name=fx_cube.standard_name, - units=fx_cube.units, - measure=measure_name[fx_cube.var_name]) - cube.add_cell_measure(measure, range(0, measure.ndim)) cube.attributes['source_file'] = file return raw_cubes diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 697e8b3347..d3c2a49e0d 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -3,6 +3,7 @@ """ import logging +import iris import dask.array as da @@ -38,3 +39,99 @@ def clip(cube, minimum=None, maximum=None): raise ValueError("Maximum should be equal or larger than minimum.") cube.data = da.clip(cube.core_data(), minimum, maximum) return cube + + +def add_cell_measure(cube, fx_variables, project, dataset, check_level): + """ + Load requested fx files, check with CMOR standards and add the + fx variables as cell measures in the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + iris cube + fx_variables: dict + path to the needed fx_files + project: str + + dataset: str + + check_level: CheckLevels + Level of strictness of the checks + + + Returns + ------- + iris.cube.Cube + Cube with added cell measures. 
+ """ + from esmvalcore.preprocessor._io import concatenate + from esmvalcore.cmor.fix import fix_metadata, fix_data + from esmvalcore.cmor.check import cmor_check_metadata, cmor_check_data + + if not fx_variables: + return + fx_cubes = iris.cube.CubeList() + for fx_files in fx_variables.values(): + for fx_file in fx_files: + loaded_cube = iris.load(fx_file) + short_name = loaded_cube[0].var_name + mip = loaded_cube[0].attributes['table_id'] + freq = loaded_cube[0].attributes['frequency'] + loaded_cube = fix_metadata( + loaded_cube, + short_name=short_name, + project=project, + dataset=dataset, + mip=mip, + frequency=freq, + check_level=check_level) + fx_cubes.append(loaded_cube[0]) + fx_cube = concatenate(fx_cubes) + fx_cube = cmor_check_metadata( + fx_cube, + cmor_table=project, + mip=mip, + short_name=short_name, + frequency=freq, + check_level=check_level) + fx_cube = fix_data( + fx_cube, + short_name=short_name, + project=project, + dataset=dataset, + mip=mip, + frequency=freq, + check_level=check_level) + fx_cube = cmor_check_data( + fx_cube, + cmor_table=project, + mip=mip, + short_name=fx_cube.var_name, + frequency=freq, + check_level=check_level) + + measure_name = { + 'areacella': 'area', + 'areacello': 'area', + 'volcello': 'volume' + } + if fx_cube.var_name in measure_name.keys(): + try: + fx_data = da.broadcast_to( + fx_cube.core_data(), cube.shape) + except ValueError: + logger.error( + f"Frequencies of {cube.var_name} and " + f"{measure_name} cubes do not match." 
+ ) + raise + measure = iris.coords.CellMeasure( + fx_data, + standard_name=fx_cube.standard_name, + units=fx_cube.units, + measure=measure_name[fx_cube.var_name], + var_name=fx_cube.var_name, + attributes=fx_cube.attributes) + cube.add_cell_measure(measure, range(0, measure.ndim)) + return cube From 158537b6004928547e4a5457f0085b0c64d0abfd Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 8 Feb 2021 12:52:31 +0100 Subject: [PATCH 07/56] Fix tests --- tests/integration/test_recipe.py | 33 +++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 6044e3b1c5..b27371c042 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -47,6 +47,7 @@ ) DEFAULT_PREPROCESSOR_STEPS = ( + 'add_cell_measure', 'cleanup', 'cmor_check_data', 'cmor_check_metadata', @@ -87,7 +88,6 @@ def _get_default_settings_for_chl(fix_dir, save_filename): defaults = { 'load': { 'callback': concatenate_callback, - 'fx_variables': None }, 'concatenate': {}, 'fix_file': { @@ -131,6 +131,12 @@ def _get_default_settings_for_chl(fix_dir, save_filename): 'short_name': 'chl', 'frequency': 'yr', }, + 'add_cell_measure': { + 'fx_variables': None, + 'project': 'CMIP5', + 'dataset': 'CanESM2', + 'check_level': CheckLevels.DEFAULT, + }, 'cleanup': { 'remove': [fix_dir] }, @@ -510,7 +516,6 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): defaults = { 'load': { 'callback': concatenate_callback, - 'fx_variables': None }, 'concatenate': {}, 'fix_file': { @@ -550,6 +555,12 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'short_name': 'sftlf', 'frequency': 'fx', }, + 'add_cell_measure': { + 'fx_variables': None, + 'project': 'CMIP5', + 'dataset': 'CanESM2', + 'check_level': CheckLevels.DEFAULT, + }, 'cleanup': { 'remove': [fix_dir] }, @@ -2093,8 +2104,8 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, 
patched_datafinder, fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Omon_' in fx_variables['volcello'] - assert '_Ofx_' not in fx_variables['volcello'] + assert '_Omon_' in fx_variables['volcello'][0] + assert '_Ofx_' not in fx_variables['volcello'][0] def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2140,9 +2151,9 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Oyr_' in fx_variables['volcello'] - assert '_piControl_' in fx_variables['volcello'] - assert '_Omon_' not in fx_variables['volcello'] + assert '_Oyr_' in fx_variables['volcello'][0] + assert '_piControl_' in fx_variables['volcello'][0] + assert '_Omon_' not in fx_variables['volcello'][0] def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, @@ -2239,8 +2250,8 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello'] - assert '_Omon_' in fx_variables['volcello'] + assert '_Ofx_' not in fx_variables['volcello'][0] + assert '_Omon_' in fx_variables['volcello'][0] def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, @@ -2285,8 +2296,8 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello'] - assert '_Oyr_' in fx_variables['volcello'] + assert '_Ofx_' not in fx_variables['volcello'][0] + assert '_Oyr_' in fx_variables['volcello'][0] def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, From a92cbbf034a8d4e5402bc3ec37860bd46d517f1e Mon Sep 17 00:00:00 2001 From: sloosvel 
Date: Mon, 8 Feb 2021 12:58:59 +0100 Subject: [PATCH 08/56] Fix return statement --- esmvalcore/preprocessor/_other.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index d3c2a49e0d..18dbadda98 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -70,7 +70,7 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): from esmvalcore.cmor.check import cmor_check_metadata, cmor_check_data if not fx_variables: - return + return cube fx_cubes = iris.cube.CubeList() for fx_files in fx_variables.values(): for fx_file in fx_files: From 5e370bfecbe6e58ff14b42a2c15a088ec5622a24 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 8 Feb 2021 13:06:40 +0100 Subject: [PATCH 09/56] Fix flake --- esmvalcore/preprocessor/_other.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 18dbadda98..b51f019bae 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -49,16 +49,16 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): Parameters ---------- cube: iris.cube.Cube - iris cube + Iris cube with input data. fx_variables: dict - path to the needed fx_files + Path to the needed fx_files. project: str dataset: str check_level: CheckLevels - Level of strictness of the checks - + Level of strictness of the checks. 
+ Returns ------- From 5023bddad1ac2dfbfcd1a3238baedeaab6af3628 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 8 Feb 2021 13:34:42 +0100 Subject: [PATCH 10/56] Fix format --- esmvalcore/preprocessor/_other.py | 49 ++++++++++++------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index b51f019bae..5dae052fca 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -78,44 +78,33 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): short_name = loaded_cube[0].var_name mip = loaded_cube[0].attributes['table_id'] freq = loaded_cube[0].attributes['frequency'] - loaded_cube = fix_metadata( - loaded_cube, - short_name=short_name, - project=project, - dataset=dataset, - mip=mip, - frequency=freq, - check_level=check_level) + + loaded_cube = fix_metadata(loaded_cube, short_name=short_name, + project=project, dataset=dataset, + mip=mip, frequency=freq, + check_level=check_level) fx_cubes.append(loaded_cube[0]) + fx_cube = concatenate(fx_cubes) - fx_cube = cmor_check_metadata( - fx_cube, - cmor_table=project, - mip=mip, - short_name=short_name, - frequency=freq, - check_level=check_level) - fx_cube = fix_data( - fx_cube, - short_name=short_name, - project=project, - dataset=dataset, - mip=mip, - frequency=freq, - check_level=check_level) - fx_cube = cmor_check_data( - fx_cube, - cmor_table=project, - mip=mip, - short_name=fx_cube.var_name, - frequency=freq, - check_level=check_level) + + fx_cube = cmor_check_metadata(fx_cube, cmor_table=project, mip=mip, + short_name=short_name, frequency=freq, + check_level=check_level) + + fx_cube = fix_data(fx_cube, short_name=short_name, project=project, + dataset=dataset, mip=mip, frequency=freq, + check_level=check_level) + + fx_cube = cmor_check_data(fx_cube, cmor_table=project, mip=mip, + short_name=fx_cube.var_name, frequency=freq, + check_level=check_level) measure_name = { 
'areacella': 'area', 'areacello': 'area', 'volcello': 'volume' } + if fx_cube.var_name in measure_name.keys(): try: fx_data = da.broadcast_to( From 1c23f19c265c2e3a790e7cb4c1f12773a1f8ac9f Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 9 Feb 2021 12:43:27 +0100 Subject: [PATCH 11/56] Call fx variables with yaml dicts --- doc/recipe/preprocessor.rst | 45 ++++++++--- esmvalcore/_recipe.py | 125 ++++++++++++++----------------- tests/integration/test_recipe.py | 71 +++++++++++------- 3 files changed, 133 insertions(+), 108 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 1fe9758775..b28a1c8c44 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -325,8 +325,11 @@ experiment is preferred for fx data retrieval: weighting_landsea_fraction: area_type: land exclude: ['CanESM2', 'reference_dataset'] - fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] - + fx_variables: + sftlf: + exp: piControl + sftof: + exp: piControl See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. @@ -388,7 +391,11 @@ experiment is preferred for fx data retrieval: landmask: mask_landsea: mask_out: sea - fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] + fx_variables: + sftlf: + exp: piControl + sftof: + exp: piControl If the corresponding fx file is not found (which is the case for some models and almost all observational datasets), the @@ -428,7 +435,9 @@ experiment is preferred for fx data retrieval: landseaicemask: mask_landseaice: mask_out: sea - fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + fx_variables: + sftgif: + exp: piControl See also :func:`esmvalcore.preprocessor.mask_landseaice`. @@ -1267,18 +1276,24 @@ region, depth layer or time period is required, then those regions need to be removed using other preprocessor operations in advance. 
The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; -the user may specify it as a list of variables e.g. +the user may specify it calling the variables e.g. .. code-block:: yaml - fx_variables: ['areacello', 'volcello'] + fx_variables: + areacello: + volcello: -or as list of dictionaries, with specific variable parameters (they key-value pair may be as specific +or calling the variables and adding specific variable parameters (they key-value pair may be as specific as a CMOR variable can permit): .. code-block:: yaml - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello, mip': 'fx'}] + fx_variables: + areacello: + mip: Omon + volcello: + mip: fx The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. @@ -1327,18 +1342,24 @@ No depth coordinate is required as this is determined by Iris. This function works best when the ``fx_variables`` provide the cell volume. The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; -the user may specify it as a list of variables e.g. +the user may specify it calling the variables e.g. .. code-block:: yaml - fx_variables: ['areacello', 'volcello'] + fx_variables: + areacello: + volcello: -or as list of dictionaries, with specific variable parameters (they key-value pair may be as specific +or calling the variables and adding specific variable parameters (they key-value pair may be as specific as a CMOR variable can permit): .. code-block:: yaml - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello, mip': 'fx'}] + fx_variables: + areacello: + mip: Omon + volcello: + mip: fx The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. 
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ba3107913a..20cfd9c37e 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -324,13 +324,10 @@ def _get_default_settings(variable, config_user, derive=False): return settings -def _add_fxvar_keys(fx_var_dict, variable): +def _add_fxvar_keys(fx_info, variable): """Add keys specific to fx variable to use get_input_filelist.""" - fx_variable = dict(variable) - fx_variable.update(fx_var_dict) - - # set variable names - fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable = deepcopy(variable) + fx_variable.update(fx_info) # add special ensemble for CMIP5 only if fx_variable['project'] == 'CMIP5': @@ -339,82 +336,68 @@ def _add_fxvar_keys(fx_var_dict, variable): # add missing cmor info _add_cmor_info(fx_variable, override=True) + return fx_variable -def _get_fx_file(variable, fx_variable, config_user): +def _get_fx_files(variable, fx_info, config_user): """Get fx files (searching all possible mips).""" - # make it a dict - if isinstance(fx_variable, str): - fx_varname = fx_variable - fx_variable = {'short_name': fx_varname} - else: - fx_varname = fx_variable['short_name'] # assemble info from master variable - var = dict(variable) var_project = variable['project'] # check if project in config-developer try: get_project_config(var_project) except ValueError: raise RecipeError( - f"Requested fx variable '{fx_varname}' with parent variable" - f"'{variable}' does not have a '{var_project}' project" - f"in config-developer.") - cmor_table = CMOR_TABLES[var_project] - valid_fx_vars = [] + f"Requested fx variable '{fx_info['short_name']}' " + f"with parent variable '{variable}' does not have " + f"a '{var_project}' project in config-developer.") + project_tables = CMOR_TABLES[var_project].tables # force only the mip declared by user - if 'mip' in fx_variable: - fx_mips = [fx_variable['mip']] + found_mip = False + if not fx_info['mip']: + for mip in project_tables: + fx_cmor = 
project_tables[mip].get(fx_info['short_name']) + if fx_cmor: + found_mip = True + fx_info['mip'] = mip + fx_info = _add_fxvar_keys(fx_info, variable) + logger.debug( + "For fx variable '%s', found table '%s'", + fx_info['short_name'], mip) + fx_files = _get_input_files(fx_info, config_user)[0] + if fx_files: + logger.debug( + "Found fx variables '%s':\n%s", + fx_info['short_name'], + pformat(fx_files)) + break else: - # Get all fx-related mips (original var mip, - # 'fx' and extend from cmor tables) - fx_mips = [variable['mip']] - fx_mips.extend(mip for mip in cmor_table.tables if 'fx' in mip) - - # Search all mips for available variables - # priority goes to user specified mip if available - searched_mips = [] - fx_files = [] - for fx_mip in fx_mips: - fx_cmor_variable = cmor_table.get_variable(fx_mip, fx_varname) - if fx_cmor_variable is not None: - fx_var_dict = dict(fx_variable) - searched_mips.append(fx_mip) - fx_var_dict['mip'] = fx_mip - fx_var_dict = _add_fxvar_keys(fx_var_dict, var) - valid_fx_vars.append(fx_var_dict) - logger.debug("For fx variable '%s', found table '%s'", fx_varname, - fx_mip) - fx_files = _get_input_files(fx_var_dict, config_user)[0] - - # If files found, return them - if fx_files: - logger.debug("Found fx variables '%s':\n%s", fx_varname, - pformat(fx_files)) - fx_variable['mip'] = fx_mip - break + fx_cmor = project_tables[fx_info['mip']].get(fx_info['short_name']) + if fx_cmor: + found_mip = True + fx_info = _add_fxvar_keys(fx_info, variable) + fx_files = _get_input_files(fx_info, config_user)[0] # If fx variable was not found in any table, raise exception - if not searched_mips: + if not found_mip: raise RecipeError( - f"Requested fx variable '{fx_varname}' not available in " - f"any 'fx'-related CMOR table ({fx_mips}) for '{var_project}'") + f"Requested fx variable '{fx_info['short_name']}' " + f"not available in any CMOR table for '{var_project}'") # flag a warning if not fx_files: - logger.warning("Missing data for fx variable 
'%s'", fx_varname) + logger.warning( + "Missing data for fx variable '%s'", fx_info['short_name']) # allow for empty lists corrected for by NE masks if fx_files: - if 'fx' in fx_variable['mip']: + if fx_info['frequency'] == 'fx': fx_files = fx_files[0] - if valid_fx_vars: - valid_fx_vars = valid_fx_vars[0] - return fx_files, valid_fx_vars + return fx_files def _exclude_dataset(settings, variable, step): @@ -438,13 +421,22 @@ def _update_weighting_settings(settings, variable): def _update_fx_files(step_name, settings, variable, config_user, fx_vars): """Update settings with mask fx file list or dict.""" + if 'fx_variables' not in settings: + settings.update({'fx_variables': {}}) if not fx_vars: return + for fx_var, fx_info in fx_vars.items(): + if not fx_info: + fx_info = {} + if 'mip' not in fx_info: + fx_info.update({'mip': None}) + if 'short_name' not in fx_info: + fx_info.update({'short_name': fx_var}) + fx_files = _get_fx_files(variable, fx_info, config_user) + settings['fx_variables'].update({ + fx_var: fx_files + }) - fx_vars = [_get_fx_file(variable, fxvar, config_user) for fxvar in fx_vars] - - fx_dict = {fx_var[1]['short_name']: fx_var[0] for fx_var in fx_vars} - settings['fx_variables'] = fx_dict logger.info('Using fx_files: %s for variable %s during step %s', pformat(settings['fx_variables']), variable['short_name'], step_name) @@ -458,14 +450,14 @@ def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars = step_settings.get('fx_variables') if not user_fx_vars: if step_name in ('mask_landsea', 'weighting_landsea_fraction'): - user_fx_vars = ['sftlf'] + user_fx_vars = {'sftlf': None} if variable['project'] != 'obs4mips': - user_fx_vars.append('sftof') + user_fx_vars.update({'sftof': None}) elif step_name == 'mask_landseaice': - user_fx_vars = ['sftgif'] + user_fx_vars = {'sftgif': None} elif step_name in ('area_statistics', 'volume_statistics', 'zonal_statistics'): - user_fx_vars = [] + user_fx_vars = {} return user_fx_vars fx_steps = [ @@ 
-478,9 +470,8 @@ def _get_fx_vars_from_attribute(step_settings, step_name): _update_fx_files(step_name, step_settings, variable, config_user, fx_vars) if step_name in ['area_statistics', 'volume_statistics']: - _update_fx_files( - 'add_cell_measure', settings['add_cell_measure'], - variable, config_user, fx_vars) + settings['add_cell_measure']['fx_variables'] = ( + step_settings['fx_variables']) def _read_attributes(filename): diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 3879722435..d23d34ac81 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1870,17 +1870,22 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): landmask: mask_landsea: mask_out: sea - fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}] + fx_variables: + sftlf: + exp: piControl mask_landseaice: mask_out: sea - fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + fx_variables: + sftgif: + exp: piControl volume_statistics: operator: mean area_statistics: operator: mean - fx_variables: [{'short_name': 'areacello', 'mip': 'fx', - 'exp': 'piControl'}] - + fx_variables: + areacello: + mip: fx + exp: piControl diagnostics: diagnostic_name: variables: @@ -1924,9 +1929,9 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): # volume statistics settings = product.settings['volume_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - assert 'fx_variables' not in settings + assert settings['fx_variables'] == {} # area statistics settings = product.settings['area_statistics'] @@ -1993,14 +1998,14 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): preproc: area_statistics: operator: mean - fx_variables: [ - 'areacella', - 'areacello', - 'clayfrac', - 'sftlf', - 'sftgif', - 'sftof', - ] + fx_variables: + areacella: + areacello: + clayfrac: + sftlf: + sftgif: + mip: fx + sftof: mask_landsea: 
mask_out: sea @@ -2069,7 +2074,8 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: diagnostics: diagnostic_name: @@ -2115,8 +2121,10 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: [{'short_name': 'volcello', 'mip': 'Oyr', - 'exp': 'piControl'}] + fx_variables: + volcello: + mip: Oyr + exp: piControl diagnostics: diagnostic_name: @@ -2203,9 +2211,9 @@ def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, assert product.files assert 'area_statistics' in product.settings settings = product.settings['area_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - assert 'fx_variables' not in settings + assert settings['fx_variables'] == {} def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, @@ -2215,7 +2223,9 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: + mip: Omon diagnostics: diagnostic_name: @@ -2261,7 +2271,9 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: + mip: Oyr diagnostics: diagnostic_name: @@ -2307,7 +2319,8 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: diagnostics: diagnostic_name: @@ -2351,7 +2364,8 @@ def test_wrong_project(tmp_path, patched_datafinder, config_user): preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: diagnostics: diagnostic_name: @@ -2382,10 +2396,9 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, 
config_user): preproc: area_statistics: operator: mean - fx_variables: [ - 'areacella', - 'wrong_fx_variable', - ] + fx_variables: + areacella: + wrong_fx_variable: diagnostics: diagnostic_name: @@ -2404,7 +2417,7 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): scripts: null """) msg = ("Requested fx variable 'wrong_fx_variable' not available in any " - "'fx'-related CMOR table") + "CMOR table") with pytest.raises(RecipeError) as rec_err_exp: get_recipe(tmp_path, content, config_user) assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG From 2ebbe59fd2c08f6bd0862c2bcaf53e1222ea312b Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 9 Feb 2021 12:44:16 +0100 Subject: [PATCH 12/56] Fix bug when only dealing with a single file --- esmvalcore/preprocessor/_other.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 5dae052fca..497b44182a 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -73,6 +73,10 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): return cube fx_cubes = iris.cube.CubeList() for fx_files in fx_variables.values(): + if isinstance(fx_files, str): + fx_files = [fx_files] + if not fx_files: + continue for fx_file in fx_files: loaded_cube = iris.load(fx_file) short_name = loaded_cube[0].var_name @@ -123,4 +127,6 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): var_name=fx_cube.var_name, attributes=fx_cube.attributes) cube.add_cell_measure(measure, range(0, measure.ndim)) + logger.info(f'Added {fx_cube.var_name} ' + f'as cell measure in cube of {cube.var_name}') return cube From 04c4331c4d27ff99c8ab0dcca33719f54ddfecee Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 9 Feb 2021 13:07:38 +0100 Subject: [PATCH 13/56] Fix tests --- esmvalcore/_recipe.py | 2 +- tests/integration/test_recipe.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 
deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 20cfd9c37e..ee03d02ed5 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -328,6 +328,7 @@ def _add_fxvar_keys(fx_info, variable): """Add keys specific to fx variable to use get_input_filelist.""" fx_variable = deepcopy(variable) fx_variable.update(fx_info) + fx_variable['variable_group'] = fx_info['short_name'] # add special ensemble for CMIP5 only if fx_variable['project'] == 'CMIP5': @@ -336,7 +337,6 @@ def _add_fxvar_keys(fx_info, variable): # add missing cmor info _add_cmor_info(fx_variable, override=True) - return fx_variable diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index d23d34ac81..3fcd37fcd4 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2074,8 +2074,9 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: + fx_variables: volcello: + mip: Ofx diagnostics: diagnostic_name: @@ -2110,8 +2111,8 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Omon_' in fx_variables['volcello'][0] - assert '_Ofx_' not in fx_variables['volcello'][0] + assert '_Omon_' not in fx_variables['volcello'][0] + assert '_Ofx_' in fx_variables['volcello'][0] def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2121,7 +2122,7 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: + fx_variables: volcello: mip: Oyr exp: piControl @@ -2223,7 +2224,7 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: + fx_variables: volcello: mip: Omon @@ -2271,7 +2272,7 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, 
patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: + fx_variables: volcello: mip: Oyr @@ -2319,7 +2320,7 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: + fx_variables: volcello: diagnostics: @@ -2364,7 +2365,7 @@ def test_wrong_project(tmp_path, patched_datafinder, config_user): preproc: volume_statistics: operator: mean - fx_variables: + fx_variables: volcello: diagnostics: From 6fa251387171bfaf9808a4d8a85afa8ea046151e Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 9 Feb 2021 16:25:08 +0100 Subject: [PATCH 14/56] Add integration tests --- .../preprocessor/_other/__init__.py | 5 + .../_other/test_add_cell_measure.py | 117 ++++++++++++++++++ tests/integration/test_recipe.py | 4 +- 3 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 tests/integration/preprocessor/_other/__init__.py create mode 100644 tests/integration/preprocessor/_other/test_add_cell_measure.py diff --git a/tests/integration/preprocessor/_other/__init__.py b/tests/integration/preprocessor/_other/__init__.py new file mode 100644 index 0000000000..5bd808bc6a --- /dev/null +++ b/tests/integration/preprocessor/_other/__init__.py @@ -0,0 +1,5 @@ +""" +Test _other.py + +Integration tests for the esmvalcore.preprocessor._other module +""" diff --git a/tests/integration/preprocessor/_other/test_add_cell_measure.py b/tests/integration/preprocessor/_other/test_add_cell_measure.py new file mode 100644 index 0000000000..784d1c0136 --- /dev/null +++ b/tests/integration/preprocessor/_other/test_add_cell_measure.py @@ -0,0 +1,117 @@ +""" +Test add_cell_measure. + +Integration tests for the +:func:`esmvalcore.preprocessor._other.add_cell_measure` +function. 
+ +""" + +import iris +import numpy as np +import pytest + +from esmvalcore.cmor.check import CheckLevels +from esmvalcore.preprocessor._other import add_cell_measure + +class Test: + """Test class.""" + @pytest.fixture(autouse=True) + def setUp(self): + """Assemble a stock cube.""" + fx_area_data = np.ones((3, 3)) + fx_volume_data = np.ones((3, 3, 3)) + self.new_cube_data = np.empty((3, 3)) + self.new_cube_data[:] = 200. + self.new_cube_3D_data = np.empty((3, 3, 3)) + self.new_cube_3D_data[:] = 200. + crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) + self.lons = iris.coords.DimCoord([0, 1.5, 3], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_east', + coord_system=crd_sys) + self.lats = iris.coords.DimCoord([0, 1.5, 3], + standard_name='latitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_north', + coord_system=crd_sys) + self.depth = iris.coords.DimCoord([0, 1.5, 3], + standard_name='depth', + bounds=[[0, 1], [1, 2], [2, 3]], + units='m', + long_name='ocean depth coordinate') + self.times = iris.coords.DimCoord([0, 1.5, 2.5, 3.5], + standard_name='time', + bounds=[[0, 1], [1, 2], [2, 3], + [3, 4]], + units='hours') + self.time2 = iris.coords.DimCoord([0, 1.5, 2.5], + standard_name='time', + bounds=[[0, 1], [1, 2], [2, 3]], + units='hours') + self.coords_spec = [(self.lats, 0), (self.lons, 1)] + self.fx_area = iris.cube.Cube(fx_area_data, + dim_coords_and_dims=self.coords_spec) + self.fx_volume = iris.cube.Cube(fx_volume_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2) + ]) + + def test_add_cell_measure_area(self, tmp_path): + """Test mask_landsea func.""" + fx_vars = { + 'areacella': {'table_id': 'fx', 'frequency': 'fx'}, + 'areacello': {'table_id': 'Ofx', 'frequency': 'fx'} + } + for fx_var in fx_vars: + self.fx_area.var_name = fx_var + self.fx_area.standard_name = 'cell_area' + self.fx_area.units = 'm2' + self.fx_area.attributes['table_id'] = 
fx_vars[fx_var]['table_id'] + self.fx_area.attributes['frequency'] = fx_vars[fx_var]['frequency'] + fx_file = str(tmp_path / f'{fx_var}.nc') + iris.save(self.fx_area, fx_file) + cube = iris.cube.Cube(self.new_cube_data, + dim_coords_and_dims=self.coords_spec) + cube = add_cell_measure( + cube, {fx_var: fx_file}, 'CMIP6', + 'EC-Earth3', CheckLevels.IGNORE) + assert cube.cell_measure(self.fx_area.standard_name) is not None + + def test_add_cell_measure_volume(self, tmp_path): + """Test mask_landsea func.""" + fx_vars = { + 'volcello': {'table_id': 'Ofx', 'frequency': 'fx'} + } + for fx_var in fx_vars: + self.fx_volume.var_name = fx_var + self.fx_volume.standard_name = 'ocean_volume' + self.fx_volume.units = 'm3' + self.fx_volume.attributes['table_id'] = ( + fx_vars[fx_var]['table_id']) + self.fx_volume.attributes['frequency'] = ( + fx_vars[fx_var]['frequency']) + fx_file = str(tmp_path / f'{fx_var}.nc') + iris.save(self.fx_volume, fx_file) + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + cube = add_cell_measure( + cube, {fx_var: fx_file}, 'CMIP6', + 'EC-Earth3', CheckLevels.IGNORE) + assert cube.cell_measure(self.fx_volume.standard_name) is not None + + def test_no_cell_measure(self): + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + cube = add_cell_measure(cube, {'areacello': None}, 'CMIP6', + 'EC-Earth3', CheckLevels.IGNORE) + assert cube.cell_measures() == [] diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 3fcd37fcd4..ed6d576913 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2111,8 +2111,8 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Omon_' not in fx_variables['volcello'][0] - assert 
'_Ofx_' in fx_variables['volcello'][0] + assert '_Omon_' not in fx_variables['volcello'] + assert '_Ofx_' in fx_variables['volcello'] def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, From ffc79968a03f62ce333add116a898ec88484305f Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 9 Feb 2021 16:32:31 +0100 Subject: [PATCH 15/56] Fix flake --- tests/integration/preprocessor/_other/test_add_cell_measure.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/preprocessor/_other/test_add_cell_measure.py b/tests/integration/preprocessor/_other/test_add_cell_measure.py index 784d1c0136..1a88b88322 100644 --- a/tests/integration/preprocessor/_other/test_add_cell_measure.py +++ b/tests/integration/preprocessor/_other/test_add_cell_measure.py @@ -14,6 +14,7 @@ from esmvalcore.cmor.check import CheckLevels from esmvalcore.preprocessor._other import add_cell_measure + class Test: """Test class.""" @pytest.fixture(autouse=True) @@ -104,7 +105,7 @@ def test_add_cell_measure_volume(self, tmp_path): cube = add_cell_measure( cube, {fx_var: fx_file}, 'CMIP6', 'EC-Earth3', CheckLevels.IGNORE) - assert cube.cell_measure(self.fx_volume.standard_name) is not None + assert cube.cell_measure(self.fx_volume.standard_name) is not None def test_no_cell_measure(self): cube = iris.cube.Cube(self.new_cube_3D_data, From 28d7bb12c4418aa6150dec66a88932cc3dfd5808 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 9 Feb 2021 16:44:24 +0100 Subject: [PATCH 16/56] Fix flake again --- tests/integration/preprocessor/_other/test_add_cell_measure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/preprocessor/_other/test_add_cell_measure.py b/tests/integration/preprocessor/_other/test_add_cell_measure.py index 1a88b88322..6e747a14eb 100644 --- a/tests/integration/preprocessor/_other/test_add_cell_measure.py +++ b/tests/integration/preprocessor/_other/test_add_cell_measure.py @@ -105,7 +105,7 @@ def 
test_add_cell_measure_volume(self, tmp_path): cube = add_cell_measure( cube, {fx_var: fx_file}, 'CMIP6', 'EC-Earth3', CheckLevels.IGNORE) - assert cube.cell_measure(self.fx_volume.standard_name) is not None + assert cube.cell_measure(self.fx_volume.standard_name) is not None def test_no_cell_measure(self): cube = iris.cube.Cube(self.new_cube_3D_data, From 5a1d00976952dbadbd7702753d1ae7658650b2da Mon Sep 17 00:00:00 2001 From: sloosvel Date: Wed, 10 Feb 2021 11:33:15 +0100 Subject: [PATCH 17/56] Add more tests --- esmvalcore/preprocessor/_other.py | 11 ++- .../_other/test_add_cell_measure.py | 90 ++++++++++++++++--- 2 files changed, 85 insertions(+), 16 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 497b44182a..22144c1e24 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -114,11 +114,10 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): fx_data = da.broadcast_to( fx_cube.core_data(), cube.shape) except ValueError: - logger.error( + raise ValueError( f"Frequencies of {cube.var_name} and " - f"{measure_name} cubes do not match." + f"{fx_cube.var_name} cubes do not match." 
) - raise measure = iris.coords.CellMeasure( fx_data, standard_name=fx_cube.standard_name, @@ -128,5 +127,9 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): attributes=fx_cube.attributes) cube.add_cell_measure(measure, range(0, measure.ndim)) logger.info(f'Added {fx_cube.var_name} ' - f'as cell measure in cube of {cube.var_name}') + f'as cell measure in cube of {cube.var_name}.') + else: + logger.info(f'Fx variable {fx_cube.var_name} ' + 'cannot be added as a cell measure ' + f'in cube of {cube.var_name}.') return cube diff --git a/tests/integration/preprocessor/_other/test_add_cell_measure.py b/tests/integration/preprocessor/_other/test_add_cell_measure.py index 6e747a14eb..cad286e3ab 100644 --- a/tests/integration/preprocessor/_other/test_add_cell_measure.py +++ b/tests/integration/preprocessor/_other/test_add_cell_measure.py @@ -6,7 +6,7 @@ function. """ - +import logging import iris import numpy as np import pytest @@ -14,6 +14,8 @@ from esmvalcore.cmor.check import CheckLevels from esmvalcore.preprocessor._other import add_cell_measure +logger = logging.getLogger(__name__) + class Test: """Test class.""" @@ -42,15 +44,20 @@ def setUp(self): bounds=[[0, 1], [1, 2], [2, 3]], units='m', long_name='ocean depth coordinate') - self.times = iris.coords.DimCoord([0, 1.5, 2.5, 3.5], - standard_name='time', - bounds=[[0, 1], [1, 2], [2, 3], - [3, 4]], - units='hours') - self.time2 = iris.coords.DimCoord([0, 1.5, 2.5], - standard_name='time', - bounds=[[0, 1], [1, 2], [2, 3]], - units='hours') + self.monthly_times = iris.coords.DimCoord( + [15.5, 45, 74.5, 105, 135.5, 166, + 196.5, 227.5, 258, 288.5, 319, 349.5,], + standard_name='time', + bounds=[[0, 31], [31, 59], [59, 90], + [90, 120], [120, 151], [151, 181], + [181, 212], [212, 243], [243, 273], + [273, 304], [304, 334], [334, 365]], + units='days since 1950-01-01 00:00:00') + self.yearly_times = iris.coords.DimCoord( + [182.5, 547.5], + standard_name='time', + bounds=[[0, 365], [365, 
730]], + units='days since 1950-01-01 00:00') self.coords_spec = [(self.lats, 0), (self.lons, 1)] self.fx_area = iris.cube.Cube(fx_area_data, dim_coords_and_dims=self.coords_spec) @@ -62,7 +69,7 @@ def setUp(self): ]) def test_add_cell_measure_area(self, tmp_path): - """Test mask_landsea func.""" + """Test add area fx variables as cell measures.""" fx_vars = { 'areacella': {'table_id': 'fx', 'frequency': 'fx'}, 'areacello': {'table_id': 'Ofx', 'frequency': 'fx'} @@ -83,7 +90,7 @@ def test_add_cell_measure_area(self, tmp_path): assert cube.cell_measure(self.fx_area.standard_name) is not None def test_add_cell_measure_volume(self, tmp_path): - """Test mask_landsea func.""" + """Test add volume as cell measure.""" fx_vars = { 'volcello': {'table_id': 'Ofx', 'frequency': 'fx'} } @@ -108,6 +115,7 @@ def test_add_cell_measure_volume(self, tmp_path): assert cube.cell_measure(self.fx_volume.standard_name) is not None def test_no_cell_measure(self): + """Test no cell measure is added.""" cube = iris.cube.Cube(self.new_cube_3D_data, dim_coords_and_dims=[ (self.depth, 0), @@ -116,3 +124,61 @@ def test_no_cell_measure(self): cube = add_cell_measure(cube, {'areacello': None}, 'CMIP6', 'EC-Earth3', CheckLevels.IGNORE) assert cube.cell_measures() == [] + + def test_invalid_cell_measure(self, tmp_path, caplog): + """Test invalid variable is not added as cell measure.""" + self.fx_area.var_name = 'sftlf' + self.fx_area.standard_name = "land_area_fraction" + self.fx_area.units = '%' + self.fx_area.attributes['table_id'] = 'fx' + self.fx_area.attributes['frequency'] = 'fx' + fx_file = str(tmp_path / f'{self.fx_area.var_name}.nc') + iris.save(self.fx_area, fx_file) + cube = iris.cube.Cube(self.new_cube_data, + dim_coords_and_dims=self.coords_spec) + cube.var_name = 'tas' + with caplog.at_level(logging.INFO): + cube = add_cell_measure( + cube, {self.fx_area.var_name: fx_file}, 'CMIP6', + 'EC-Earth3', CheckLevels.IGNORE) + msg = (f'Fx variable {self.fx_area.var_name} ' + 'cannot be 
added as a cell measure ' + f'in cube of {cube.var_name}.') + assert msg in caplog.text + assert cube.cell_measures() == [] + + def test_wrong_time_frequency(self, tmp_path): + """ + Test error is raised when cube and fx cube + frequencies do not match. + """ + volume_data = np.ones((2, 3, 3, 3)) + volume_cube = iris.cube.Cube( + volume_data, + dim_coords_and_dims=[(self.yearly_times, 0), + (self.depth, 1), + (self.lats, 2), + (self.lons, 3)]) + volume_cube.standard_name = 'ocean_volume' + volume_cube.var_name = 'volcello' + volume_cube.units = 'm3' + volume_cube.attributes['table_id'] = 'Oyr' + volume_cube.attributes['frequency'] = 'yr' + fx_file = str(tmp_path / f'{volume_cube.var_name}.nc') + iris.save(volume_cube, fx_file) + data = np.ones((12, 3, 3, 3)) + cube = iris.cube.Cube( + data, + dim_coords_and_dims=[(self.monthly_times, 0), + (self.depth, 1), + (self.lats, 2), + (self.lons, 3)]) + cube.var_name = 'thetao' + with pytest.raises(ValueError) as excinfo: + cube = add_cell_measure( + cube, {volume_cube.var_name: fx_file}, 'CMIP6', + 'EC-Earth3', CheckLevels.IGNORE) + msg = (f"Frequencies of {cube.var_name} and " + f"{volume_cube.var_name} cubes do not match.") + assert msg in str(excinfo.value) + \ No newline at end of file From 576ca7e0d22d59cd5f94471c7d3b9ffc1f93a945 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Wed, 10 Feb 2021 11:39:50 +0100 Subject: [PATCH 18/56] Fix flake --- tests/integration/preprocessor/_other/test_add_cell_measure.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/preprocessor/_other/test_add_cell_measure.py b/tests/integration/preprocessor/_other/test_add_cell_measure.py index cad286e3ab..60af0ce35b 100644 --- a/tests/integration/preprocessor/_other/test_add_cell_measure.py +++ b/tests/integration/preprocessor/_other/test_add_cell_measure.py @@ -46,7 +46,7 @@ def setUp(self): long_name='ocean depth coordinate') self.monthly_times = iris.coords.DimCoord( [15.5, 45, 74.5, 105, 135.5, 166, - 
196.5, 227.5, 258, 288.5, 319, 349.5,], + 196.5, 227.5, 258, 288.5, 319, 349.5], standard_name='time', bounds=[[0, 31], [31, 59], [59, 90], [90, 120], [120, 151], [151, 181], @@ -181,4 +181,3 @@ def test_wrong_time_frequency(self, tmp_path): msg = (f"Frequencies of {cube.var_name} and " f"{volume_cube.var_name} cubes do not match.") assert msg in str(excinfo.value) - \ No newline at end of file From 9a09475a0953121951e8fbf317777a44f03d00e2 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 19 Feb 2021 12:39:00 +0100 Subject: [PATCH 19/56] Use fx var info instead of cube attrs --- esmvalcore/_recipe.py | 33 +++---- esmvalcore/preprocessor/_other.py | 27 +++--- .../_other/test_add_cell_measure.py | 91 +++++++++++-------- tests/integration/test_recipe.py | 8 +- 4 files changed, 86 insertions(+), 73 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ee03d02ed5..727cdddf1c 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -315,9 +315,7 @@ def _get_default_settings(variable, config_user, derive=False): settings['save']['alias'] = variable['short_name'] settings['add_cell_measure'] = { - 'fx_variables': None, - 'project': variable['project'], - 'dataset': variable['dataset'], + 'fx_variables': {}, 'check_level': config_user.get('check_level', CheckLevels.DEFAULT), } @@ -397,7 +395,7 @@ def _get_fx_files(variable, fx_info, config_user): if fx_info['frequency'] == 'fx': fx_files = fx_files[0] - return fx_files + return fx_files, fx_info def _exclude_dataset(settings, variable, step): @@ -421,8 +419,8 @@ def _update_weighting_settings(settings, variable): def _update_fx_files(step_name, settings, variable, config_user, fx_vars): """Update settings with mask fx file list or dict.""" - if 'fx_variables' not in settings: - settings.update({'fx_variables': {}}) + if 'fx_variables' not in settings[step_name]: + settings[step_name].update({'fx_variables': {}}) if not fx_vars: return for fx_var, fx_info in fx_vars.items(): @@ -432,14 
+430,19 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): fx_info.update({'mip': None}) if 'short_name' not in fx_info: fx_info.update({'short_name': fx_var}) - fx_files = _get_fx_files(variable, fx_info, config_user) - settings['fx_variables'].update({ + fx_files, fx_info = _get_fx_files(variable, fx_info, config_user) + settings[step_name]['fx_variables'].update({ fx_var: fx_files }) + if step_name in ['area_statistics', 'volume_statistics'] and fx_files: + fx_info['filename'] = fx_files + settings['add_cell_measure']['fx_variables'].update({ + fx_var: fx_info + }) logger.info('Using fx_files: %s for variable %s during step %s', - pformat(settings['fx_variables']), variable['short_name'], - step_name) + pformat(settings[step_name]['fx_variables']), + variable['short_name'], step_name) def _update_fx_settings(settings, variable, config_user): @@ -464,14 +467,12 @@ def _get_fx_vars_from_attribute(step_settings, step_name): 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', 'zonal_statistics', 'area_statistics', 'volume_statistics' ] - for step_name, step_settings in settings.items(): + for step_name in settings: if step_name in fx_steps: - fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) - _update_fx_files(step_name, step_settings, variable, config_user, + fx_vars = _get_fx_vars_from_attribute(settings[step_name], + step_name) + _update_fx_files(step_name, settings, variable, config_user, fx_vars) - if step_name in ['area_statistics', 'volume_statistics']: - settings['add_cell_measure']['fx_variables'] = ( - step_settings['fx_variables']) def _read_attributes(filename): diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 22144c1e24..cc40565b72 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -41,7 +41,7 @@ def clip(cube, minimum=None, maximum=None): return cube -def add_cell_measure(cube, fx_variables, project, dataset, 
check_level): +def add_cell_measure(cube, fx_variables, check_level): """ Load requested fx files, check with CMOR standards and add the fx variables as cell measures in the cube containing the data. @@ -51,11 +51,7 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): cube: iris.cube.Cube Iris cube with input data. fx_variables: dict - Path to the needed fx_files. - project: str - - dataset: str - + Dictionary with fx_variable information. check_level: CheckLevels Level of strictness of the checks. @@ -72,17 +68,18 @@ def add_cell_measure(cube, fx_variables, project, dataset, check_level): if not fx_variables: return cube fx_cubes = iris.cube.CubeList() - for fx_files in fx_variables.values(): - if isinstance(fx_files, str): - fx_files = [fx_files] - if not fx_files: + for _, fx_info in fx_variables.items(): + if not fx_info: continue - for fx_file in fx_files: + if isinstance(fx_info['filename'], str): + fx_info['filename'] = [fx_info['filename']] + for fx_file in fx_info['filename']: loaded_cube = iris.load(fx_file) - short_name = loaded_cube[0].var_name - mip = loaded_cube[0].attributes['table_id'] - freq = loaded_cube[0].attributes['frequency'] - + short_name = fx_info['short_name'] + project = fx_info['project'] + dataset = fx_info['dataset'] + mip = fx_info['mip'] + freq = fx_info['frequency'] loaded_cube = fix_metadata(loaded_cube, short_name=short_name, project=project, dataset=dataset, mip=mip, frequency=freq, diff --git a/tests/integration/preprocessor/_other/test_add_cell_measure.py b/tests/integration/preprocessor/_other/test_add_cell_measure.py index 60af0ce35b..724caeca46 100644 --- a/tests/integration/preprocessor/_other/test_add_cell_measure.py +++ b/tests/integration/preprocessor/_other/test_add_cell_measure.py @@ -71,48 +71,56 @@ def setUp(self): def test_add_cell_measure_area(self, tmp_path): """Test add area fx variables as cell measures.""" fx_vars = { - 'areacella': {'table_id': 'fx', 'frequency': 'fx'}, - 'areacello': 
{'table_id': 'Ofx', 'frequency': 'fx'} + 'areacella': { + 'short_name': 'areacella', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx'}, + 'areacello': { + 'short_name': 'areacello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Ofx', + 'frequency': 'fx' + } } for fx_var in fx_vars: self.fx_area.var_name = fx_var self.fx_area.standard_name = 'cell_area' self.fx_area.units = 'm2' - self.fx_area.attributes['table_id'] = fx_vars[fx_var]['table_id'] - self.fx_area.attributes['frequency'] = fx_vars[fx_var]['frequency'] fx_file = str(tmp_path / f'{fx_var}.nc') + fx_vars[fx_var].update({'filename': fx_file}) iris.save(self.fx_area, fx_file) cube = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) cube = add_cell_measure( - cube, {fx_var: fx_file}, 'CMIP6', - 'EC-Earth3', CheckLevels.IGNORE) + cube, {fx_var: fx_vars[fx_var]}, CheckLevels.IGNORE) assert cube.cell_measure(self.fx_area.standard_name) is not None def test_add_cell_measure_volume(self, tmp_path): """Test add volume as cell measure.""" fx_vars = { - 'volcello': {'table_id': 'Ofx', 'frequency': 'fx'} + 'volcello': { + 'short_name': 'volcello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Ofx', + 'frequency': 'fx'} } - for fx_var in fx_vars: - self.fx_volume.var_name = fx_var - self.fx_volume.standard_name = 'ocean_volume' - self.fx_volume.units = 'm3' - self.fx_volume.attributes['table_id'] = ( - fx_vars[fx_var]['table_id']) - self.fx_volume.attributes['frequency'] = ( - fx_vars[fx_var]['frequency']) - fx_file = str(tmp_path / f'{fx_var}.nc') - iris.save(self.fx_volume, fx_file) - cube = iris.cube.Cube(self.new_cube_3D_data, - dim_coords_and_dims=[ - (self.depth, 0), - (self.lats, 1), - (self.lons, 2)]) - cube = add_cell_measure( - cube, {fx_var: fx_file}, 'CMIP6', - 'EC-Earth3', CheckLevels.IGNORE) - assert cube.cell_measure(self.fx_volume.standard_name) is not None + self.fx_volume.var_name = 'volcello' + 
self.fx_volume.standard_name = 'ocean_volume' + self.fx_volume.units = 'm3' + fx_file = str(tmp_path / 'volcello.nc') + iris.save(self.fx_volume, fx_file) + fx_vars['volcello'].update({'filename': fx_file}) + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + cube = add_cell_measure(cube, fx_vars, CheckLevels.IGNORE) + assert cube.cell_measure(self.fx_volume.standard_name) is not None def test_no_cell_measure(self): """Test no cell measure is added.""" @@ -121,8 +129,7 @@ def test_no_cell_measure(self): (self.depth, 0), (self.lats, 1), (self.lons, 2)]) - cube = add_cell_measure(cube, {'areacello': None}, 'CMIP6', - 'EC-Earth3', CheckLevels.IGNORE) + cube = add_cell_measure(cube, {'areacello': None}, CheckLevels.IGNORE) assert cube.cell_measures() == [] def test_invalid_cell_measure(self, tmp_path, caplog): @@ -130,17 +137,23 @@ def test_invalid_cell_measure(self, tmp_path, caplog): self.fx_area.var_name = 'sftlf' self.fx_area.standard_name = "land_area_fraction" self.fx_area.units = '%' - self.fx_area.attributes['table_id'] = 'fx' - self.fx_area.attributes['frequency'] = 'fx' fx_file = str(tmp_path / f'{self.fx_area.var_name}.nc') iris.save(self.fx_area, fx_file) + fx_vars = { + 'sftlf': { + 'short_name': 'sftlf', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': fx_file} + } cube = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) cube.var_name = 'tas' with caplog.at_level(logging.INFO): cube = add_cell_measure( - cube, {self.fx_area.var_name: fx_file}, 'CMIP6', - 'EC-Earth3', CheckLevels.IGNORE) + cube, fx_vars, CheckLevels.IGNORE) msg = (f'Fx variable {self.fx_area.var_name} ' 'cannot be added as a cell measure ' f'in cube of {cube.var_name}.') @@ -162,10 +175,17 @@ def test_wrong_time_frequency(self, tmp_path): volume_cube.standard_name = 'ocean_volume' volume_cube.var_name = 'volcello' volume_cube.units = 'm3' - 
volume_cube.attributes['table_id'] = 'Oyr' - volume_cube.attributes['frequency'] = 'yr' fx_file = str(tmp_path / f'{volume_cube.var_name}.nc') iris.save(volume_cube, fx_file) + fx_vars = { + 'volcello': { + 'short_name': 'volcello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Oyr', + 'frequency': 'yr', + 'filename': fx_file} + } data = np.ones((12, 3, 3, 3)) cube = iris.cube.Cube( data, @@ -176,8 +196,7 @@ def test_wrong_time_frequency(self, tmp_path): cube.var_name = 'thetao' with pytest.raises(ValueError) as excinfo: cube = add_cell_measure( - cube, {volume_cube.var_name: fx_file}, 'CMIP6', - 'EC-Earth3', CheckLevels.IGNORE) + cube, fx_vars, CheckLevels.IGNORE) msg = (f"Frequencies of {cube.var_name} and " f"{volume_cube.var_name} cubes do not match.") assert msg in str(excinfo.value) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index ed6d576913..f9789e612c 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -134,9 +134,7 @@ def _get_default_settings_for_chl(fix_dir, save_filename): 'frequency': 'yr', }, 'add_cell_measure': { - 'fx_variables': None, - 'project': 'CMIP5', - 'dataset': 'CanESM2', + 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, }, 'cleanup': { @@ -561,9 +559,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'frequency': 'fx', }, 'add_cell_measure': { - 'fx_variables': None, - 'project': 'CMIP5', - 'dataset': 'CanESM2', + 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, }, 'cleanup': { From cb57d8c596382e9f03a1b32ce0693e9bf383505e Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 19 Feb 2021 13:05:09 +0100 Subject: [PATCH 20/56] Fix some codacy errors --- esmvalcore/_recipe.py | 4 +--- esmvalcore/preprocessor/_area.py | 2 +- esmvalcore/preprocessor/_other.py | 15 +++++++-------- esmvalcore/preprocessor/_volume.py | 2 +- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/esmvalcore/_recipe.py 
b/esmvalcore/_recipe.py index 727cdddf1c..40e811a460 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -431,9 +431,7 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): if 'short_name' not in fx_info: fx_info.update({'short_name': fx_var}) fx_files, fx_info = _get_fx_files(variable, fx_info, config_user) - settings[step_name]['fx_variables'].update({ - fx_var: fx_files - }) + settings[step_name]['fx_variables'].update({fx_var: fx_files}) if step_name in ['area_statistics', 'volume_statistics'] and fx_files: fx_info['filename'] = fx_files settings['add_cell_measure']['fx_variables'].update({ diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 484f9f3b5b..614ef82deb 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -216,7 +216,7 @@ def area_statistics(cube, operator, fx_variables=None): logger.info( 'Cell measure "cell_area" not found in cube. ' 'Check fx_file availability.' - ) + ) logger.info('Attempting to calculate grid cell area...') if not fx_variables and cube.coord('latitude').points.ndim == 2: diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index cc40565b72..d67c68541c 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -104,17 +104,17 @@ def add_cell_measure(cube, fx_variables, check_level): 'areacella': 'area', 'areacello': 'area', 'volcello': 'volume' - } + } if fx_cube.var_name in measure_name.keys(): try: fx_data = da.broadcast_to( fx_cube.core_data(), cube.shape) - except ValueError: + except ValueError as exc: raise ValueError( f"Frequencies of {cube.var_name} and " f"{fx_cube.var_name} cubes do not match." 
- ) + ) from exc measure = iris.coords.CellMeasure( fx_data, standard_name=fx_cube.standard_name, @@ -123,10 +123,9 @@ def add_cell_measure(cube, fx_variables, check_level): var_name=fx_cube.var_name, attributes=fx_cube.attributes) cube.add_cell_measure(measure, range(0, measure.ndim)) - logger.info(f'Added {fx_cube.var_name} ' - f'as cell measure in cube of {cube.var_name}.') + logger.info('Added %s as cell measure in cube of %s.', + fx_cube.var_name, {cube.var_name}) else: - logger.info(f'Fx variable {fx_cube.var_name} ' - 'cannot be added as a cell measure ' - f'in cube of {cube.var_name}.') + logger.info('Fx variable %s cannot be added as a cell measure ' + 'in cube of %s.', fx_cube.var_name, cube.var_name) return cube diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 7586ebdf9b..7c6598c38c 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -217,7 +217,7 @@ def volume_statistics( logger.info( 'Cell measure "ocean_volume" not found in cube. ' 'Check fx_file availability.' 
- ) + ) logger.info('Attempting to calculate grid cell volume...') if not fx_variables: From 3e9e2c2e1e814a7734d532174153cb73dbed7fb8 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Thu, 11 Mar 2021 17:25:50 +0100 Subject: [PATCH 21/56] Accept fx_variables as list of dicts --- esmvalcore/_recipe.py | 23 +++-- tests/integration/test_recipe.py | 149 +++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index c5b283b0a7..ee184f308d 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -310,7 +310,7 @@ def _get_default_settings(variable, config_user, derive=False): settings['add_cell_measure'] = { 'fx_variables': {}, - 'check_level': config_user.get('check_level', CheckLevels.DEFAULT), + 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) } return settings @@ -437,12 +437,26 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): variable['short_name'], step_name) +def _fx_list_to_dict(fx_vars): + """Convert fx list to dictionary. 
To be deprecated at some point.""" + user_fx_vars = {} + for fx_var in fx_vars: + if isinstance(fx_var, dict): + short_name = fx_var['short_name'] + user_fx_vars.update({short_name: fx_var}) + continue + user_fx_vars.update({fx_var: None}) + return user_fx_vars + def _update_fx_settings(settings, variable, config_user): """Update fx settings depending on the needed method.""" # get fx variables either from user defined attribute or fixed def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars = step_settings.get('fx_variables') + if isinstance(user_fx_vars, list): + user_fx_vars = _fx_list_to_dict(user_fx_vars) + step_settings['fx_variables'] = user_fx_vars if not user_fx_vars: if step_name in ('mask_landsea', 'weighting_landsea_fraction'): user_fx_vars = {'sftlf': None} @@ -453,7 +467,7 @@ def _get_fx_vars_from_attribute(step_settings, step_name): elif step_name in ('area_statistics', 'volume_statistics', 'zonal_statistics'): user_fx_vars = {} - return user_fx_vars + step_settings['fx_variables'] = user_fx_vars fx_steps = [ 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', @@ -461,10 +475,9 @@ def _get_fx_vars_from_attribute(step_settings, step_name): ] for step_name in settings: if step_name in fx_steps: - fx_vars = _get_fx_vars_from_attribute(settings[step_name], - step_name) + _get_fx_vars_from_attribute(settings[step_name], step_name) _update_fx_files(step_name, settings, variable, config_user, - fx_vars) + settings[step_name]['fx_variables']) def _read_attributes(filename): diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 9131410368..656cbe95f9 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1941,6 +1941,80 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): assert '_piControl_' in fx_variables['areacello'] +def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): + content = dedent(""" + preprocessors: + 
landmask: + mask_landsea: + mask_out: sea + fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}] + mask_landseaice: + mask_out: sea + fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + volume_statistics: + operator: mean + area_statistics: + operator: mean + fx_variables: [{'short_name': 'areacello', 'mip': 'fx', + 'exp': 'piControl'}] + diagnostics: + diagnostic_name: + variables: + gpp: + preprocessor: landmask + project: CMIP5 + mip: Lmon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1 + additional_datasets: + - {dataset: CanESM2} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check custom fx variables + task = recipe.tasks.pop() + product = task.products.pop() + + # landsea + settings = product.settings['mask_landsea'] + assert len(settings) == 2 + assert settings['mask_out'] == 'sea' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['sftlf'] + assert '_piControl_' in fx_variables['sftlf'] + + # landseaice + settings = product.settings['mask_landseaice'] + assert len(settings) == 2 + assert settings['mask_out'] == 'sea' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['sftgif'] + assert '_piControl_' in fx_variables['sftgif'] + + # volume statistics + settings = product.settings['volume_statistics'] + assert len(settings) == 2 + assert settings['operator'] == 'mean' + assert settings['fx_variables'] == {} + + # area statistics + settings = product.settings['area_statistics'] + assert len(settings) == 2 + assert settings['operator'] == 'mean' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['areacello'] + assert '_piControl_' in fx_variables['areacello'] + + def test_landmask_no_fx(tmp_path, 
patched_failing_datafinder, config_user): content = dedent(""" preprocessors: @@ -2062,6 +2136,81 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert '_Ofx_' in fx_file else: assert False + + +def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): + content = dedent(""" + preprocessors: + preproc: + area_statistics: + operator: mean + fx_variables: [ + 'areacella', + 'areacello', + 'clayfrac', + 'sftlf', + 'sftgif', + 'sftof', + ] + mask_landsea: + mask_out: sea + + diagnostics: + diagnostic_name: + variables: + tas: + preprocessor: preproc + project: CMIP6 + mip: Amon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tas' + assert len(task.products) == 1 + product = task.products.pop() + + # Check area_statistics + assert 'area_statistics' in product.settings + settings = product.settings['area_statistics'] + assert len(settings) == 2 + assert settings['operator'] == 'mean' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 6 + assert '_fx_' in fx_variables['areacella'] + assert '_Ofx_' in fx_variables['areacello'] + assert '_Efx_' in fx_variables['clayfrac'] + assert '_fx_' in fx_variables['sftlf'] + assert '_IyrAnt_' in fx_variables['sftgif'][0] + assert '_Ofx_' in fx_variables['sftof'] + + # Check mask_landsea + assert 'mask_landsea' in product.settings + settings = product.settings['mask_landsea'] + assert len(settings) == 2 + assert settings['mask_out'] == 'sea' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + fx_variables = fx_variables.values() + assert len(fx_variables) == 2 + for fx_file in fx_variables: + if 
'sftlf' in fx_file: + assert '_fx_' in fx_file + elif 'sftof' in fx_file: + assert '_Ofx_' in fx_file + else: + assert False def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, From 7bf59cd3eb9c242099fe0b479bab5f6f80658853 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Thu, 11 Mar 2021 17:59:22 +0100 Subject: [PATCH 22/56] First attempt at adding ancillary vars --- esmvalcore/_recipe.py | 4 +- esmvalcore/preprocessor/__init__.py | 7 +- esmvalcore/preprocessor/_ancillary_vars.py | 121 ++++++++++++++++++ esmvalcore/preprocessor/_other.py | 90 ------------- .../_other/test_add_cell_measure.py | 17 ++- tests/integration/test_recipe.py | 6 +- 6 files changed, 138 insertions(+), 107 deletions(-) create mode 100644 esmvalcore/preprocessor/_ancillary_vars.py diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index ee184f308d..5a1050edcd 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -308,7 +308,7 @@ def _get_default_settings(variable, config_user, derive=False): if variable['short_name'] != variable['original_short_name']: settings['save']['alias'] = variable['short_name'] - settings['add_cell_measure'] = { + settings['add_fx_variables'] = { 'fx_variables': {}, 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) } @@ -428,7 +428,7 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): settings[step_name]['fx_variables'].update({fx_var: fx_files}) if step_name in ['area_statistics', 'volume_statistics'] and fx_files: fx_info['filename'] = fx_files - settings['add_cell_measure']['fx_variables'].update({ + settings['add_fx_variables']['fx_variables'].update({ fx_var: fx_info }) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 897d73028a..0503de492d 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -10,6 +10,7 @@ from .._task import BaseTask from ..cmor.check import cmor_check_data, cmor_check_metadata from 
..cmor.fix import fix_data, fix_file, fix_metadata +from ._ancillary_vars import add_fx_variables from ._area import ( area_statistics, extract_named_regions, @@ -41,7 +42,7 @@ mask_outside_range, ) from ._multimodel import multi_model_statistics -from ._other import clip, add_cell_measure +from ._other import clip from ._regrid import extract_levels, extract_point, regrid from ._time import ( annual_statistics, @@ -92,7 +93,7 @@ # Data reformatting/CMORization 'fix_data', 'cmor_check_data', - 'add_cell_measure', + 'add_fx_variables', # Time extraction (as defined in the preprocessor section) 'extract_time', 'extract_season', @@ -178,7 +179,7 @@ DEFAULT_ORDER = tuple(__all__) # The order of initial and final steps cannot be configured -INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('add_cell_measure') + 1] +INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('add_fx_variables') + 1] FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('save'):] MULTI_MODEL_FUNCTIONS = { diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py new file mode 100644 index 0000000000..62975af8dd --- /dev/null +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -0,0 +1,121 @@ +""" +Preprocessor functions for ancillary variables and cell measures. 
+""" + +import logging +import iris + +import dask.array as da + +from esmvalcore.preprocessor._io import load, concatenate_callback, concatenate +from esmvalcore.cmor.fix import fix_metadata, fix_data +from esmvalcore.cmor.check import cmor_check_metadata, cmor_check_data + +logger = logging.getLogger(__name__) + +def _load_fx(fx_info, check_level): + fx_cubes = iris.cube.CubeList() + + for fx_file in fx_info['filename']: + loaded_cube = load(fx_file, callback=concatenate_callback) + short_name = fx_info['short_name'] + project = fx_info['project'] + dataset = fx_info['dataset'] + mip = fx_info['mip'] + freq = fx_info['frequency'] + loaded_cube = fix_metadata(loaded_cube, short_name=short_name, + project=project, dataset=dataset, + mip=mip, frequency=freq, + check_level=check_level) + fx_cubes.append(loaded_cube[0]) + + fx_cube = concatenate(fx_cubes) + + fx_cube = cmor_check_metadata(fx_cube, cmor_table=project, mip=mip, + short_name=short_name, frequency=freq, + check_level=check_level) + + fx_cube = fix_data(fx_cube, short_name=short_name, project=project, + dataset=dataset, mip=mip, frequency=freq, + check_level=check_level) + + fx_cube = cmor_check_data(fx_cube, cmor_table=project, mip=mip, + short_name=fx_cube.var_name, frequency=freq, + check_level=check_level) + + return fx_cube + +def _add_cell_measure(cube, fx_cube, measure): + try: + fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) + except ValueError as exc: + raise ValueError( + f"Frequencies of {cube.var_name} and " + f"{fx_cube.var_name} cubes do not match." 
+ ) from exc + measure = iris.coords.CellMeasure( + fx_data, + standard_name=fx_cube.standard_name, + units=fx_cube.units, + measure=measure, + var_name=fx_cube.var_name, + attributes=fx_cube.attributes) + cube.add_cell_measure(measure, range(0, measure.ndim)) + logger.debug('Added %s as cell measure in cube of %s.', + fx_cube.var_name, cube.var_name) + +def _add_ancillary_variable(cube, fx_cube): + ancillary_var = iris.coords.AncillaryVariable( + fx_cube.core_data(), + standard_name=fx_cube.standard_name, + units=fx_cube.units, + var_name=fx_cube.var_name, + attributes=fx_cube.attributes) + cube.add_ancillary_variable(ancillary_var, range(0, ancillary_var.ndim)) + logger.debug('Added %s as ancillary variable in cube of %s.', + fx_cube.var_name, cube.var_name) + + +def add_fx_variables(cube, fx_variables, check_level): + """ + Load requested fx files, check with CMOR standards and add the + fx variables as cell measures or ancillary variables in + the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with input data. + fx_variables: dict + Dictionary with fx_variable information. + check_level: CheckLevels + Level of strictness of the checks. + + + Returns + ------- + iris.cube.Cube + Cube with added cell measures or ancillary variables. 
+ """ + + if not fx_variables: + return cube + + for fx_info in fx_variables.values(): + if not fx_info: + continue + if isinstance(fx_info['filename'], str): + fx_info['filename'] = [fx_info['filename']] + fx_cube = _load_fx(fx_info, check_level) + + measure_name = { + 'areacella': 'area', + 'areacello': 'area', + 'volcello': 'volume' + } + + if fx_cube.var_name in measure_name.keys(): + _add_cell_measure(cube, fx_cube, measure_name[fx_cube.var_name]) + else: + _add_ancillary_variable(cube, fx_cube) + return cube diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index d67c68541c..6db21da59c 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -39,93 +39,3 @@ def clip(cube, minimum=None, maximum=None): raise ValueError("Maximum should be equal or larger than minimum.") cube.data = da.clip(cube.core_data(), minimum, maximum) return cube - - -def add_cell_measure(cube, fx_variables, check_level): - """ - Load requested fx files, check with CMOR standards and add the - fx variables as cell measures in the cube containing the data. - - Parameters - ---------- - cube: iris.cube.Cube - Iris cube with input data. - fx_variables: dict - Dictionary with fx_variable information. - check_level: CheckLevels - Level of strictness of the checks. - - - Returns - ------- - iris.cube.Cube - Cube with added cell measures. 
- """ - from esmvalcore.preprocessor._io import concatenate - from esmvalcore.cmor.fix import fix_metadata, fix_data - from esmvalcore.cmor.check import cmor_check_metadata, cmor_check_data - - if not fx_variables: - return cube - fx_cubes = iris.cube.CubeList() - for _, fx_info in fx_variables.items(): - if not fx_info: - continue - if isinstance(fx_info['filename'], str): - fx_info['filename'] = [fx_info['filename']] - for fx_file in fx_info['filename']: - loaded_cube = iris.load(fx_file) - short_name = fx_info['short_name'] - project = fx_info['project'] - dataset = fx_info['dataset'] - mip = fx_info['mip'] - freq = fx_info['frequency'] - loaded_cube = fix_metadata(loaded_cube, short_name=short_name, - project=project, dataset=dataset, - mip=mip, frequency=freq, - check_level=check_level) - fx_cubes.append(loaded_cube[0]) - - fx_cube = concatenate(fx_cubes) - - fx_cube = cmor_check_metadata(fx_cube, cmor_table=project, mip=mip, - short_name=short_name, frequency=freq, - check_level=check_level) - - fx_cube = fix_data(fx_cube, short_name=short_name, project=project, - dataset=dataset, mip=mip, frequency=freq, - check_level=check_level) - - fx_cube = cmor_check_data(fx_cube, cmor_table=project, mip=mip, - short_name=fx_cube.var_name, frequency=freq, - check_level=check_level) - - measure_name = { - 'areacella': 'area', - 'areacello': 'area', - 'volcello': 'volume' - } - - if fx_cube.var_name in measure_name.keys(): - try: - fx_data = da.broadcast_to( - fx_cube.core_data(), cube.shape) - except ValueError as exc: - raise ValueError( - f"Frequencies of {cube.var_name} and " - f"{fx_cube.var_name} cubes do not match." 
- ) from exc - measure = iris.coords.CellMeasure( - fx_data, - standard_name=fx_cube.standard_name, - units=fx_cube.units, - measure=measure_name[fx_cube.var_name], - var_name=fx_cube.var_name, - attributes=fx_cube.attributes) - cube.add_cell_measure(measure, range(0, measure.ndim)) - logger.info('Added %s as cell measure in cube of %s.', - fx_cube.var_name, {cube.var_name}) - else: - logger.info('Fx variable %s cannot be added as a cell measure ' - 'in cube of %s.', fx_cube.var_name, cube.var_name) - return cube diff --git a/tests/integration/preprocessor/_other/test_add_cell_measure.py b/tests/integration/preprocessor/_other/test_add_cell_measure.py index 724caeca46..7a91db2455 100644 --- a/tests/integration/preprocessor/_other/test_add_cell_measure.py +++ b/tests/integration/preprocessor/_other/test_add_cell_measure.py @@ -2,8 +2,7 @@ Test add_cell_measure. Integration tests for the -:func:`esmvalcore.preprocessor._other.add_cell_measure` -function. +:func:`esmvalcore.preprocessor._ancillary_vars` module. 
""" import logging @@ -12,7 +11,7 @@ import pytest from esmvalcore.cmor.check import CheckLevels -from esmvalcore.preprocessor._other import add_cell_measure +from esmvalcore.preprocessor._ancillary_vars import add_fx_variables logger = logging.getLogger(__name__) @@ -94,7 +93,7 @@ def test_add_cell_measure_area(self, tmp_path): iris.save(self.fx_area, fx_file) cube = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - cube = add_cell_measure( + cube = add_fx_variables( cube, {fx_var: fx_vars[fx_var]}, CheckLevels.IGNORE) assert cube.cell_measure(self.fx_area.standard_name) is not None @@ -119,7 +118,7 @@ def test_add_cell_measure_volume(self, tmp_path): (self.depth, 0), (self.lats, 1), (self.lons, 2)]) - cube = add_cell_measure(cube, fx_vars, CheckLevels.IGNORE) + cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) assert cube.cell_measure(self.fx_volume.standard_name) is not None def test_no_cell_measure(self): @@ -129,10 +128,10 @@ def test_no_cell_measure(self): (self.depth, 0), (self.lats, 1), (self.lons, 2)]) - cube = add_cell_measure(cube, {'areacello': None}, CheckLevels.IGNORE) + cube = add_fx_variables(cube, {'areacello': None}, CheckLevels.IGNORE) assert cube.cell_measures() == [] - def test_invalid_cell_measure(self, tmp_path, caplog): + def test_add_ancillary_vars(self, tmp_path, caplog): """Test invalid variable is not added as cell measure.""" self.fx_area.var_name = 'sftlf' self.fx_area.standard_name = "land_area_fraction" @@ -152,7 +151,7 @@ def test_invalid_cell_measure(self, tmp_path, caplog): dim_coords_and_dims=self.coords_spec) cube.var_name = 'tas' with caplog.at_level(logging.INFO): - cube = add_cell_measure( + cube = add_fx_variables( cube, fx_vars, CheckLevels.IGNORE) msg = (f'Fx variable {self.fx_area.var_name} ' 'cannot be added as a cell measure ' @@ -195,7 +194,7 @@ def test_wrong_time_frequency(self, tmp_path): (self.lons, 3)]) cube.var_name = 'thetao' with pytest.raises(ValueError) as excinfo: - cube = 
add_cell_measure( + cube = add_fx_variables( cube, fx_vars, CheckLevels.IGNORE) msg = (f"Frequencies of {cube.var_name} and " f"{volume_cube.var_name} cubes do not match.") diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 656cbe95f9..2cc55207f5 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -48,7 +48,7 @@ ) DEFAULT_PREPROCESSOR_STEPS = ( - 'add_cell_measure', + 'add_fx_variables', 'cleanup', 'cmor_check_data', 'cmor_check_metadata', @@ -134,7 +134,7 @@ def _get_default_settings_for_chl(fix_dir, save_filename): 'short_name': 'chl', 'frequency': 'yr', }, - 'add_cell_measure': { + 'add_fx_variables': { 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, }, @@ -559,7 +559,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'short_name': 'sftlf', 'frequency': 'fx', }, - 'add_cell_measure': { + 'add_fx_variables': { 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, }, From 034706f1bc2c7a1d861a178dfd4f537a5a294fed Mon Sep 17 00:00:00 2001 From: sloosvel Date: Thu, 11 Mar 2021 18:32:11 +0100 Subject: [PATCH 23/56] Add docstrings --- esmvalcore/preprocessor/_ancillary_vars.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index 62975af8dd..36cee3654a 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -14,6 +14,7 @@ logger = logging.getLogger(__name__) def _load_fx(fx_info, check_level): + """Load and CMOR-check fx variables.""" fx_cubes = iris.cube.CubeList() for fx_file in fx_info['filename']: @@ -46,6 +47,7 @@ def _load_fx(fx_info, check_level): return fx_cube def _add_cell_measure(cube, fx_cube, measure): + """Add cell measure in cube.""" try: fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) except ValueError as exc: @@ -65,6 +67,7 @@ def _add_cell_measure(cube, fx_cube, measure): 
fx_cube.var_name, cube.var_name) def _add_ancillary_variable(cube, fx_cube): + """Add ancillary variable in cube.""" ancillary_var = iris.coords.AncillaryVariable( fx_cube.core_data(), standard_name=fx_cube.standard_name, From 6367c5fa8c481287acc9fcb1c2f8604fc2b99110 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 12 Mar 2021 09:48:36 +0100 Subject: [PATCH 24/56] Move tests --- esmvalcore/preprocessor/_ancillary_vars.py | 2 +- .../preprocessor/_ancillary_vars/__init__.py | 5 + .../_ancillary_vars/test_add_fx_variables.py | 194 ++++++++++++++++++ 3 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 tests/integration/preprocessor/_ancillary_vars/__init__.py create mode 100644 tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index 36cee3654a..aa7c70bcb2 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -117,7 +117,7 @@ def add_fx_variables(cube, fx_variables, check_level): 'volcello': 'volume' } - if fx_cube.var_name in measure_name.keys(): + if fx_cube.var_name in measure_name: _add_cell_measure(cube, fx_cube, measure_name[fx_cube.var_name]) else: _add_ancillary_variable(cube, fx_cube) diff --git a/tests/integration/preprocessor/_ancillary_vars/__init__.py b/tests/integration/preprocessor/_ancillary_vars/__init__.py new file mode 100644 index 0000000000..88b606fc48 --- /dev/null +++ b/tests/integration/preprocessor/_ancillary_vars/__init__.py @@ -0,0 +1,5 @@ +""" +Test _ancillary_vars.py + +Integration tests for the esmvalcore.preprocessor._ancillary_vars module +""" diff --git a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py new file mode 100644 index 0000000000..ba8ffbeb42 --- /dev/null +++ b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py @@ -0,0 
+1,194 @@ +""" +Test add_fx_variables. + +Integration tests for the +:func:`esmvalcore.preprocessor._ancillary_vars` module. + +""" +import logging +import iris +import numpy as np +import pytest + +from esmvalcore.cmor.check import CheckLevels +from esmvalcore.preprocessor._ancillary_vars import add_fx_variables + +logger = logging.getLogger(__name__) + + +class Test: + """Test class.""" + @pytest.fixture(autouse=True) + def setUp(self): + """Assemble a stock cube.""" + fx_area_data = np.ones((3, 3)) + fx_volume_data = np.ones((3, 3, 3)) + self.new_cube_data = np.empty((3, 3)) + self.new_cube_data[:] = 200. + self.new_cube_3D_data = np.empty((3, 3, 3)) + self.new_cube_3D_data[:] = 200. + crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) + self.lons = iris.coords.DimCoord([0, 1.5, 3], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_east', + coord_system=crd_sys) + self.lats = iris.coords.DimCoord([0, 1.5, 3], + standard_name='latitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_north', + coord_system=crd_sys) + self.depth = iris.coords.DimCoord([0, 1.5, 3], + standard_name='depth', + bounds=[[0, 1], [1, 2], [2, 3]], + units='m', + long_name='ocean depth coordinate') + self.monthly_times = iris.coords.DimCoord( + [15.5, 45, 74.5, 105, 135.5, 166, + 196.5, 227.5, 258, 288.5, 319, 349.5], + standard_name='time', + bounds=[[0, 31], [31, 59], [59, 90], + [90, 120], [120, 151], [151, 181], + [181, 212], [212, 243], [243, 273], + [273, 304], [304, 334], [334, 365]], + units='days since 1950-01-01 00:00:00') + self.yearly_times = iris.coords.DimCoord( + [182.5, 547.5], + standard_name='time', + bounds=[[0, 365], [365, 730]], + units='days since 1950-01-01 00:00') + self.coords_spec = [(self.lats, 0), (self.lons, 1)] + self.fx_area = iris.cube.Cube(fx_area_data, + dim_coords_and_dims=self.coords_spec) + self.fx_volume = iris.cube.Cube(fx_volume_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + 
(self.lons, 2) + ]) + + def test_add_cell_measure_area(self, tmp_path): + """Test add area fx variables as cell measures.""" + fx_vars = { + 'areacella': { + 'short_name': 'areacella', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx'}, + 'areacello': { + 'short_name': 'areacello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Ofx', + 'frequency': 'fx' + } + } + for fx_var in fx_vars: + self.fx_area.var_name = fx_var + self.fx_area.standard_name = 'cell_area' + self.fx_area.units = 'm2' + fx_file = str(tmp_path / f'{fx_var}.nc') + fx_vars[fx_var].update({'filename': fx_file}) + iris.save(self.fx_area, fx_file) + cube = iris.cube.Cube(self.new_cube_data, + dim_coords_and_dims=self.coords_spec) + cube = add_fx_variables( + cube, {fx_var: fx_vars[fx_var]}, CheckLevels.IGNORE) + assert cube.cell_measure(self.fx_area.standard_name) is not None + + def test_add_cell_measure_volume(self, tmp_path): + """Test add volume as cell measure.""" + fx_vars = { + 'volcello': { + 'short_name': 'volcello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Ofx', + 'frequency': 'fx'} + } + self.fx_volume.var_name = 'volcello' + self.fx_volume.standard_name = 'ocean_volume' + self.fx_volume.units = 'm3' + fx_file = str(tmp_path / 'volcello.nc') + iris.save(self.fx_volume, fx_file) + fx_vars['volcello'].update({'filename': fx_file}) + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) + assert cube.cell_measure(self.fx_volume.standard_name) is not None + + def test_no_cell_measure(self): + """Test no cell measure is added.""" + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[ + (self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + cube = add_fx_variables(cube, {'areacello': None}, CheckLevels.IGNORE) + assert cube.cell_measures() == [] + + def test_add_ancillary_vars(self, 
tmp_path): + """Test invalid variable is not added as cell measure.""" + self.fx_area.var_name = 'sftlf' + self.fx_area.standard_name = "land_area_fraction" + self.fx_area.units = '%' + fx_file = str(tmp_path / f'{self.fx_area.var_name}.nc') + iris.save(self.fx_area, fx_file) + fx_vars = { + 'sftlf': { + 'short_name': 'sftlf', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': fx_file} + } + cube = iris.cube.Cube(self.new_cube_data, + dim_coords_and_dims=self.coords_spec) + cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) + assert cube.ancillary_variable(self.fx_area.standard_name) is not None + + def test_wrong_time_frequency(self, tmp_path): + """ + Test error is raised when cube and fx cube + frequencies do not match. + """ + volume_data = np.ones((2, 3, 3, 3)) + volume_cube = iris.cube.Cube( + volume_data, + dim_coords_and_dims=[(self.yearly_times, 0), + (self.depth, 1), + (self.lats, 2), + (self.lons, 3)]) + volume_cube.standard_name = 'ocean_volume' + volume_cube.var_name = 'volcello' + volume_cube.units = 'm3' + fx_file = str(tmp_path / f'{volume_cube.var_name}.nc') + iris.save(volume_cube, fx_file) + fx_vars = { + 'volcello': { + 'short_name': 'volcello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Oyr', + 'frequency': 'yr', + 'filename': fx_file} + } + data = np.ones((12, 3, 3, 3)) + cube = iris.cube.Cube( + data, + dim_coords_and_dims=[(self.monthly_times, 0), + (self.depth, 1), + (self.lats, 2), + (self.lons, 3)]) + cube.var_name = 'thetao' + with pytest.raises(ValueError) as excinfo: + cube = add_fx_variables( + cube, fx_vars, CheckLevels.IGNORE) + msg = (f"Frequencies of {cube.var_name} and " + f"{volume_cube.var_name} cubes do not match.") + assert msg in str(excinfo.value) From de337ef021a8c6c0b0cc417e928d4bb1a3d692fe Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 12 Mar 2021 10:05:47 +0100 Subject: [PATCH 25/56] Update test and fix flake --- esmvalcore/_recipe.py | 1 + 
esmvalcore/preprocessor/_ancillary_vars.py | 13 ++++++------- tests/integration/test_recipe.py | 2 +- tests/unit/preprocessor/_other/test_other.py | 1 - 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 9d29375bda..6761c70f4b 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -449,6 +449,7 @@ def _fx_list_to_dict(fx_vars): user_fx_vars.update({fx_var: None}) return user_fx_vars + def _update_fx_settings(settings, variable, config_user): """Update fx settings depending on the needed method.""" diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index aa7c70bcb2..c2d9d46922 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -1,6 +1,4 @@ -""" -Preprocessor functions for ancillary variables and cell measures. -""" +"""Preprocessor functions for ancillary variables and cell measures.""" import logging import iris @@ -13,6 +11,7 @@ logger = logging.getLogger(__name__) + def _load_fx(fx_info, check_level): """Load and CMOR-check fx variables.""" fx_cubes = iris.cube.CubeList() @@ -46,15 +45,14 @@ def _load_fx(fx_info, check_level): return fx_cube + def _add_cell_measure(cube, fx_cube, measure): """Add cell measure in cube.""" try: fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) except ValueError as exc: - raise ValueError( - f"Frequencies of {cube.var_name} and " - f"{fx_cube.var_name} cubes do not match." 
- ) from exc + raise ValueError(f"Frequencies of {cube.var_name} and " + f"{fx_cube.var_name} cubes do not match.") from exc measure = iris.coords.CellMeasure( fx_data, standard_name=fx_cube.standard_name, @@ -66,6 +64,7 @@ def _add_cell_measure(cube, fx_cube, measure): logger.debug('Added %s as cell measure in cube of %s.', fx_cube.var_name, cube.var_name) + def _add_ancillary_variable(cube, fx_cube): """Add ancillary variable in cube.""" ancillary_var = iris.coords.AncillaryVariable( diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 8e1a40d452..a22d404595 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2140,7 +2140,7 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert '_Ofx_' in fx_file else: assert False - + def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 6335a74f64..08a1ee26c0 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -2,7 +2,6 @@ import unittest -import iris import iris.coord_categorisation import iris.coords import numpy as np From c92cce746978ee5169f77ce0d283c3b559d90565 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 12 Mar 2021 10:06:35 +0100 Subject: [PATCH 26/56] Remove old tests --- .../preprocessor/_other/__init__.py | 5 - .../_other/test_add_cell_measure.py | 201 ------------------ 2 files changed, 206 deletions(-) delete mode 100644 tests/integration/preprocessor/_other/__init__.py delete mode 100644 tests/integration/preprocessor/_other/test_add_cell_measure.py diff --git a/tests/integration/preprocessor/_other/__init__.py b/tests/integration/preprocessor/_other/__init__.py deleted file mode 100644 index 5bd808bc6a..0000000000 --- a/tests/integration/preprocessor/_other/__init__.py +++ /dev/null @@ -1,5 
+0,0 @@ -""" -Test _other.py - -Integration tests for the esmvalcore.preprocessor._other module -""" diff --git a/tests/integration/preprocessor/_other/test_add_cell_measure.py b/tests/integration/preprocessor/_other/test_add_cell_measure.py deleted file mode 100644 index 7a91db2455..0000000000 --- a/tests/integration/preprocessor/_other/test_add_cell_measure.py +++ /dev/null @@ -1,201 +0,0 @@ -""" -Test add_cell_measure. - -Integration tests for the -:func:`esmvalcore.preprocessor._ancillary_vars` module. - -""" -import logging -import iris -import numpy as np -import pytest - -from esmvalcore.cmor.check import CheckLevels -from esmvalcore.preprocessor._ancillary_vars import add_fx_variables - -logger = logging.getLogger(__name__) - - -class Test: - """Test class.""" - @pytest.fixture(autouse=True) - def setUp(self): - """Assemble a stock cube.""" - fx_area_data = np.ones((3, 3)) - fx_volume_data = np.ones((3, 3, 3)) - self.new_cube_data = np.empty((3, 3)) - self.new_cube_data[:] = 200. - self.new_cube_3D_data = np.empty((3, 3, 3)) - self.new_cube_3D_data[:] = 200. 
- crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) - self.lons = iris.coords.DimCoord([0, 1.5, 3], - standard_name='longitude', - bounds=[[0, 1], [1, 2], [2, 3]], - units='degrees_east', - coord_system=crd_sys) - self.lats = iris.coords.DimCoord([0, 1.5, 3], - standard_name='latitude', - bounds=[[0, 1], [1, 2], [2, 3]], - units='degrees_north', - coord_system=crd_sys) - self.depth = iris.coords.DimCoord([0, 1.5, 3], - standard_name='depth', - bounds=[[0, 1], [1, 2], [2, 3]], - units='m', - long_name='ocean depth coordinate') - self.monthly_times = iris.coords.DimCoord( - [15.5, 45, 74.5, 105, 135.5, 166, - 196.5, 227.5, 258, 288.5, 319, 349.5], - standard_name='time', - bounds=[[0, 31], [31, 59], [59, 90], - [90, 120], [120, 151], [151, 181], - [181, 212], [212, 243], [243, 273], - [273, 304], [304, 334], [334, 365]], - units='days since 1950-01-01 00:00:00') - self.yearly_times = iris.coords.DimCoord( - [182.5, 547.5], - standard_name='time', - bounds=[[0, 365], [365, 730]], - units='days since 1950-01-01 00:00') - self.coords_spec = [(self.lats, 0), (self.lons, 1)] - self.fx_area = iris.cube.Cube(fx_area_data, - dim_coords_and_dims=self.coords_spec) - self.fx_volume = iris.cube.Cube(fx_volume_data, - dim_coords_and_dims=[ - (self.depth, 0), - (self.lats, 1), - (self.lons, 2) - ]) - - def test_add_cell_measure_area(self, tmp_path): - """Test add area fx variables as cell measures.""" - fx_vars = { - 'areacella': { - 'short_name': 'areacella', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'fx', - 'frequency': 'fx'}, - 'areacello': { - 'short_name': 'areacello', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'Ofx', - 'frequency': 'fx' - } - } - for fx_var in fx_vars: - self.fx_area.var_name = fx_var - self.fx_area.standard_name = 'cell_area' - self.fx_area.units = 'm2' - fx_file = str(tmp_path / f'{fx_var}.nc') - fx_vars[fx_var].update({'filename': fx_file}) - iris.save(self.fx_area, fx_file) - cube = 
iris.cube.Cube(self.new_cube_data, - dim_coords_and_dims=self.coords_spec) - cube = add_fx_variables( - cube, {fx_var: fx_vars[fx_var]}, CheckLevels.IGNORE) - assert cube.cell_measure(self.fx_area.standard_name) is not None - - def test_add_cell_measure_volume(self, tmp_path): - """Test add volume as cell measure.""" - fx_vars = { - 'volcello': { - 'short_name': 'volcello', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'Ofx', - 'frequency': 'fx'} - } - self.fx_volume.var_name = 'volcello' - self.fx_volume.standard_name = 'ocean_volume' - self.fx_volume.units = 'm3' - fx_file = str(tmp_path / 'volcello.nc') - iris.save(self.fx_volume, fx_file) - fx_vars['volcello'].update({'filename': fx_file}) - cube = iris.cube.Cube(self.new_cube_3D_data, - dim_coords_and_dims=[ - (self.depth, 0), - (self.lats, 1), - (self.lons, 2)]) - cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) - assert cube.cell_measure(self.fx_volume.standard_name) is not None - - def test_no_cell_measure(self): - """Test no cell measure is added.""" - cube = iris.cube.Cube(self.new_cube_3D_data, - dim_coords_and_dims=[ - (self.depth, 0), - (self.lats, 1), - (self.lons, 2)]) - cube = add_fx_variables(cube, {'areacello': None}, CheckLevels.IGNORE) - assert cube.cell_measures() == [] - - def test_add_ancillary_vars(self, tmp_path, caplog): - """Test invalid variable is not added as cell measure.""" - self.fx_area.var_name = 'sftlf' - self.fx_area.standard_name = "land_area_fraction" - self.fx_area.units = '%' - fx_file = str(tmp_path / f'{self.fx_area.var_name}.nc') - iris.save(self.fx_area, fx_file) - fx_vars = { - 'sftlf': { - 'short_name': 'sftlf', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'fx', - 'frequency': 'fx', - 'filename': fx_file} - } - cube = iris.cube.Cube(self.new_cube_data, - dim_coords_and_dims=self.coords_spec) - cube.var_name = 'tas' - with caplog.at_level(logging.INFO): - cube = add_fx_variables( - cube, fx_vars, CheckLevels.IGNORE) - msg = (f'Fx 
variable {self.fx_area.var_name} ' - 'cannot be added as a cell measure ' - f'in cube of {cube.var_name}.') - assert msg in caplog.text - assert cube.cell_measures() == [] - - def test_wrong_time_frequency(self, tmp_path): - """ - Test error is raised when cube and fx cube - frequencies do not match. - """ - volume_data = np.ones((2, 3, 3, 3)) - volume_cube = iris.cube.Cube( - volume_data, - dim_coords_and_dims=[(self.yearly_times, 0), - (self.depth, 1), - (self.lats, 2), - (self.lons, 3)]) - volume_cube.standard_name = 'ocean_volume' - volume_cube.var_name = 'volcello' - volume_cube.units = 'm3' - fx_file = str(tmp_path / f'{volume_cube.var_name}.nc') - iris.save(volume_cube, fx_file) - fx_vars = { - 'volcello': { - 'short_name': 'volcello', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'Oyr', - 'frequency': 'yr', - 'filename': fx_file} - } - data = np.ones((12, 3, 3, 3)) - cube = iris.cube.Cube( - data, - dim_coords_and_dims=[(self.monthly_times, 0), - (self.depth, 1), - (self.lats, 2), - (self.lons, 3)]) - cube.var_name = 'thetao' - with pytest.raises(ValueError) as excinfo: - cube = add_fx_variables( - cube, fx_vars, CheckLevels.IGNORE) - msg = (f"Frequencies of {cube.var_name} and " - f"{volume_cube.var_name} cubes do not match.") - assert msg in str(excinfo.value) From 392b6c0a42569bdcd992a033c7109f97594e7df7 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 12 Mar 2021 10:46:25 +0100 Subject: [PATCH 27/56] Update documentation --- doc/recipe/preprocessor.rst | 71 +++++++++++++++++++++++++++++++ esmvalcore/preprocessor/_other.py | 1 - 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index b28a1c8c44..85dbf37419 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -9,6 +9,7 @@ roughly following the default order in which preprocessor functions are applied: * :ref:`Variable derivation` * :ref:`CMOR check and dataset-specific fixes` +* :ref:`Fx variables 
as cell measures or ancillary variables` * :ref:`Vertical interpolation` * :ref:`Weighting` * :ref:`Land/Sea/Ice masking` @@ -175,6 +176,16 @@ steps: To get an overview on data fixes and how to implement new ones, please go to :ref:`fixing_data`. +.. _Fx variables as cell measures or ancillary variables: + +Fx variables as cell measures or ancillary variables +==================================================== +Preprocessor steps related to spatial statistics or masking may require +the use of ``fx_variables`` to be able to perform the computations. +The preprocessor step ``add_fx_variables`` loads the required ``fx_variables``, +checks them against the CMOR standards and adds them as either a ``cell_measure`` +or an ``ancillary_variable`` inside the cube of data. This ensures that the +defined preprocessor chain is applied to both the variables and the fx_variables. .. _Vertical interpolation: @@ -331,6 +342,20 @@ experiment is preferred for fx data retrieval: sftof: exp: piControl +or alternatively: + +.. code-block:: yaml + + preprocessors: + preproc_weighting: + weighting_landsea_fraction: + area_type: land + exclude: ['CanESM2', 'reference_dataset'] + fx_variables: [ + {short_name: sftlf, exp: piControl}, + {short_name: sftof, exp: piControl} + ] + See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. @@ -397,6 +422,19 @@ experiment is preferred for fx data retrieval: sftof: exp: piControl +or alternatively: + +.. code-block:: yaml + + preprocessors: + landmask: + mask_landsea: + mask_out: sea + fx_variables: [ + {short_name: sftlf, exp: piControl}, + {short_name: sftof, exp: piControl} + ] + If the corresponding fx file is not found (which is the case for some models and almost all observational datasets), the preprocessor attempts to mask the data using Natural Earth mask files (that are @@ -439,6 +477,15 @@ experiment is preferred for fx data retrieval: sftgif: exp: piControl +or alternatively: + +..
code-block:: yaml + + preprocessors: + landseaicemask: + mask_landseaice: + mask_out: sea + fx_variables: [{short_name: sftgif, exp: piControl}] See also :func:`esmvalcore.preprocessor.mask_landseaice`. @@ -1295,6 +1342,18 @@ as a CMOR variable can permit): volcello: mip: fx +Alternatively, the ``fx_variables`` argument can also be specified as a list: + +.. code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as a list of dictionaries: + +.. code-block:: yaml + + fx_variables: [{short_name: areacello, mip: Omon}, {short_name: volcello, mip: fx}] + The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. @@ -1361,6 +1420,18 @@ as a CMOR variable can permit): volcello: mip: fx +Alternatively, the ``fx_variables`` argument can also be specified as a list: + +.. code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as a list of dictionaries: + +.. code-block:: yaml + + fx_variables: [{short_name: areacello, mip: Omon}, {short_name: volcello, mip: fx}] + The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. 
diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 6db21da59c..697e8b3347 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -3,7 +3,6 @@ """ import logging -import iris import dask.array as da From 1ba36ae534afa9ed87829fe61bc8adb3e069a7dc Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 12 Mar 2021 18:21:24 +0100 Subject: [PATCH 28/56] Broadcast ancillary vars to shape --- esmvalcore/preprocessor/_ancillary_vars.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index c2d9d46922..555e2d03f5 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -67,8 +67,13 @@ def _add_cell_measure(cube, fx_cube, measure): def _add_ancillary_variable(cube, fx_cube): """Add ancillary variable in cube.""" + try: + fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) + except ValueError as exc: + raise ValueError(f"Frequencies of {cube.var_name} and " + f"{fx_cube.var_name} cubes do not match.") from exc ancillary_var = iris.coords.AncillaryVariable( - fx_cube.core_data(), + fx_data, standard_name=fx_cube.standard_name, units=fx_cube.units, var_name=fx_cube.var_name, From 298606e9dfd0297e04d1d864be947af30ff3b8df Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 12 Mar 2021 18:23:47 +0100 Subject: [PATCH 29/56] Remove fx_variables attribute --- esmvalcore/_recipe.py | 9 ++++----- esmvalcore/preprocessor/_area.py | 6 ++---- esmvalcore/preprocessor/_volume.py | 9 +-------- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 6761c70f4b..6261c21cfd 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -414,8 +414,6 @@ def _update_weighting_settings(settings, variable): def _update_fx_files(step_name, settings, variable, config_user, fx_vars): """Update settings with 
mask fx file list or dict.""" - if 'fx_variables' not in settings[step_name]: - settings[step_name].update({'fx_variables': {}}) if not fx_vars: return for fx_var, fx_info in fx_vars.items(): @@ -426,15 +424,14 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): if 'short_name' not in fx_info: fx_info.update({'short_name': fx_var}) fx_files, fx_info = _get_fx_files(variable, fx_info, config_user) - settings[step_name]['fx_variables'].update({fx_var: fx_files}) - if step_name in ['area_statistics', 'volume_statistics'] and fx_files: + if fx_files: fx_info['filename'] = fx_files settings['add_fx_variables']['fx_variables'].update({ fx_var: fx_info }) logger.info('Using fx_files: %s for variable %s during step %s', - pformat(settings[step_name]['fx_variables']), + pformat(settings['add_fx_variables']['fx_variables']), variable['short_name'], step_name) @@ -480,6 +477,8 @@ def _get_fx_vars_from_attribute(step_settings, step_name): _get_fx_vars_from_attribute(settings[step_name], step_name) _update_fx_files(step_name, settings, variable, config_user, settings[step_name]['fx_variables']) + if 'fx_variables' in settings[step_name]: + settings[step_name].pop('fx_variables', None) def _read_attributes(filename): diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 614ef82deb..63bb8db080 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -157,7 +157,7 @@ def meridional_statistics(cube, operator): raise ValueError(msg) -def area_statistics(cube, operator, fx_variables=None): +def area_statistics(cube, operator): """Apply a statistical operator in the horizontal direction. The average in the horizontal direction. We assume that the @@ -194,8 +194,6 @@ def area_statistics(cube, operator, fx_variables=None): operator: str The operation, options: mean, median, min, max, std_dev, sum, variance, rms. 
- fx_variables: dict - dictionary of field:filename for the fx_variables Returns ------- @@ -219,7 +217,7 @@ def area_statistics(cube, operator, fx_variables=None): ) logger.info('Attempting to calculate grid cell area...') - if not fx_variables and cube.coord('latitude').points.ndim == 2: + if cube.coord('latitude').points.ndim == 2: coord_names = [coord.standard_name for coord in cube.coords()] if 'grid_latitude' in coord_names and 'grid_longitude' in coord_names: cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 7c6598c38c..5e27805315 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -174,10 +174,7 @@ def calculate_volume(cube): return grid_volume -def volume_statistics( - cube, - operator, - fx_variables=None): +def volume_statistics(cube, operator): """ Apply a statistical operation over a volume. @@ -191,8 +188,6 @@ def volume_statistics( Input cube. operator: str The operation to apply to the cube, options are: 'mean'. - fx_variables: dict - dictionary of field:filename for the fx_variables Returns ------- @@ -219,8 +214,6 @@ def volume_statistics( 'Check fx_file availability.' 
) logger.info('Attempting to calculate grid cell volume...') - - if not fx_variables: grid_volume = calculate_volume(cube) if cube.data.shape != grid_volume.shape: From c41dbdf3b365b50ff8eb14f9b07cf0f1e656be69 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 12 Mar 2021 18:25:22 +0100 Subject: [PATCH 30/56] Use ancillary vars in mask and weighting preprocs --- esmvalcore/preprocessor/_mask.py | 94 +++------ esmvalcore/preprocessor/_weighting.py | 51 ++--- .../preprocessor/_mask/test_mask.py | 88 ++++++-- tests/unit/preprocessor/_mask/test_mask.py | 7 +- .../test_weighting_landsea_fraction.py | 188 +++++------------- 5 files changed, 164 insertions(+), 264 deletions(-) diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index 88e1ced169..7a85a9e3d1 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -20,28 +20,6 @@ logger = logging.getLogger(__name__) -def _check_dims(cube, mask_cube): - """Check for same ndim and x-y dimensions for data and mask cubes.""" - x_dim = cube.coord('longitude').points.ndim - y_dim = cube.coord('latitude').points.ndim - mx_dim = mask_cube.coord('longitude').points.ndim - my_dim = mask_cube.coord('latitude').points.ndim - len_x = len(cube.coord('longitude').points) - len_y = len(cube.coord('latitude').points) - len_mx = len(mask_cube.coord('longitude').points) - len_my = len(mask_cube.coord('latitude').points) - if (x_dim == mx_dim and y_dim == my_dim and len_x == len_mx - and len_y == len_my): - logger.debug('Data cube and fx mask have same dims') - return True - - logger.debug( - 'Data cube and fx mask differ in dims: ' - 'cube: ((%i, %i), grid=(%i, %i)), mask: ((%i, %i), grid=(%i, %i))', - x_dim, y_dim, len_x, len_y, mx_dim, my_dim, len_mx, len_my) - return False - - def _get_fx_mask(fx_data, fx_option, mask_type): """Build a percentage-thresholded mask from an fx file.""" inmask = np.zeros_like(fx_data, bool) @@ -86,7 +64,7 @@ def _apply_fx_mask(fx_mask, var_data): 
return var_data -def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): +def mask_landsea(cube, mask_out, always_use_ne_mask=False): """ Mask out either land mass or sea (oceans, seas and lakes). @@ -100,9 +78,6 @@ def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): cube: iris.cube.Cube data cube to be masked. - fx_variables: dict - dict: keys: fx variables, values: full paths to fx files. - mask_out: str either "land" to mask out land mass or "sea" to mask out seas. @@ -131,30 +106,24 @@ def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): 'sea': os.path.join(cwd, 'ne_masks/ne_50m_ocean.shp') } - fx_files = fx_variables.values() - if any(fx_files) and not always_use_ne_mask: - fx_cubes = {} - for fx_file in fx_files: - if not fx_file: - continue - fxfile_members = os.path.basename(fx_file).split('_') - for fx_root in ['sftlf', 'sftof']: - if fx_root in fxfile_members: - fx_cubes[fx_root] = iris.load_cube(fx_file) - + if not always_use_ne_mask: # preserve importance order: try stflf first then sftof - if ('sftlf' in fx_cubes.keys() - and _check_dims(cube, fx_cubes['sftlf'])): - landsea_mask = _get_fx_mask(fx_cubes['sftlf'].data, mask_out, - 'sftlf') - cube.data = _apply_fx_mask(landsea_mask, cube.data) - logger.debug("Applying land-sea mask: sftlf") - elif ('sftof' in fx_cubes.keys() - and _check_dims(cube, fx_cubes['sftof'])): - landsea_mask = _get_fx_mask(fx_cubes['sftof'].data, mask_out, - 'sftof') + fx_cube = None + try: + fx_cube = cube.ancillary_variable('land_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + try: + fx_cube = cube.ancillary_variable('sea_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + logger.debug( + 'Ancillary variables land/sea area fraction ' + 'not found in cube. 
Check fx_file availability.') + + if fx_cube: + landsea_mask = _get_fx_mask( + fx_cube.data, mask_out, fx_cube.var_name) cube.data = _apply_fx_mask(landsea_mask, cube.data) - logger.debug("Applying land-sea mask: sftof") + logger.debug("Applying land-sea mask: %s", fx_cube.var_name) else: if cube.coord('longitude').points.ndim < 2: cube = _mask_with_shp(cube, shapefiles[mask_out], [ @@ -183,7 +152,7 @@ def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): return cube -def mask_landseaice(cube, fx_variables, mask_out): +def mask_landseaice(cube, mask_out): """ Mask out either landsea (combined) or ice. @@ -195,9 +164,6 @@ def mask_landseaice(cube, fx_variables, mask_out): cube: iris.cube.Cube data cube to be masked. - fx_variables: dict - dict: keys: fx variables, values: full paths to fx files. - mask_out: str either "landsea" to mask out landsea or "ice" to mask out ice. @@ -215,20 +181,16 @@ def mask_landseaice(cube, fx_variables, mask_out): """ # sftgif is the only one so far but users can set others - fx_files = fx_variables.values() - if any(fx_files): - for fx_file in fx_files: - if not fx_file: - continue - fx_cube = iris.load_cube(fx_file) - - if _check_dims(cube, fx_cube): - landice_mask = _get_fx_mask(fx_cube.data, mask_out, 'sftgif') - cube.data = _apply_fx_mask(landice_mask, cube.data) - logger.debug("Applying landsea-ice mask: sftgif") - else: - msg = "Landsea-ice mask and data have different dimensions." - raise ValueError(msg) + fx_cube = None + try: + fx_cube = cube.ancillary_variable('land_ice_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + logger.debug('Ancillary variables land ice area fraction ' + 'not found in cube. Check fx_file availability.') + if fx_cube: + landice_mask = _get_fx_mask(fx_cube.data, mask_out, fx_cube.var_name) + cube.data = _apply_fx_mask(landice_mask, cube.data) + logger.debug("Applying landsea-ice mask: sftgif") else: msg = "Landsea-ice mask could not be found. Stopping. 
" raise ValueError(msg) diff --git a/esmvalcore/preprocessor/_weighting.py b/esmvalcore/preprocessor/_weighting.py index b786684135..6879e0b5e0 100644 --- a/esmvalcore/preprocessor/_weighting.py +++ b/esmvalcore/preprocessor/_weighting.py @@ -7,43 +7,31 @@ logger = logging.getLogger(__name__) -def _get_land_fraction(cube, fx_variables): +def _get_land_fraction(cube): """Extract land fraction as :mod:`dask.array`.""" + fx_cube = None land_fraction = None errors = [] - if not fx_variables: - errors.append("No fx files given.") - return (land_fraction, errors) - for (fx_var, fx_path) in fx_variables.items(): - if not fx_path: - errors.append(f"File for '{fx_var}' not found.") - continue - fx_cube = iris.load_cube(fx_path) - if not _shape_is_broadcastable(fx_cube.shape, cube.shape): + try: + fx_cube = cube.ancillary_variable('land_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + try: + fx_cube = cube.ancillary_variable('sea_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: errors.append( - f"Cube '{fx_var}' with shape {fx_cube.shape} not " - f"broadcastable to cube '{cube.var_name}' with shape " - f"{cube.shape}.") - continue - if fx_var == 'sftlf': - land_fraction = fx_cube.core_data() / 100.0 - break - if fx_var == 'sftof': - land_fraction = 1.0 - fx_cube.core_data() / 100.0 - break - errors.append( - f"Cannot calculate land fraction from '{fx_var}', expected " - f"'sftlf' or 'sftof'.") - return (land_fraction, errors) + 'Ancillary variables land/sea area fraction ' + 'not found in cube. 
Check fx_file availability.') + return (land_fraction, errors) + if fx_cube.var_name == 'sftlf': + land_fraction = fx_cube.core_data() / 100.0 + if fx_cube.var_name == 'sftof': + land_fraction = 1.0 - fx_cube.core_data() / 100.0 -def _shape_is_broadcastable(shape_1, shape_2): - """Check if two :mod:`numpy.array' shapes are broadcastable.""" - return all((m == n) or (m == 1) or (n == 1) - for (m, n) in zip(shape_1[::-1], shape_2[::-1])) + return (land_fraction, errors) -def weighting_landsea_fraction(cube, fx_variables, area_type): +def weighting_landsea_fraction(cube, area_type): """Weight fields using land or sea fraction. This preprocessor function weights a field with its corresponding land or @@ -58,9 +46,6 @@ def weighting_landsea_fraction(cube, fx_variables, area_type): ---------- cube : iris.cube.Cube Data cube to be weighted. - fx_variables : dict - Dictionary holding ``var_name`` (keys) and full paths (values) to the - fx files as ``str`` or empty ``list`` (if not available). area_type : str Use land (``'land'``) or sea (``'sea'``) fraction for weighting. 
@@ -81,7 +66,7 @@ def weighting_landsea_fraction(cube, fx_variables, area_type): if area_type not in ('land', 'sea'): raise TypeError( f"Expected 'land' or 'sea' for area_type, got '{area_type}'") - (land_fraction, errors) = _get_land_fraction(cube, fx_variables) + (land_fraction, errors) = _get_land_fraction(cube) if land_fraction is None: raise ValueError( f"Weighting of '{cube.var_name}' with '{area_type}' fraction " diff --git a/tests/integration/preprocessor/_mask/test_mask.py b/tests/integration/preprocessor/_mask/test_mask.py index 5ddb30a228..4e2ef513f8 100644 --- a/tests/integration/preprocessor/_mask/test_mask.py +++ b/tests/integration/preprocessor/_mask/test_mask.py @@ -10,8 +10,10 @@ import numpy as np import pytest +from esmvalcore.cmor.check import CheckLevels from esmvalcore.preprocessor import (PreprocessorFile, mask_fillvalues, - mask_landsea, mask_landseaice) + mask_landsea, mask_landseaice, + add_fx_variables) from tests import assert_array_equal @@ -47,58 +49,90 @@ def setUp(self): units='hours') self.coords_spec = [(self.lats, 0), (self.lons, 1)] self.fx_mask = iris.cube.Cube(fx_data, - dim_coords_and_dims=self.coords_spec) + dim_coords_and_dims=self.coords_spec, + units='%') self.mock_data = np.ma.empty((4, 3, 3)) self.mock_data[:] = 10. 
- def test_components_fx_dict(self, tmp_path): - """Test compatibility of input fx dictionary.""" + def test_components_fx_var(self, tmp_path): + """Test compatibility of ancillary variables.""" + self.fx_mask.var_name = 'sftlf' + self.fx_mask.standard_name = 'land_area_fraction' sftlf_file = str(tmp_path / 'sftlf_mask.nc') iris.save(self.fx_mask, sftlf_file) + fx_vars = { + 'sftlf': { + 'short_name': 'sftlf', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftlf_file} + } new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_land = add_fx_variables(new_cube_land, fx_vars, + CheckLevels.IGNORE) result_land = mask_landsea( new_cube_land, - { - 'sftlf': sftlf_file, - 'sftof': [], - }, 'land', ) assert isinstance(result_land, iris.cube.Cube) + self.fx_mask.var_name = 'sftgif' + self.fx_mask.standard_name = 'land_ice_area_fraction' sftgif_file = str(tmp_path / 'sftgif_mask.nc') iris.save(self.fx_mask, sftgif_file) + fx_vars = { + 'sftgif': { + 'short_name': 'sftgif', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftlf_file} + } new_cube_ice = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_ice = add_fx_variables(new_cube_ice, fx_vars, + CheckLevels.IGNORE) result_ice = mask_landseaice( new_cube_ice, - { - 'sftgif': sftgif_file, - 'sftof': [], - }, 'ice', ) assert isinstance(result_ice, iris.cube.Cube) def test_mask_landsea(self, tmp_path): """Test mask_landsea func.""" + self.fx_mask.var_name = 'sftlf' + self.fx_mask.standard_name = 'land_area_fraction' sftlf_file = str(tmp_path / 'sftlf_mask.nc') iris.save(self.fx_mask, sftlf_file) + fx_vars = { + 'sftlf': { + 'short_name': 'sftlf', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftlf_file} + } new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + 
new_cube_land = add_fx_variables(new_cube_land, fx_vars, + CheckLevels.IGNORE) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_sea = add_fx_variables(new_cube_sea, fx_vars, + CheckLevels.IGNORE) # mask with fx files result_land = mask_landsea( new_cube_land, - {'sftlf': sftlf_file}, 'land', ) result_sea = mask_landsea( new_cube_sea, - {'sftlf': sftlf_file}, 'sea', ) expected = np.ma.empty((3, 3)) @@ -117,17 +151,19 @@ def test_mask_landsea(self, tmp_path): # Mask with shp files although sftlf is available new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_land = add_fx_variables(new_cube_land, fx_vars, + CheckLevels.IGNORE) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_sea = add_fx_variables(new_cube_sea, fx_vars, + CheckLevels.IGNORE) result_land = mask_landsea( new_cube_land, - {'sftlf': sftlf_file}, 'land', always_use_ne_mask=True, ) result_sea = mask_landsea( new_cube_sea, - {'sftlf': sftlf_file}, 'sea', always_use_ne_mask=True, ) @@ -145,8 +181,8 @@ def test_mask_landsea(self, tmp_path): dim_coords_and_dims=self.coords_spec) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_land = mask_landsea(new_cube_land, {}, 'land') - result_sea = mask_landsea(new_cube_sea, {}, 'sea') + result_land = mask_landsea(new_cube_land, 'land') + result_sea = mask_landsea(new_cube_sea, 'sea') # bear in mind all points are in the ocean np.ma.set_fill_value(result_land.data, 1e+20) @@ -158,12 +194,24 @@ def test_mask_landsea(self, tmp_path): def test_mask_landseaice(self, tmp_path): """Test mask_landseaice func.""" + self.fx_mask.var_name = 'sftgif' + self.fx_mask.standard_name = 'land_ice_area_fraction' sftgif_file = str(tmp_path / 'sftgif_mask.nc') iris.save(self.fx_mask, sftgif_file) + fx_vars = { + 'sftgif': { + 'short_name': 'sftgif', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', 
+ 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftgif_file} + } new_cube_ice = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_ice = mask_landseaice(new_cube_ice, {'sftgif': sftgif_file}, - 'ice') + new_cube_ice = add_fx_variables(new_cube_ice, fx_vars, + CheckLevels.IGNORE) + result_ice = mask_landseaice(new_cube_ice, 'ice') expected = np.ma.empty((3, 3)) expected.data[:] = 200. expected.mask = np.ones((3, 3), bool) diff --git a/tests/unit/preprocessor/_mask/test_mask.py b/tests/unit/preprocessor/_mask/test_mask.py index 2d4c8948e6..549726e713 100644 --- a/tests/unit/preprocessor/_mask/test_mask.py +++ b/tests/unit/preprocessor/_mask/test_mask.py @@ -7,7 +7,7 @@ import iris import tests from cf_units import Unit -from esmvalcore.preprocessor._mask import (_apply_fx_mask, _check_dims, +from esmvalcore.preprocessor._mask import (_apply_fx_mask, count_spells, _get_fx_mask, mask_above_threshold, mask_below_threshold, @@ -63,11 +63,6 @@ def test_apply_fx_mask_on_masked_data(self): mask=dummy_fx_mask) self.assert_array_equal(fixed_mask, app_mask) - def test_check_dims(self): - """Test _check_dims func.""" - malformed_cube = self.arr[0] - np.testing.assert_equal(True, _check_dims(self.arr, self.arr)) - np.testing.assert_equal(False, _check_dims(self.arr, malformed_cube)) def test_count_spells(self): """Test count_spells func.""" diff --git a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py index 387279ec9e..cda6c551ab 100644 --- a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py +++ b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py @@ -8,95 +8,69 @@ import esmvalcore.preprocessor._weighting as weighting +crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) +LON_3 = iris.coords.DimCoord([0, 1.5, 3], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_east', 
+ coord_system=crd_sys) +LON_4 = iris.coords.DimCoord([0, 1.5, 2.5, 3.5], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3], + [3, 4]], + units='degrees_east', + coord_system=crd_sys) + CUBE_SFTLF = iris.cube.Cube( [10.0, 0.0, 100.0], var_name='sftlf', standard_name='land_area_fraction', units=Unit('%'), + dim_coords_and_dims=[(LON_3, 0),] ) CUBE_SFTOF = iris.cube.Cube( [100.0, 0.0, 50.0, 70.0], var_name='sftof', standard_name='sea_area_fraction', units=Unit('%'), + dim_coords_and_dims=[(LON_4, 0),] ) CUBE_3 = iris.cube.Cube( [10.0, 20.0, 0.0], var_name='dim3', + dim_coords_and_dims=[(LON_3, 0),] ) CUBE_4 = iris.cube.Cube( [1.0, 2.0, -1.0, 2.0], var_name='dim4', + dim_coords_and_dims=[(LON_4, 0),] ) + +CUBE_ANCILLARY_3 = CUBE_3.copy() +CUBE_ANCILLARY_3.add_ancillary_variable(CUBE_SFTLF, (0)) + +CUBE_ANCILLARY_4 = CUBE_4.copy() +CUBE_ANCILLARY_4.add_ancillary_variable(CUBE_SFTOF, (0)) + FRAC_SFTLF = np.array([0.1, 0.0, 1.0]) FRAC_SFTOF = np.array([0.0, 1.0, 0.5, 0.3]) -EMPTY_FX_FILES = { - 'sftlf': [], - 'sftof': [], -} -L_FX_FILES = { - 'sftlf': 'not/a/real/path', - 'sftof': [], -} -O_FX_FILES = { - 'sftlf': [], - 'sftof': 'not/a/real/path', -} -FX_FILES = { - 'sftlf': 'not/a/real/path', - 'sftof': 'i/was/mocked', -} -WRONG_FX_FILES = { - 'wrong': 'test', - 'sftlf': 'not/a/real/path', - 'sftof': 'i/was/mocked', -} LAND_FRACTION = [ - (CUBE_3, {}, [], None, ["No fx files given"]), - (CUBE_3, {'sftlf': []}, [], None, ["'sftlf' not found"]), - (CUBE_3, {'sftlf': 'a'}, [CUBE_SFTLF], FRAC_SFTLF, []), - (CUBE_3, {'sftof': 'a'}, [CUBE_SFTOF], None, ["not broadcastable"]), - (CUBE_3, EMPTY_FX_FILES, [], None, - ["'sftlf' not found", "'sftof' not found"]), - (CUBE_3, L_FX_FILES, [CUBE_SFTLF], FRAC_SFTLF, []), - (CUBE_3, O_FX_FILES, [CUBE_SFTOF], None, - ["'sftlf' not found", "not broadcastable"]), - (CUBE_3, FX_FILES, [CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, []), - (CUBE_3, {'wrong': 'a'}, [CUBE_SFTLF], None, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_3, {'wrong': 
'a'}, [CUBE_SFTOF], None, ["not broadcastable"]), - (CUBE_3, WRONG_FX_FILES, [CUBE_SFTLF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_3, WRONG_FX_FILES, [CUBE_SFTOF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, - ["not broadcastable"]), - (CUBE_4, {}, [], None, ["No fx files given"]), - (CUBE_4, {'sftlf': []}, [], None, ["'sftlf' not found"]), - (CUBE_4, {'sftlf': 'a'}, [CUBE_SFTLF], None, ["not broadcastable"]), - (CUBE_4, {'sftof': 'a'}, [CUBE_SFTOF], FRAC_SFTOF, []), - (CUBE_4, EMPTY_FX_FILES, [], None, - ["'sftlf' not found", "'sftof' not found"]), - (CUBE_4, L_FX_FILES, [CUBE_SFTLF], None, - ["not broadcastable", "'sftof' not found"]), - (CUBE_4, O_FX_FILES, [CUBE_SFTOF], FRAC_SFTOF, ["'sftlf' not found"]), - (CUBE_4, FX_FILES, [CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, - ["not broadcastable"]), - (CUBE_4, {'wrong': 'a'}, [CUBE_SFTLF], None, ["not broadcastable"]), - (CUBE_4, {'wrong': 'a'}, [CUBE_SFTOF], None, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_4, WRONG_FX_FILES, [CUBE_SFTLF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, - ["not broadcastable", "not broadcastable"]), - (CUBE_4, WRONG_FX_FILES, [CUBE_SFTOF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, - ["expected 'sftlf' or 'sftof'", "not broadcastable"]), + (CUBE_3, None, [ + 'Ancillary variables land/sea area fraction not found in cube. ' + 'Check fx_file availability.']), + (CUBE_4, None, [ + 'Ancillary variables land/sea area fraction not found in cube. 
' + 'Check fx_file availability.']), + (CUBE_ANCILLARY_3, FRAC_SFTLF, []), + (CUBE_ANCILLARY_4, FRAC_SFTOF, []) ] -@pytest.mark.parametrize('cube,fx_files,fx_cubes,out,err', LAND_FRACTION) -@mock.patch.object(weighting, 'iris', autospec=True) -def test_get_land_fraction(mock_iris, cube, fx_files, fx_cubes, out, err): +@pytest.mark.parametrize('cube,out,err', LAND_FRACTION) +def test_get_land_fraction(cube, out, err): """Test calculation of land fraction.""" - mock_iris.load_cube.side_effect = fx_cubes - (land_fraction, errors) = weighting._get_land_fraction(cube, fx_files) + (land_fraction, errors) = weighting._get_land_fraction(cube) if land_fraction is None: assert land_fraction == out else: @@ -104,46 +78,6 @@ def test_get_land_fraction(mock_iris, cube, fx_files, fx_cubes, out, err): assert len(errors) == len(err) for (idx, error) in enumerate(errors): assert err[idx] in error - mock_iris.reset_mock() - - -SHAPES_TO_BROADCAST = [ - ((), (1, ), True), - ((), (10, 10), True), - ((1, ), (10, ), True), - ((1, ), (10, 10), True), - ((2, ), (10, ), False), - ((10, ), (), True), - ((10, ), (1, ), True), - ((10, ), (10, ), True), - ((10, ), (10, 10), True), - ((10, ), (7, 1), True), - ((10, ), (10, 7), False), - ((10, ), (7, 1, 10), True), - ((10, ), (7, 1, 1), True), - ((10, ), (7, 1, 7), False), - ((10, ), (7, 10, 7), False), - ((10, 1), (1, 1), True), - ((10, 1), (1, 100), True), - ((10, 1), (10, 7), True), - ((10, 12), (10, 1), True), - ((10, 12), (), True), - ((10, 12), (1, ), True), - ((10, 12), (12, ), True), - ((10, 12), (1, 1), True), - ((10, 12), (1, 12), True), - ((10, 12), (10, 10, 1), True), - ((10, 12), (10, 12, 1), False), - ((10, 12), (10, 12, 12), False), - ((10, 12), (10, 10, 12), True), -] - - -@pytest.mark.parametrize('shape_1,shape_2,out', SHAPES_TO_BROADCAST) -def test_shape_is_broadcastable(shape_1, shape_2, out): - """Test check if two shapes are broadcastable.""" - is_broadcastable = weighting._shape_is_broadcastable(shape_1, shape_2) - assert 
is_broadcastable == out CUBE_3_L = CUBE_3.copy([1.0, 0.0, 0.0]) @@ -152,37 +86,20 @@ def test_shape_is_broadcastable(shape_1, shape_2, out): CUBE_4_O = CUBE_4.copy([1.0, 0.0, -0.5, 1.4]) WEIGHTING_LANDSEA_FRACTION = [ - (CUBE_3, {}, 'land', ValueError), - (CUBE_3, {}, 'sea', ValueError), - (CUBE_3, EMPTY_FX_FILES, 'land', ValueError), - (CUBE_3, EMPTY_FX_FILES, 'sea', ValueError), - (CUBE_3, L_FX_FILES, 'land', CUBE_3_L), - (CUBE_3, L_FX_FILES, 'sea', CUBE_3_O), - (CUBE_3, O_FX_FILES, 'land', ValueError), - (CUBE_3, O_FX_FILES, 'sea', ValueError), - (CUBE_3, FX_FILES, 'land', CUBE_3_L), - (CUBE_3, FX_FILES, 'sea', CUBE_3_O), - (CUBE_3, FX_FILES, 'wrong', TypeError), - (CUBE_4, {}, 'land', ValueError), - (CUBE_4, {}, 'sea', ValueError), - (CUBE_4, EMPTY_FX_FILES, 'land', ValueError), - (CUBE_4, EMPTY_FX_FILES, 'sea', ValueError), - (CUBE_4, L_FX_FILES, 'land', ValueError), - (CUBE_4, L_FX_FILES, 'sea', ValueError), - (CUBE_4, O_FX_FILES, 'land', CUBE_4_L), - (CUBE_4, O_FX_FILES, 'sea', CUBE_4_O), - (CUBE_4, FX_FILES, 'land', CUBE_4_L), - (CUBE_4, FX_FILES, 'sea', CUBE_4_O), - (CUBE_4, FX_FILES, 'wrong', TypeError), + (CUBE_3, 'land', ValueError), + (CUBE_3, 'sea', ValueError), + (CUBE_ANCILLARY_3, 'land', CUBE_3_L), + (CUBE_ANCILLARY_3, 'sea', CUBE_3_O), + (CUBE_4, 'land', ValueError), + (CUBE_4, 'sea', ValueError), + (CUBE_ANCILLARY_4, 'land', CUBE_4_L), + (CUBE_ANCILLARY_4, 'sea', CUBE_4_O), ] -@pytest.mark.parametrize('cube,fx_files,area_type,out', +@pytest.mark.parametrize('cube,area_type,out', WEIGHTING_LANDSEA_FRACTION) -@mock.patch.object(weighting, 'iris', autospec=True) -def test_weighting_landsea_fraction(mock_iris, - cube, - fx_files, +def test_weighting_landsea_fraction(cube, area_type, out): """Test landsea fraction weighting preprocessor.""" @@ -190,18 +107,11 @@ def test_weighting_landsea_fraction(mock_iris, if isinstance(out, type): with pytest.raises(out): weighted_cube = weighting.weighting_landsea_fraction( - cube, fx_files, area_type) + cube, 
area_type) return # Regular cases - fx_cubes = [] - if fx_files.get('sftlf'): - fx_cubes.append(CUBE_SFTLF) - if fx_files.get('sftof'): - fx_cubes.append(CUBE_SFTOF) - mock_iris.load_cube.side_effect = fx_cubes - weighted_cube = weighting.weighting_landsea_fraction( - cube, fx_files, area_type) - assert weighted_cube == cube + weighted_cube = weighting.weighting_landsea_fraction(cube, area_type) + assert np.array_equal(weighted_cube.data, cube.data) assert weighted_cube is cube - mock_iris.reset_mock() + \ No newline at end of file From 641e611d7fcc14b7caf574831453c9a3e4258322 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Sat, 13 Mar 2021 14:27:46 +0100 Subject: [PATCH 31/56] Fix tests due to fx_variables argument removal --- esmvalcore/preprocessor/_mask.py | 2 +- tests/integration/test_recipe.py | 201 ++++++++++++------------------- 2 files changed, 80 insertions(+), 123 deletions(-) diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index 7a85a9e3d1..5898151450 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -186,7 +186,7 @@ def mask_landseaice(cube, mask_out): fx_cube = cube.ancillary_variable('land_ice_area_fraction') except iris.exceptions.AncillaryVariableNotFoundError: logger.debug('Ancillary variables land ice area fraction ' - 'not found in cube. Check fx_file availability.') + 'not found in cube. 
Check fx_file availability.') if fx_cube: landice_mask = _get_fx_mask(fx_cube.data, mask_out, fx_cube.var_name) cube.data = _apply_fx_mask(landice_mask, cube.data) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index a22d404595..6bbcf6ed6a 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1664,9 +1664,9 @@ def test_weighting_landsea_fraction(tmp_path, patched_datafinder, config_user): for product in task.products: assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['area_type'] == 'land' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) if product.attributes['project'] == 'obs4mips': assert len(fx_variables) == 1 @@ -1715,18 +1715,12 @@ def test_weighting_landsea_fraction_no_fx(tmp_path, patched_failing_datafinder, for product in task.products: assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - if product.attributes['project'] == 'obs4mips': - assert len(fx_variables) == 1 - assert fx_variables['sftlf'] == [] - else: - assert len(fx_variables) == 2 - assert fx_variables['sftlf'] == [] - assert fx_variables['sftof'] == [] + assert len(fx_variables) == 0 def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, @@ -1772,13 +1766,9 @@ def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, continue assert 'weighting_landsea_fraction' in product.settings settings = 
product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert fx_variables.get('sftlf') def test_weighting_landsea_fraction_exclude_fail(tmp_path, patched_datafinder, @@ -1852,9 +1842,9 @@ def test_landmask(tmp_path, patched_datafinder, config_user): for product in task.products: assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) fx_variables = fx_variables.values() if product.attributes['project'] == 'obs4mips': @@ -1908,39 +1898,33 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): # landsea settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['sftlf'] - assert '_piControl_' in fx_variables['sftlf'] + assert len(fx_variables) == 3 + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_piControl_' in fx_variables['sftlf']['filename'] # landseaice settings = product.settings['mask_landseaice'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['sftgif'] - assert '_piControl_' in fx_variables['sftgif'] + assert '_fx_' in 
fx_variables['sftlf']['filename'] + assert '_piControl_' in fx_variables['sftlf']['filename'] # volume statistics settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - assert settings['fx_variables'] == {} + assert 'volcello' not in fx_variables # area statistics settings = product.settings['area_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['areacello'] - assert '_piControl_' in fx_variables['areacello'] + assert '_fx_' in fx_variables['areacello']['filename'] + assert '_piControl_' in fx_variables['areacello']['filename'] def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): @@ -1982,39 +1966,33 @@ def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): # landsea settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['sftlf'] - assert '_piControl_' in fx_variables['sftlf'] + assert len(fx_variables) == 3 + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_piControl_' in fx_variables['sftlf']['filename'] # landseaice settings = product.settings['mask_landseaice'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['sftgif'] - assert '_piControl_' in fx_variables['sftgif'] + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_piControl_' in 
fx_variables['sftlf']['filename'] # volume statistics settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - assert settings['fx_variables'] == {} + assert 'volcello' not in fx_variables # area statistics settings = product.settings['area_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['areacello'] - assert '_piControl_' in fx_variables['areacello'] + assert '_fx_' in fx_variables['areacello']['filename'] + assert '_piControl_' in fx_variables['areacello']['filename'] def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): @@ -2056,10 +2034,10 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): for product in task.products: assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 3 + assert len(settings) == 2 assert settings['mask_out'] == 'sea' assert settings['always_use_ne_mask'] is False - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) fx_variables = fx_variables.values() assert not any(fx_variables) @@ -2109,37 +2087,26 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert len(task.products) == 1 product = task.products.pop() - # Check area_statistics + # Check area_statistics assert 'area_statistics' in product.settings settings = product.settings['area_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 6 - 
assert '_fx_' in fx_variables['areacella'] - assert '_Ofx_' in fx_variables['areacello'] - assert '_Efx_' in fx_variables['clayfrac'] - assert '_fx_' in fx_variables['sftlf'] - assert '_fx_' in fx_variables['sftgif'] - assert '_Ofx_' in fx_variables['sftof'] + assert '_fx_' in fx_variables['areacella']['filename'] + assert '_Ofx_' in fx_variables['areacello']['filename'] + assert '_Efx_' in fx_variables['clayfrac']['filename'] + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_Ofx_' in fx_variables['sftof']['filename'] # Check mask_landsea assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - fx_variables = fx_variables.values() - assert len(fx_variables) == 2 - for fx_file in fx_variables: - if 'sftlf' in fx_file: - assert '_fx_' in fx_file - elif 'sftof' in fx_file: - assert '_Ofx_' in fx_file - else: - assert False def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): @@ -2187,34 +2154,23 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): # Check area_statistics assert 'area_statistics' in product.settings settings = product.settings['area_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 6 - assert '_fx_' in fx_variables['areacella'] - assert '_Ofx_' in fx_variables['areacello'] - assert '_Efx_' in fx_variables['clayfrac'] - assert '_fx_' in fx_variables['sftlf'] - assert '_IyrAnt_' in fx_variables['sftgif'][0] - assert '_Ofx_' in fx_variables['sftof'] + assert '_fx_' in 
fx_variables['areacella']['filename'] + assert '_Ofx_' in fx_variables['areacello']['filename'] + assert '_Efx_' in fx_variables['clayfrac']['filename'] + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_IyrAnt_' in fx_variables['sftgif']['filename'][0] + assert '_Ofx_' in fx_variables['sftof']['filename'] # Check mask_landsea assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - fx_variables = fx_variables.values() - assert len(fx_variables) == 2 - for fx_file in fx_variables: - if 'sftlf' in fx_file: - assert '_fx_' in fx_file - elif 'sftof' in fx_file: - assert '_Ofx_' in fx_file - else: - assert False def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2258,13 +2214,13 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Omon_' not in fx_variables['volcello'] - assert '_Ofx_' in fx_variables['volcello'] + assert '_Omon_' not in fx_variables['volcello']['filename'] + assert '_Ofx_' in fx_variables['volcello']['filename'] def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2307,14 +2263,14 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - 
fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Oyr_' in fx_variables['volcello'][0] - assert '_piControl_' in fx_variables['volcello'][0] - assert '_Omon_' not in fx_variables['volcello'][0] + assert '_Oyr_' in fx_variables['volcello']['filename'][0] + assert '_piControl_' in fx_variables['volcello']['filename'][0] + assert '_Omon_' not in fx_variables['volcello']['filename'][0] def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, @@ -2364,9 +2320,10 @@ def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, assert product.files assert 'area_statistics' in product.settings settings = product.settings['area_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - assert settings['fx_variables'] == {} + fx_variables = product.settings['add_fx_variables']['fx_variables'] + assert fx_variables == {} def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, @@ -2408,13 +2365,13 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello'][0] - assert '_Omon_' in fx_variables['volcello'][0] + assert '_Ofx_' not in fx_variables['volcello']['filename'][0] + assert '_Omon_' in fx_variables['volcello']['filename'][0] def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, @@ -2456,13 +2413,13 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, 
patched_failing_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello'][0] - assert '_Oyr_' in fx_variables['volcello'][0] + assert '_Ofx_' not in fx_variables['volcello']['filename'][0] + assert '_Oyr_' in fx_variables['volcello']['filename'][0] def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, @@ -2502,13 +2459,13 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['volcello'] - assert '_Omon_' not in fx_variables['volcello'] + assert '_fx_' in fx_variables['volcello']['filename'] + assert '_Omon_' not in fx_variables['volcello']['filename'] def test_wrong_project(tmp_path, patched_datafinder, config_user): From 004cca4a810748c26bc08f746e134eea3cc068ec Mon Sep 17 00:00:00 2001 From: sloosvel Date: Sat, 13 Mar 2021 14:47:41 +0100 Subject: [PATCH 32/56] Fix flake --- tests/integration/test_recipe.py | 2 +- tests/unit/preprocessor/_mask/test_mask.py | 1 - .../_weighting/test_weighting_landsea_fraction.py | 10 ++++------ 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 6bbcf6ed6a..a4babcf410 100644 --- 
a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2087,7 +2087,7 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert len(task.products) == 1 product = task.products.pop() - # Check area_statistics + # Check area_statistics assert 'area_statistics' in product.settings settings = product.settings['area_statistics'] assert len(settings) == 1 diff --git a/tests/unit/preprocessor/_mask/test_mask.py b/tests/unit/preprocessor/_mask/test_mask.py index 549726e713..a6b28e2cae 100644 --- a/tests/unit/preprocessor/_mask/test_mask.py +++ b/tests/unit/preprocessor/_mask/test_mask.py @@ -63,7 +63,6 @@ def test_apply_fx_mask_on_masked_data(self): mask=dummy_fx_mask) self.assert_array_equal(fixed_mask, app_mask) - def test_count_spells(self): """Test count_spells func.""" ref_spells = count_spells(self.time_cube.data, -1000., 0, 1) diff --git a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py index cda6c551ab..2de0ff3c80 100644 --- a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py +++ b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py @@ -1,5 +1,4 @@ """Unit tests for :mod:`esmvalcore.preprocessor._weighting`.""" -from unittest import mock import iris import numpy as np @@ -26,24 +25,24 @@ var_name='sftlf', standard_name='land_area_fraction', units=Unit('%'), - dim_coords_and_dims=[(LON_3, 0),] + dim_coords_and_dims=[(LON_3, 0), ] ) CUBE_SFTOF = iris.cube.Cube( [100.0, 0.0, 50.0, 70.0], var_name='sftof', standard_name='sea_area_fraction', units=Unit('%'), - dim_coords_and_dims=[(LON_4, 0),] + dim_coords_and_dims=[(LON_4, 0), ] ) CUBE_3 = iris.cube.Cube( [10.0, 20.0, 0.0], var_name='dim3', - dim_coords_and_dims=[(LON_3, 0),] + dim_coords_and_dims=[(LON_3, 0), ] ) CUBE_4 = iris.cube.Cube( [1.0, 2.0, -1.0, 2.0], var_name='dim4', - dim_coords_and_dims=[(LON_4, 0),] + 
dim_coords_and_dims=[(LON_4, 0), ] ) CUBE_ANCILLARY_3 = CUBE_3.copy() @@ -114,4 +113,3 @@ def test_weighting_landsea_fraction(cube, weighted_cube = weighting.weighting_landsea_fraction(cube, area_type) assert np.array_equal(weighted_cube.data, cube.data) assert weighted_cube is cube - \ No newline at end of file From 796b3f1edd529f50d0789f7ed1f425cc73c840f8 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Sat, 13 Mar 2021 15:55:24 +0100 Subject: [PATCH 33/56] Skip uneeded broadcast --- esmvalcore/preprocessor/_mask.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index 5898151450..bf4dbba74b 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -2,7 +2,7 @@ Mask module. Module that performs a number of masking -operations that include: masking with fx files, masking with +operations that include: masking with ancillary variables, masking with Natural Earth shapefiles (land or ocean), masking on thresholds, missing values masking. """ @@ -50,16 +50,12 @@ def _get_fx_mask(fx_data, fx_option, mask_type): def _apply_fx_mask(fx_mask, var_data): """Apply the fx data extracted mask on the actual processed data.""" - # Broadcast mask - var_mask = np.zeros_like(var_data, bool) - var_mask = np.broadcast_to(fx_mask, var_mask.shape).copy() - # Apply mask across if np.ma.is_masked(var_data): - var_mask |= var_data.mask + fx_mask |= var_data.mask # Build the new masked data - var_data = np.ma.array(var_data, mask=var_mask, fill_value=1e+20) + var_data = np.ma.array(var_data, mask=fx_mask, fill_value=1e+20) return var_data @@ -68,10 +64,12 @@ def mask_landsea(cube, mask_out, always_use_ne_mask=False): """ Mask out either land mass or sea (oceans, seas and lakes). - It uses dedicated fx files (sftlf or sftof) or, in their absence, it - applies a Natural Earth mask (land or ocean contours). 
Note that the - Natural Earth masks have different resolutions: 10m for land, and 50m - for seas; these are more than enough for ESMValTool puprpose. + It uses dedicated ancillary variables (sftlf or sftof) or, + in their absence, it applies a + Natural Earth mask (land or ocean contours). + Note that the Natural Earth masks have different resolutions: + 10m for land, and 50m for seas. + These are more than enough for ESMValTool purposes. Parameters ---------- @@ -157,7 +155,9 @@ def mask_landseaice(cube, mask_out): Mask out either landsea (combined) or ice. Function that masks out either landsea (land and seas) or ice (Antarctica - and Greenland and some wee glaciers). It uses dedicated fx files (sftgif). + and Greenland and some wee glaciers). + + It uses dedicated ancillary variables (sftgif). Parameters ---------- @@ -175,9 +175,7 @@ def mask_landseaice(cube, mask_out): Raises ------ ValueError - Error raised if fx mask and data have different dimensions. - ValueError - Error raised if fx files list is empty. + Error raised if landsea-ice mask not found as an ancillary variable. """ # sftgif is the only one so far but users can set others @@ -185,7 +183,7 @@ def mask_landseaice(cube, mask_out): try: fx_cube = cube.ancillary_variable('land_ice_area_fraction') except iris.exceptions.AncillaryVariableNotFoundError: - logger.debug('Ancillary variables land ice area fraction ' + logger.debug('Ancillary variable land ice area fraction ' 'not found in cube. 
Check fx_file availability.') if fx_cube: landice_mask = _get_fx_mask(fx_cube.data, mask_out, fx_cube.var_name) From 0882274283eee2d772ab4a1acf23deb433832eaa Mon Sep 17 00:00:00 2001 From: sloosvel Date: Sat, 13 Mar 2021 16:25:53 +0100 Subject: [PATCH 34/56] Remove zonal_stats from fx_steps --- esmvalcore/_recipe.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 6261c21cfd..f324b74374 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -463,22 +463,23 @@ def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars.update({'sftof': None}) elif step_name == 'mask_landseaice': user_fx_vars = {'sftgif': None} - elif step_name in ('area_statistics', 'volume_statistics', - 'zonal_statistics'): + elif step_name in ('area_statistics', 'volume_statistics'): user_fx_vars = {} step_settings['fx_variables'] = user_fx_vars fx_steps = [ 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', - 'zonal_statistics', 'area_statistics', 'volume_statistics' + 'area_statistics', 'volume_statistics' ] for step_name in settings: if step_name in fx_steps: _get_fx_vars_from_attribute(settings[step_name], step_name) _update_fx_files(step_name, settings, variable, config_user, settings[step_name]['fx_variables']) - if 'fx_variables' in settings[step_name]: - settings[step_name].pop('fx_variables', None) + # Remove unused attribute in 'fx_steps' preprocessors. + # The fx_variables information is saved in + # the 'add_fx_variables' step. 
+ settings[step_name].pop('fx_variables', None) def _read_attributes(filename): From 763ee9c7738b4310cab26e9f645c8d8562828248 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Sat, 13 Mar 2021 16:26:36 +0100 Subject: [PATCH 35/56] Make functions public --- esmvalcore/preprocessor/_ancillary_vars.py | 47 +++++++++++++++++++--- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index 555e2d03f5..c8667becdc 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -46,8 +46,28 @@ def _load_fx(fx_info, check_level): return fx_cube -def _add_cell_measure(cube, fx_cube, measure): - """Add cell measure in cube.""" +def add_cell_measure(cube, fx_cube, measure): + """ + Add cell_measure in the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with input data. + fx_cube: iris.cube.Cube + Iris cube with fx data. + measure: str + Name of the measure, can be 'area' or 'volume'. + + + Returns + ------- + iris.cube.Cube + Cube with added ancillary variables + """ + if 'measure' not in ['area', 'volume']: + raise ValueError(f"measure name must be 'area or volume, " + f"got {measure} instead") try: fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) except ValueError as exc: @@ -65,8 +85,23 @@ def _add_cell_measure(cube, fx_cube, measure): fx_cube.var_name, cube.var_name) -def _add_ancillary_variable(cube, fx_cube): - """Add ancillary variable in cube.""" +def add_ancillary_variable(cube, fx_cube): + """ + Add ancillary variables in the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with input data. + fx_cube: iris.cube.Cube + Iris cube with fx data. 
+ + + Returns + ------- + iris.cube.Cube + Cube with added ancillary variables + """ try: fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) except ValueError as exc: @@ -122,7 +157,7 @@ def add_fx_variables(cube, fx_variables, check_level): } if fx_cube.var_name in measure_name: - _add_cell_measure(cube, fx_cube, measure_name[fx_cube.var_name]) + add_cell_measure(cube, fx_cube, measure_name[fx_cube.var_name]) else: - _add_ancillary_variable(cube, fx_cube) + add_ancillary_variable(cube, fx_cube) return cube From d0fb5cd6ca3a31445bcc79f37b8b18ba84c01082 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Sat, 13 Mar 2021 16:27:28 +0100 Subject: [PATCH 36/56] Improve documentation --- doc/recipe/preprocessor.rst | 41 ++++++++++++++++++++++----- esmvalcore/preprocessor/_weighting.py | 3 +- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 85dbf37419..d3bd4e62ec 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -187,6 +187,33 @@ checks them against the CMOR standards and adds them as either a ``cell_measure` or an ``ancillary_variable`` inside the cube of data. This ensures that the defined preprocessor chain is applied to both the variables and the fx_variables. +Note that when calling steps that require ``fx_variables`` inside of diagnostics +scripts, the variables are expected to contain the required ``cell_measures`` or +``ancillary_variables``. If missing, they can be added using the following functions: + +.. 
code-block:: + + from esmvalcore.preprocessor import (add_cell_measure, + add_ancillary_variable, + area_statistics, + volume_statistics, + mask_landsea, + mask_landseaice, + weighting_landsea_fraction) + + cube_with_area_measure = add_cell_measure(cube, area_cube, 'area') + result_area = area_statistics(cube_with_area_measure, 'mean') + + cube_with_volume_measure = add_cell_measure(cube, volume_cube, 'volume) + result_volume = volume_statistics(cube_with_volume_measure, 'mean') + + cube_with_ancillary_sftlf = add_ancillary_variable(cube, sftlf_cube) + result_landsea_mask = mask_landsea(cube_with_ancillary_sftlf, 'land') + result_landsea_weighting = weighting_landsea_fraction(cube_with_ancillary_sftlf, 'land') + + cube_with_ancillary_sftgif = add_ancillary_variable(cube, sftgif_cube) + result_landsea_mask = mask_landsea(cube_with_ancillary_sftgif, 'land') + .. _Vertical interpolation: Vertical interpolation @@ -352,8 +379,8 @@ or alternatively: area_type: land exclude: ['CanESM2', 'reference_dataset'] fx_variables: [ - {short_name: sftlf, exp: piControl}, - {short_name: sftof, exp: piControl} + {'short_name': 'sftlf', 'exp': 'piControl'}, + {'short_name': 'sftof', 'exp': 'piControl'} ] See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. @@ -431,8 +458,8 @@ or alternatively: mask_landsea: mask_out: sea fx_variables: [ - {short_name: sftlf, exp: piControl}, - {short_name: sftof, exp: piControl} + {'short_name': 'sftlf', 'exp': 'piControl'}, + {'short_name': 'sftof', 'exp': 'piControl'} ] If the corresponding fx file is not found (which is @@ -485,7 +512,7 @@ or alternatively: landseaicemask: mask_landseaice: mask_out: sea - fx_variables: [{short_name: sftgif, exp: piControl}] + fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] See also :func:`esmvalcore.preprocessor.mask_landseaice`. @@ -1352,7 +1379,7 @@ or as a list of dictionaries: .. 
code-block:: yaml - fx_variables: [{short_name: areacello, mip: Omon}, {short_name: volcello, mip: fx}] + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. @@ -1430,7 +1457,7 @@ or as a list of dictionaries: .. code-block:: yaml - fx_variables: [{short_name: areacello, mip: Omon}, {short_name: volcello, mip: fx}] + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. diff --git a/esmvalcore/preprocessor/_weighting.py b/esmvalcore/preprocessor/_weighting.py index 6879e0b5e0..32e6c526a0 100644 --- a/esmvalcore/preprocessor/_weighting.py +++ b/esmvalcore/preprocessor/_weighting.py @@ -59,8 +59,7 @@ def weighting_landsea_fraction(cube, area_type): TypeError ``area_type`` is not ``'land'`` or ``'sea'``. ValueError - Land/sea fraction variables ``sftlf`` or ``sftof`` not found or shape - of them is not broadcastable to ``cube``. + Land/sea fraction variables ``sftlf`` or ``sftof`` not found. """ if area_type not in ('land', 'sea'): From d9cacbc18f2cb8151de294ee56845b76eb66ac3b Mon Sep 17 00:00:00 2001 From: sloosvel Date: Sat, 13 Mar 2021 16:43:30 +0100 Subject: [PATCH 37/56] Improve documentation --- doc/recipe/preprocessor.rst | 4 +++- esmvalcore/preprocessor/_ancillary_vars.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index d3bd4e62ec..392fe9ea6b 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -187,7 +187,7 @@ checks them against the CMOR standards and adds them as either a ``cell_measure` or an ``ancillary_variable`` inside the cube of data. 
This ensures that the defined preprocessor chain is applied to both the variables and the fx_variables. -Note that when calling steps that require ``fx_variables`` inside of diagnostics +Note that when calling steps that require ``fx_variables`` inside of diagnostic scripts, the variables are expected to contain the required ``cell_measures`` or ``ancillary_variables``. If missing, they can be added using the following functions: @@ -213,6 +213,8 @@ scripts, the variables are expected to contain the required ``cell_measures`` or cube_with_ancillary_sftgif = add_ancillary_variable(cube, sftgif_cube) result_landsea_mask = mask_landsea(cube_with_ancillary_sftgif, 'land') + + Details on the arguments needed for each step can be found in the following sections. .. _Vertical interpolation: diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index c8667becdc..deecea4cc7 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -48,7 +48,8 @@ def _load_fx(fx_info, check_level): def add_cell_measure(cube, fx_cube, measure): """ - Add cell_measure in the cube containing the data. + Broadcast fx_cube and add it as a cell_measure in + the cube containing the data. Parameters ---------- @@ -87,7 +88,8 @@ def add_cell_measure(cube, fx_cube, measure): def add_ancillary_variable(cube, fx_cube): """ - Add ancillary variables in the cube containing the data. + Broadcast fx_cube and add it as an ancillary_variable in + the cube containing the data. 
Parameters ---------- From 584edf1d96f43835f2e2f3e5d754a2f9145e888d Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 15 Mar 2021 08:57:22 +0100 Subject: [PATCH 38/56] Fix bug --- esmvalcore/preprocessor/_ancillary_vars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index deecea4cc7..d4add6d239 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -66,7 +66,7 @@ def add_cell_measure(cube, fx_cube, measure): iris.cube.Cube Cube with added ancillary variables """ - if 'measure' not in ['area', 'volume']: + if measure not in ['area', 'volume']: raise ValueError(f"measure name must be 'area or volume, " f"got {measure} instead") try: From b1a16d74798e19e49302175a091ca140f6c4a3e9 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 15 Mar 2021 12:00:04 +0100 Subject: [PATCH 39/56] Fix flake --- tests/unit/preprocessor/_area/test_area.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index 163cdc1d4a..36c3e85cb9 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -22,6 +22,7 @@ ) from esmvalcore.preprocessor._shared import guess_bounds + class Test(tests.Test): """Test class for the :func:`esmvalcore.preprocessor._area_pp` module.""" def setUp(self): From 9e62deb1713d412cc5c17bb920a50477132c8efa Mon Sep 17 00:00:00 2001 From: sloosvel Date: Wed, 24 Mar 2021 12:04:48 +0100 Subject: [PATCH 40/56] Attempt to fix codacy --- esmvalcore/_recipe.py | 38 +++++++++++++++++------------- esmvalcore/preprocessor/_volume.py | 8 +++---- tests/integration/test_recipe.py | 2 +- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index f324b74374..b4a93efbd5 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ 
-333,6 +333,25 @@ def _add_fxvar_keys(fx_info, variable): return fx_variable +def _search_fx_mip(tables, found_mip, variable, fx_info, config_user): + fx_files = None + for mip in tables: + fx_cmor = tables[mip].get(fx_info['short_name']) + if fx_cmor: + found_mip = True + fx_info['mip'] = mip + fx_info = _add_fxvar_keys(fx_info, variable) + logger.debug( + "For fx variable '%s', found table '%s'", + fx_info['short_name'], mip) + fx_files = _get_input_files(fx_info, config_user)[0] + if fx_files: + logger.debug( + "Found fx variables '%s':\n%s", + fx_info['short_name'], pformat(fx_files)) + return found_mip, fx_info, fx_files + + def _get_fx_files(variable, fx_info, config_user): """Get fx files (searching all possible mips).""" @@ -351,22 +370,8 @@ def _get_fx_files(variable, fx_info, config_user): # force only the mip declared by user found_mip = False if not fx_info['mip']: - for mip in project_tables: - fx_cmor = project_tables[mip].get(fx_info['short_name']) - if fx_cmor: - found_mip = True - fx_info['mip'] = mip - fx_info = _add_fxvar_keys(fx_info, variable) - logger.debug( - "For fx variable '%s', found table '%s'", - fx_info['short_name'], mip) - fx_files = _get_input_files(fx_info, config_user)[0] - if fx_files: - logger.debug( - "Found fx variables '%s':\n%s", - fx_info['short_name'], - pformat(fx_files)) - break + found_mip, fx_info, fx_files = _search_fx_mip( + project_tables, found_mip, variable, fx_info, config_user) else: fx_cmor = project_tables[fx_info['mip']].get(fx_info['short_name']) if fx_cmor: @@ -449,7 +454,6 @@ def _fx_list_to_dict(fx_vars): def _update_fx_settings(settings, variable, config_user): """Update fx settings depending on the needed method.""" - # get fx variables either from user defined attribute or fixed def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars = step_settings.get('fx_variables') diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 5e27805315..27182081be 
100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -184,10 +184,10 @@ def volume_statistics(cube, operator): Parameters ---------- - cube: iris.cube.Cube - Input cube. - operator: str - The operation to apply to the cube, options are: 'mean'. + cube: iris.cube.Cube + Input cube. + operator: str + The operation to apply to the cube, options are: 'mean'. Returns ------- diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index a4babcf410..dffda99726 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2163,7 +2163,7 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert '_Ofx_' in fx_variables['areacello']['filename'] assert '_Efx_' in fx_variables['clayfrac']['filename'] assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_IyrAnt_' in fx_variables['sftgif']['filename'][0] + assert '_fx_' in fx_variables['sftgif']['filename'] assert '_Ofx_' in fx_variables['sftof']['filename'] # Check mask_landsea From dd5f39665ab22c3b2360f2d581a215796fc4e101 Mon Sep 17 00:00:00 2001 From: sloosvel <45196700+sloosvel@users.noreply.github.com> Date: Wed, 24 Mar 2021 12:43:55 +0100 Subject: [PATCH 41/56] Apply suggestions from code review Co-authored-by: Valeriu Predoi --- doc/recipe/preprocessor.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 4b76f4329c..e7ced92f06 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -180,14 +180,14 @@ To get an overview on data fixes and how to implement new ones, please go to Fx variables as cell measures or ancillary variables ==================================================== -Preprocessors steps related to spatial statistics or masking may require +Preprocessor steps related to spatial statistics or masking may require the use of ``fx_variables`` to be able to perform the 
computations. The preprocessor step ``add_fx_variables`` loads the required ``fx_variables``, -checks them against the CMOR standards and adds them as either a ``cell_measure`` -or an ``ancillary_variable`` inside the cube of data. This ensures that the -defined preprocessor chain is applied to both the variables and the fx_variables. +checks them against CMOR standards and adds them either as ``cell_measure`` +or ``ancillary_variable`` inside the cube data. This ensures that the +defined preprocessor chain is applied to both ``variables`` and ``fx_variables``. -Note that when calling steps that require ``fx_variables`` inside of diagnostic +Note that when calling steps that require ``fx_variables`` inside diagnostic scripts, the variables are expected to contain the required ``cell_measures`` or ``ancillary_variables``. If missing, they can be added using the following functions: @@ -1425,7 +1425,7 @@ the user may specify it calling the variables e.g. areacello: volcello: -or calling the variables and adding specific variable parameters (they key-value pair may be as specific +or calling the variables and adding specific variable parameters (the key-value pair may be as specific as a CMOR variable can permit): .. code-block:: yaml @@ -1503,7 +1503,7 @@ the user may specify it calling the variables e.g. areacello: volcello: -or calling the variables and adding specific variable parameters (they key-value pair may be as specific +or calling the variables and adding specific variable parameters (the key-value pair may be as specific as a CMOR variable can permit): .. 
code-block:: yaml From 6eda1f2da29cdc961543658b5afde3070267b9a3 Mon Sep 17 00:00:00 2001 From: sloosvel <45196700+sloosvel@users.noreply.github.com> Date: Wed, 24 Mar 2021 16:15:02 +0100 Subject: [PATCH 42/56] Update esmvalcore/preprocessor/_ancillary_vars.py Co-authored-by: Valeriu Predoi --- esmvalcore/preprocessor/_ancillary_vars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index d4add6d239..19cea7ddcf 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -67,7 +67,7 @@ def add_cell_measure(cube, fx_cube, measure): Cube with added ancillary variables """ if measure not in ['area', 'volume']: - raise ValueError(f"measure name must be 'area or volume, " + raise ValueError(f"measure name must be 'area' or 'volume', " f"got {measure} instead") try: fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) From eab43f59911367e3741a0b3eeb3219e1273958c5 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 26 Mar 2021 16:52:10 +0100 Subject: [PATCH 43/56] Improve documentations --- doc/recipe/preprocessor.rst | 19 +++++------------ esmvalcore/preprocessor/_ancillary_vars.py | 24 ++++++++++++++++------ tests/integration/test_recipe.py | 2 ++ 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index e7ced92f06..bf1cc71d78 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -193,26 +193,15 @@ scripts, the variables are expected to contain the required ``cell_measures`` or .. 
code-block:: - from esmvalcore.preprocessor import (add_cell_measure, - add_ancillary_variable, - area_statistics, - volume_statistics, - mask_landsea, - mask_landseaice, - weighting_landsea_fraction) + from esmvalcore.preprocessor import (add_cell_measure, add_ancillary_variable) cube_with_area_measure = add_cell_measure(cube, area_cube, 'area') - result_area = area_statistics(cube_with_area_measure, 'mean') cube_with_volume_measure = add_cell_measure(cube, volume_cube, 'volume) - result_volume = volume_statistics(cube_with_volume_measure, 'mean') cube_with_ancillary_sftlf = add_ancillary_variable(cube, sftlf_cube) - result_landsea_mask = mask_landsea(cube_with_ancillary_sftlf, 'land') - result_landsea_weighting = weighting_landsea_fraction(cube_with_ancillary_sftlf, 'land') cube_with_ancillary_sftgif = add_ancillary_variable(cube, sftgif_cube) - result_landsea_mask = mask_landsea(cube_with_ancillary_sftgif, 'land') Details on the arguments needed for each step can be found in the following sections. @@ -436,7 +425,8 @@ missing. Conversely, it retrieves the ``fx: sftlf`` mask when land needs to be masked out, respectively. Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain -experiment is preferred for fx data retrieval: +experiment is preferred for fx data retrieval. Note that it is possible to specify as many tags +for the fx variable as required: .. 
code-block:: yaml @@ -450,6 +440,7 @@ experiment is preferred for fx data retrieval: exp: piControl sftof: exp: piControl + ensemble: r2i1p1f1 or alternatively: @@ -461,7 +452,7 @@ or alternatively: mask_out: sea fx_variables: [ {'short_name': 'sftlf', 'exp': 'piControl'}, - {'short_name': 'sftof', 'exp': 'piControl'} + {'short_name': 'sftof', 'exp': 'piControl', 'ensemble': 'r2i1p1f1'} ] If the corresponding fx file is not found (which is diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index d4add6d239..ba13690e5d 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -60,11 +60,17 @@ def add_cell_measure(cube, fx_cube, measure): measure: str Name of the measure, can be 'area' or 'volume'. - Returns ------- iris.cube.Cube Cube with added ancillary variables + + Raises + ------ + ValueError + If measure name is not 'area' or 'volume'. + ValueError + If fx_cube cannot be broadcast to cube. """ if measure not in ['area', 'volume']: raise ValueError(f"measure name must be 'area or volume, " @@ -72,8 +78,9 @@ def add_cell_measure(cube, fx_cube, measure): try: fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) except ValueError as exc: - raise ValueError(f"Frequencies of {cube.var_name} and " - f"{fx_cube.var_name} cubes do not match.") from exc + raise ValueError(f"Dimensions of {cube.var_name} and " + f"{fx_cube.var_name} cubes do not match. " + "Cannot broadcast cubes.") from exc measure = iris.coords.CellMeasure( fx_data, standard_name=fx_cube.standard_name, @@ -98,17 +105,22 @@ def add_ancillary_variable(cube, fx_cube): fx_cube: iris.cube.Cube Iris cube with fx data. - Returns ------- iris.cube.Cube Cube with added ancillary variables + + Raises + ------ + ValueError + If fx_cube cannot be broadcast to cube. 
""" try: fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) except ValueError as exc: - raise ValueError(f"Frequencies of {cube.var_name} and " - f"{fx_cube.var_name} cubes do not match.") from exc + raise ValueError(f"Dimensions of {cube.var_name} and " + f"{fx_cube.var_name} cubes do not match. " + "Cannot broadcast cubes.") from exc ancillary_var = iris.coords.AncillaryVariable( fx_data, standard_name=fx_cube.standard_name, diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 0d1dd0b942..a90dd9e37c 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2053,6 +2053,7 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): operator: mean fx_variables: areacella: + ensemble: r2i1p1f1 areacello: clayfrac: sftlf: @@ -2096,6 +2097,7 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert isinstance(fx_variables, dict) assert len(fx_variables) == 6 assert '_fx_' in fx_variables['areacella']['filename'] + assert '_r2i1p1f1_' in fx_variables['areacella']['filename'] assert '_Ofx_' in fx_variables['areacello']['filename'] assert '_Efx_' in fx_variables['clayfrac']['filename'] assert '_fx_' in fx_variables['sftlf']['filename'] From bb3ece4cd4b543c28f1b895719822b719dcc55cc Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 26 Mar 2021 17:11:18 +0100 Subject: [PATCH 44/56] Update message in test --- .../preprocessor/_ancillary_vars/test_add_fx_variables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py index ba8ffbeb42..40a4d658e9 100644 --- a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py +++ b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py @@ -189,6 +189,6 @@ def test_wrong_time_frequency(self, tmp_path): with 
pytest.raises(ValueError) as excinfo: cube = add_fx_variables( cube, fx_vars, CheckLevels.IGNORE) - msg = (f"Frequencies of {cube.var_name} and " - f"{volume_cube.var_name} cubes do not match.") + msg = (f"Dimensions of {cube.var_name} and {volume_cube.var_name} " + "do not match. Cannot broadcast cubes.") assert msg in str(excinfo.value) From c7a913d491a09627d9231f426c56df7a2a57118a Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 26 Mar 2021 17:20:00 +0100 Subject: [PATCH 45/56] Update test properly --- .../preprocessor/_ancillary_vars/test_add_fx_variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py index 40a4d658e9..2de45f0569 100644 --- a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py +++ b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py @@ -190,5 +190,5 @@ def test_wrong_time_frequency(self, tmp_path): cube = add_fx_variables( cube, fx_vars, CheckLevels.IGNORE) msg = (f"Dimensions of {cube.var_name} and {volume_cube.var_name} " - "do not match. Cannot broadcast cubes.") + "cubes do not match. Cannot broadcast cubes.") assert msg in str(excinfo.value) From a78d76afc3cb65dcb0a5ff46b12ed526d2189062 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 26 Mar 2021 17:30:48 +0100 Subject: [PATCH 46/56] Print cube in log --- esmvalcore/preprocessor/_area.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index a2edd9cc29..8016afef0e 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -210,8 +210,8 @@ def area_statistics(cube, operator): grid_areas = cube.cell_measure('cell_area').core_data() except iris.exceptions.CellMeasureNotFoundError: logger.info( - 'Cell measure "cell_area" not found in cube. 
' - 'Check fx_file availability.' + 'Cell measure "cell_area" not found in cube %s. ' + 'Check fx_file availability.', cube ) logger.info('Attempting to calculate grid cell area...') From 17aa93e06d95337996ae598501c212eb148e4c36 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 29 Mar 2021 16:42:30 +0200 Subject: [PATCH 47/56] Fix condition --- esmvalcore/preprocessor/_area.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 8016afef0e..388acf8881 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -215,7 +215,7 @@ def area_statistics(cube, operator): ) logger.info('Attempting to calculate grid cell area...') - if cube.coord('latitude').points.ndim == 2: + if not grid_areas and cube.coord('latitude').points.ndim == 2: coord_names = [coord.standard_name for coord in cube.coords()] if 'grid_latitude' in coord_names and 'grid_longitude' in coord_names: cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) @@ -234,7 +234,7 @@ def area_statistics(cube, operator): cube.coord('latitude')) coord_names = ['longitude', 'latitude'] - if grid_areas is None: + if not grid_areas: cube = guess_bounds(cube, coord_names) grid_areas = iris.analysis.cartography.area_weights(cube) logger.info('Calculated grid area shape: %s', grid_areas.shape) From 61e0a515244619ae2a9a64a949c13cf466ee3c86 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 29 Mar 2021 16:51:49 +0200 Subject: [PATCH 48/56] Fix previous commit mess --- esmvalcore/preprocessor/_area.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 388acf8881..fbd156030f 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -215,7 +215,7 @@ def area_statistics(cube, operator): ) logger.info('Attempting to calculate grid cell area...') - if not grid_areas and 
cube.coord('latitude').points.ndim == 2: + if grid_areas is None and cube.coord('latitude').points.ndim == 2: coord_names = [coord.standard_name for coord in cube.coords()] if 'grid_latitude' in coord_names and 'grid_longitude' in coord_names: cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) @@ -234,7 +234,7 @@ def area_statistics(cube, operator): cube.coord('latitude')) coord_names = ['longitude', 'latitude'] - if not grid_areas: + if grid_areas is None: cube = guess_bounds(cube, coord_names) grid_areas = iris.analysis.cartography.area_weights(cube) logger.info('Calculated grid area shape: %s', grid_areas.shape) From 93a9afd82213151aa3d0e8396398c414ede9b934 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 29 Mar 2021 16:13:14 +0100 Subject: [PATCH 49/56] fix test --- tests/integration/test_recipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index a90dd9e37c..d5492d521d 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2165,7 +2165,8 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert '_Ofx_' in fx_variables['areacello']['filename'] assert '_Efx_' in fx_variables['clayfrac']['filename'] assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_fx_' in fx_variables['sftgif']['filename'] + assert len(fx_variables['sftgif']['filename']) == 1 + assert 'sftgif_' in fx_variables['sftgif']['filename'][0] assert '_Ofx_' in fx_variables['sftof']['filename'] # Check mask_landsea From 28a88eed6db74ee02036e9fd7bfe8c19970f5976 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 30 Mar 2021 09:42:52 +0100 Subject: [PATCH 50/56] run a full dev test; I will remove this after test runs --- .github/workflows/action-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/action-test.yml b/.github/workflows/action-test.yml index a7a0080e6e..6f4032c3a1 100644 --- 
a/.github/workflows/action-test.yml +++ b/.github/workflows/action-test.yml @@ -21,6 +21,7 @@ on: push: branches: - master + - dev_load_cell_measures_in_cube # run the test only if the PR is to master # turn it on if required #pull_request: From bd2af406653343c822f773fe5921d12328beafea Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 30 Mar 2021 10:30:14 +0100 Subject: [PATCH 51/56] removed temporary call to run GA test on this branch --- .github/workflows/action-test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/action-test.yml b/.github/workflows/action-test.yml index 6f4032c3a1..a7a0080e6e 100644 --- a/.github/workflows/action-test.yml +++ b/.github/workflows/action-test.yml @@ -21,7 +21,6 @@ on: push: branches: - master - - dev_load_cell_measures_in_cube # run the test only if the PR is to master # turn it on if required #pull_request: From aa493f63a33a4b3c48482b680036688764a5a49d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 30 Mar 2021 10:31:03 +0100 Subject: [PATCH 52/56] install in CWD --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 27be5c21ae..03e0798bb0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -32,7 +32,7 @@ jobs: command: | . /opt/conda/etc/profile.d/conda.sh conda activate esmvaltool - pip install .[test] + pip install -e .[test] pytest -n 2 -m "not installation" - save_cache: key: test-{{ .Branch }} From 2545a4b22fcdbbc0108d87fb20d4900acf4fbf48 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 30 Mar 2021 10:45:12 +0100 Subject: [PATCH 53/56] print which esmvalcore is used --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 03e0798bb0..ff5e8bb49d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -33,6 +33,7 @@ jobs: . 
/opt/conda/etc/profile.d/conda.sh conda activate esmvaltool pip install -e .[test] + conda list esmvalcore pytest -n 2 -m "not installation" - save_cache: key: test-{{ .Branch }} From 6b48a1034c824f7baa4bbddd7d9fe12427364684 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 30 Mar 2021 12:33:07 +0100 Subject: [PATCH 54/56] revert changes done for testing only --- .circleci/config.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index ff5e8bb49d..27be5c21ae 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -32,8 +32,7 @@ jobs: command: | . /opt/conda/etc/profile.d/conda.sh conda activate esmvaltool - pip install -e .[test] - conda list esmvalcore + pip install .[test] pytest -n 2 -m "not installation" - save_cache: key: test-{{ .Branch }} From 40d618462344659693f7765dd57d64c8c60ac431 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 30 Mar 2021 15:19:02 +0200 Subject: [PATCH 55/56] Update test --- tests/integration/test_recipe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index d5492d521d..a90dd9e37c 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2165,8 +2165,7 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): assert '_Ofx_' in fx_variables['areacello']['filename'] assert '_Efx_' in fx_variables['clayfrac']['filename'] assert '_fx_' in fx_variables['sftlf']['filename'] - assert len(fx_variables['sftgif']['filename']) == 1 - assert 'sftgif_' in fx_variables['sftgif']['filename'][0] + assert '_fx_' in fx_variables['sftgif']['filename'] assert '_Ofx_' in fx_variables['sftof']['filename'] # Check mask_landsea From cedcdac43f00631d44cd755c3163da4164de77be Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 20 Apr 2021 17:31:48 +0200 Subject: [PATCH 56/56] Remove fx_vars before saving --- esmvalcore/_recipe.py | 2 ++ 
esmvalcore/preprocessor/__init__.py | 7 +++-- esmvalcore/preprocessor/_ancillary_vars.py | 26 +++++++++++++++++++ .../_ancillary_vars/test_add_fx_variables.py | 25 +++++++++++++++++- tests/integration/test_recipe.py | 3 +++ 5 files changed, 60 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index dacfed659f..6db4df455c 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -315,10 +315,12 @@ def _get_default_settings(variable, config_user, derive=False): if variable['short_name'] != variable['original_short_name']: settings['save']['alias'] = variable['short_name'] + # Configure fx settings settings['add_fx_variables'] = { 'fx_variables': {}, 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) } + settings['remove_fx_variables'] = {} return settings diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 069b9129f3..9fe86d140e 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -10,7 +10,7 @@ from .._task import BaseTask from ..cmor.check import cmor_check_data, cmor_check_metadata from ..cmor.fix import fix_data, fix_file, fix_metadata -from ._ancillary_vars import add_fx_variables +from ._ancillary_vars import add_fx_variables, remove_fx_variables from ._area import ( area_statistics, extract_named_regions, @@ -94,6 +94,7 @@ # Data reformatting/CMORization 'fix_data', 'cmor_check_data', + # Load fx_variables in cube 'add_fx_variables', # Time extraction (as defined in the preprocessor section) 'extract_time', @@ -158,6 +159,8 @@ 'linear_trend', 'linear_trend_stderr', 'convert_units', + # Remove fx_variables from cube + 'remove_fx_variables', # Save to file 'save', 'cleanup', @@ -182,7 +185,7 @@ # The order of initial and final steps cannot be configured INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('add_fx_variables') + 1] -FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('save'):] +FINAL_STEPS = 
DEFAULT_ORDER[DEFAULT_ORDER.index('remove_fx_variables'):] MULTI_MODEL_FUNCTIONS = { 'multi_model_statistics', diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index 88f5582dd5..5d6dbe32d1 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -175,3 +175,29 @@ def add_fx_variables(cube, fx_variables, check_level): else: add_ancillary_variable(cube, fx_cube) return cube + + +def remove_fx_variables(cube): + """ + Remove fx variables present as cell measures or ancillary variables in + the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with data and cell measures or ancillary variables. + + + Returns + ------- + iris.cube.Cube + Cube without cell measures or ancillary variables. + """ + + if cube.cell_measures(): + for measure in cube.cell_measures(): + cube.remove_cell_measure(measure.standard_name) + if cube.ancillary_variables(): + for variable in cube.ancillary_variables(): + cube.remove_ancillary_variable(variable.standard_name) + return cube diff --git a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py index 2de45f0569..7ac756d50a 100644 --- a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py +++ b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py @@ -11,7 +11,10 @@ import pytest from esmvalcore.cmor.check import CheckLevels -from esmvalcore.preprocessor._ancillary_vars import add_fx_variables +from esmvalcore.preprocessor._ancillary_vars import (add_fx_variables, + add_ancillary_variable, + add_cell_measure, + remove_fx_variables) logger = logging.getLogger(__name__) @@ -192,3 +195,23 @@ def test_wrong_time_frequency(self, tmp_path): msg = (f"Dimensions of {cube.var_name} and {volume_cube.var_name} " "cubes do not match. 
Cannot broadcast cubes.") assert msg in str(excinfo.value) + + def test_remove_fx_vars(self): + """Test fx_variables are removed from cube.""" + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[(self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + self.fx_area.var_name = 'areacella' + self.fx_area.standard_name = 'cell_area' + self.fx_area.units = 'm2' + add_cell_measure(cube, self.fx_area, measure='area') + assert cube.cell_measure(self.fx_area.standard_name) is not None + self.fx_area.var_name = 'sftlf' + self.fx_area.standard_name = "land_area_fraction" + self.fx_area.units = '%' + add_ancillary_variable(cube, self.fx_area) + assert cube.ancillary_variable(self.fx_area.standard_name) is not None + cube = remove_fx_variables(cube) + assert cube.cell_measures() == [] + assert cube.ancillary_variables() == [] diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index a90dd9e37c..81e3ef7720 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -86,6 +86,7 @@ 'fix_file', 'fix_metadata', 'load', + 'remove_fx_variables', 'save', ) @@ -166,6 +167,7 @@ def _get_default_settings_for_chl(fix_dir, save_filename): 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, }, + 'remove_fx_variables': {}, 'cleanup': { 'remove': [fix_dir] }, @@ -591,6 +593,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'fx_variables': {}, 'check_level': CheckLevels.DEFAULT, }, + 'remove_fx_variables': {}, 'cleanup': { 'remove': [fix_dir] },