diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 8a0c1a54b7..19020ba8f8 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -1388,7 +1388,7 @@ statistics. Parameters: * operator: operation to apply. Accepted values are 'mean', 'median', - 'std_dev', 'min', 'max', 'sum' and 'rms'. Default is 'mean' + 'std_dev', 'variance', 'min', 'max', 'sum' and 'rms'. Default is 'mean'. * period: define the granularity of the statistics: get values for the full period, for each month, day of year or hour of day. @@ -1398,6 +1398,12 @@ Parameters: * seasons: if period 'seasonal' or 'season' allows to set custom seasons. Default is '[DJF, MAM, JJA, SON]' +.. note:: + The 'mean', 'sum' and 'rms' operations over the 'full' period are weighted + by the time coordinate, i.e., the length of the time intervals. + For 'sum', the units of the resulting cube are multiplied by corresponding + time units (e.g., days). + Examples: * Monthly climatology: @@ -1877,23 +1883,26 @@ See also :func:`esmvalcore.preprocessor.meridional_means`. ``area_statistics`` ------------------- -This function calculates the average value over a region - weighted by the cell -areas of the region. This function takes the argument, ``operator``: the name -of the operation to apply. +This function calculates statistics over a region. +It takes one argument, ``operator``, which is the name of the operation to +apply. This function can be used to apply several different operations in the -horizontal plane: mean, standard deviation, median, variance, minimum, maximum and root mean square. +horizontal plane: mean, sum, standard deviation, median, variance, minimum, +maximum and root mean square. +The operations mean, sum and root mean square are area weighted. +For sums, the units of the resulting cubes are multiplied by m :math:`^2`. -Note that this function is applied over the entire dataset. If only a specific -region, depth layer or time period is required, then those regions need to be -removed using other preprocessor operations in advance. +Note that this function is applied over the entire dataset. +If only a specific region, depth layer or time period is required, then those +regions need to be removed using other preprocessor operations in advance. -This function requires a cell area `cell measure`_, unless the coordinates of the -input data are regular 1D latitude and longitude coordinates so the cell areas -can be computed. -The required supplementary variable, either ``areacella`` for atmospheric variables -or ``areacello`` for ocean variables, can be attached to the main dataset -as described in :ref:`supplementary_variables`. +This function requires a cell area `cell measure`_, unless the coordinates of +the input data are regular 1D latitude and longitude coordinates so the cell +areas can be computed. +The required supplementary variable, either ``areacella`` for atmospheric +variables or ``areacello`` for ocean variables, can be attached to the main +dataset as described in :ref:`supplementary_variables`. .. deprecated:: 2.8.0 The optional ``fx_variables`` argument specifies the fx variables that the user @@ -2025,15 +2034,22 @@ Takes arguments: be performed must be one-dimensional, as multidimensional coordinates are not supported in this preprocessor. + The 'mean', 'sum' and 'rms' operations are weighted by the corresponding + coordinate bounds. + For 'sum', the units of the resulting cube will be multiplied by + corresponding coordinate units. + See also :func:`esmvalcore.preprocessor.axis_statistics`. ``depth_integration`` --------------------- -This function integrates over the depth dimension. This function does a -weighted sum along the `z`-coordinate, and removes the `z` direction of the -output cube. This preprocessor takes no arguments. +This function integrates over the depth dimension. +This function does a weighted sum along the `z`-coordinate, and removes the `z` +direction of the output cube. +This preprocessor takes no arguments. +The units of the resulting cube are multiplied by the `z`-coordinate units. See also :func:`esmvalcore.preprocessor.depth_integration`. diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 9477811415..5703a39639 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -8,7 +8,7 @@ import logging import warnings from pathlib import Path -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Iterable, Optional import fiona import iris @@ -16,9 +16,9 @@ import shapely import shapely.ops from dask import array as da -from iris.coords import AuxCoord +from iris.coords import AuxCoord, CellMeasure from iris.cube import Cube, CubeList -from iris.exceptions import CoordinateNotFoundError +from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError from ._shared import ( get_iris_analysis_operation, @@ -40,30 +40,36 @@ SHAPE_ID_KEYS: tuple[str, ...] = ('name', 'NAME', 'Name', 'id', 'ID') -def extract_region(cube, start_longitude, end_longitude, start_latitude, - end_latitude): +def extract_region( + cube: Cube, + start_longitude: float, + end_longitude: float, + start_latitude: float, + end_latitude: float, +) -> Cube: """Extract a region from a cube. Function that subsets a cube on a box (start_longitude, end_longitude, - start_latitude, end_latitude) + start_latitude, end_latitude). Parameters ---------- - cube: iris.cube.Cube - input data cube. - start_longitude: float + cube: + Input data cube. + start_longitude: Western boundary longitude. - end_longitude: float + end_longitude: Eastern boundary longitude. - start_latitude: float + start_latitude: Southern Boundary latitude. - end_latitude: float + end_latitude: Northern Boundary Latitude. Returns ------- iris.cube.Cube - smaller cube. + Smaller cube. + """ # first examine if any cell_measures are present cell_measures = cube.cell_measures() @@ -183,18 +189,16 @@ def _extract_irregular_region(cube, start_longitude, end_longitude, return cube -def zonal_statistics(cube, operator): +def zonal_statistics(cube: Cube, operator: str) -> Cube: """Compute zonal statistics. Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms'. + cube: + Input cube. + operator: + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- @@ -206,6 +210,7 @@ def zonal_statistics(cube, operator): ValueError Error raised if computation on irregular grids is attempted. Zonal statistics not yet implemented for irregular grids. + """ if cube.coord('longitude').points.ndim < 2: operation = get_iris_analysis_operation(operator) @@ -216,18 +221,16 @@ def zonal_statistics(cube, operator): raise ValueError(msg) -def meridional_statistics(cube, operator): +def meridional_statistics(cube: Cube, operator: str) -> Cube: """Compute meridional statistics. Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms'. + cube: + Input cube. + operator: + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- @@ -239,6 +242,7 @@ def meridional_statistics(cube, operator): ValueError Error raised if computation on irregular grids is attempted. Zonal statistics not yet implemented for irregular grids. + """ if cube.coord('latitude').points.ndim < 2: operation = get_iris_analysis_operation(operator) @@ -266,125 +270,151 @@ def compute_area_weights(cube): return weights +def _try_adding_calculated_cell_area(cube: Cube) -> None: + """Try to add calculated cell measure 'cell_area' to cube (in-place).""" + assert not cube.cell_measures('cell_area') + + logger.debug( + "Found no cell measure 'cell_area' in cube %s. Check availability of " + "supplementary variables", + cube.summary(shorten=True), + ) + logger.debug("Attempting to calculate grid cell area") + + regular_grid = all([ + cube.coord('latitude').points.ndim == 1, + cube.coord('longitude').points.ndim == 1, + cube.coord_dims('latitude') != cube.coord_dims('longitude'), + ]) + rotated_pole_grid = all([ + cube.coord('latitude').points.ndim == 2, + cube.coord('longitude').points.ndim == 2, + cube.coords('grid_latitude'), + cube.coords('grid_longitude'), + ]) + + # For regular grids, calculate grid cell areas with iris function + if regular_grid: + cube = guess_bounds(cube, ['latitude', 'longitude']) + logger.debug("Calculating grid cell areas for regular grid") + cell_areas = compute_area_weights(cube) + + # For rotated pole grids, use grid_latitude and grid_longitude to calculate + # grid cell areas + elif rotated_pole_grid: + cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) + cube_tmp = cube.copy() + cube_tmp.remove_coord('latitude') + cube_tmp.coord('grid_latitude').rename('latitude') + cube_tmp.remove_coord('longitude') + cube_tmp.coord('grid_longitude').rename('longitude') + logger.debug("Calculating grid cell areas for rotated pole grid") + cell_areas = compute_area_weights(cube_tmp) + + # For all other cases, grid cell areas cannot be calculated + else: + logger.error( + "Supplementary variables are needed to calculate grid cell " + "areas for irregular or unstructured grid of cube %s", + cube.summary(shorten=True), + ) + raise CoordinateMultiDimError(cube.coord('latitude')) + + # Add new cell measure + cell_measure = CellMeasure( + cell_areas, standard_name='cell_area', units='m2', measure='area', + ) + cube.add_cell_measure(cell_measure, np.arange(cube.ndim)) + + @register_supplementaries( variables=['areacella', 'areacello'], required='prefer_at_least_one', ) -def area_statistics(cube, operator): - """Apply a statistical operator in the horizontal direction. +def area_statistics(cube: Cube, operator: str) -> Cube: + """Apply a statistical operator in the horizontal plane. - The average in the horizontal direction. We assume that the - horizontal directions are ['longitude', 'latutude']. + We assume that the horizontal directions are ['longitude', 'latitude']. - This function can be used to apply - several different operations in the horizontal plane: mean, standard - deviation, median variance, minimum and maximum. These options are - specified using the `operator` argument and the following key word - arguments: + This function can be used to apply several different operations in the + horizontal plane: mean, standard deviation, median variance, minimum and + maximum. The following options for `operator` are allowed: +------------+--------------------------------------------------+ - | `mean` | Area weighted mean. | + | `mean` | Area weighted mean | +------------+--------------------------------------------------+ | `median` | Median (not area weighted) | +------------+--------------------------------------------------+ | `std_dev` | Standard Deviation (not area weighted) | +------------+--------------------------------------------------+ - | `sum` | Area weighted sum. | + | `sum` | Area weighted sum | +------------+--------------------------------------------------+ | `variance` | Variance (not area weighted) | +------------+--------------------------------------------------+ - | `min`: | Minimum value | + | `min` | Minimum value | +------------+--------------------------------------------------+ | `max` | Maximum value | +------------+--------------------------------------------------+ - | `rms` | Area weighted root mean square. | + | `rms` | Area weighted root mean square | +------------+--------------------------------------------------+ + Note that for area-weighted sums, the units of the resulting cube will be + multiplied by m :math:`^2`. + Parameters ---------- - cube: iris.cube.Cube - Input cube. The input cube should have a - :class:`iris.coords.CellMeasure` named ``'cell_area'``, unless it - has regular 1D latitude and longitude coordinates so the cell areas - can be computed using - :func:`iris.analysis.cartography.area_weights`. - operator: str - The operation, options: mean, median, min, max, std_dev, sum, - variance, rms. + cube: + Input cube. The input cube should have a + :class:`iris.coords.CellMeasure` named ``'cell_area'``, unless it has + regular 1D latitude and longitude coordinates so the cell areas can be + computed using :func:`iris.analysis.cartography.area_weights`. + operator: + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. Raises ------ iris.exceptions.CoordinateMultiDimError - Exception for latitude axis with dim > 2. - ValueError - if input data cube has different shape than grid area weights + Cube has irregular or unstructured grid but supplementary variable + `cell_area` is not available. + """ original_dtype = cube.dtype - grid_areas = None - try: - grid_areas = cube.cell_measure('cell_area').core_data() - except iris.exceptions.CellMeasureNotFoundError: - logger.debug( - 'Cell measure "cell_area" not found in cube %s. ' - 'Check fx_file availability.', cube.summary(shorten=True)) - logger.debug('Attempting to calculate grid cell area...') - else: - grid_areas = da.broadcast_to(grid_areas, cube.shape) - - if grid_areas is None and cube.coord('latitude').points.ndim == 2: - coord_names = [coord.standard_name for coord in cube.coords()] - if 'grid_latitude' in coord_names and 'grid_longitude' in coord_names: - cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) - cube_tmp = cube.copy() - cube_tmp.remove_coord('latitude') - cube_tmp.coord('grid_latitude').rename('latitude') - cube_tmp.remove_coord('longitude') - cube_tmp.coord('grid_longitude').rename('longitude') - grid_areas = compute_area_weights(cube_tmp) - logger.debug('Calculated grid area shape: %s', grid_areas.shape) - else: - logger.error( - 'fx_file needed to calculate grid cell area for irregular ' - 'grids.') - raise iris.exceptions.CoordinateMultiDimError( - cube.coord('latitude')) - - coord_names = ['longitude', 'latitude'] - if grid_areas is None: - cube = guess_bounds(cube, coord_names) - grid_areas = compute_area_weights(cube) - logger.debug('Calculated grid area shape: %s', grid_areas.shape) - - if cube.shape != grid_areas.shape: - raise ValueError('Cube shape ({}) doesn`t match grid area shape ' - '({})'.format(cube.shape, grid_areas.shape)) - operation = get_iris_analysis_operation(operator) + coord_names = ['longitude', 'latitude'] - # TODO: implement weighted stdev, median, s var when available in iris. - # See iris issue: https://github.com/SciTools/iris/issues/3208 - + # Calculate (weighted) statistics if operator_accept_weights(operator): - result = cube.collapsed(coord_names, operation, weights=grid_areas) + # If necessary, try to calculate cell_area (this only works for regular + # grids and certain irregular grids, and fails for others) + if not cube.cell_measures('cell_area'): + _try_adding_calculated_cell_area(cube) + result = cube.collapsed(coord_names, operation, weights='cell_area') else: # Many IRIS analysis functions do not accept weights arguments. + # TODO: implement weighted stdev, median, var when available in iris. + # See iris issue: https://github.com/SciTools/iris/issues/3208 result = cube.collapsed(coord_names, operation) + # Make sure to preserve dtype new_dtype = result.dtype if original_dtype != new_dtype: logger.debug( - "area_statistics changed dtype from " - "%s to %s, changing back", original_dtype, new_dtype) + "area_statistics changed dtype from %s to %s, changing back", + original_dtype, + new_dtype, + ) result.data = result.core_data().astype(original_dtype) + return result -def extract_named_regions(cube, regions): +def extract_named_regions(cube: Cube, regions: str | Iterable[str]) -> Cube: """Extract a specific named region. The region coordinate exist in certain CMIP datasets. @@ -392,15 +422,15 @@ def extract_named_regions(cube, regions): Parameters ---------- - cube: iris.cube.Cube - input cube. - regions: str, list + cube: + Input cube. + regions: A region or list of regions to extract. Returns ------- iris.cube.Cube - collapsed cube. + Smaller cube. Raises ------ @@ -408,6 +438,7 @@ def extract_named_regions(cube, regions): regions is not list or tuple or set. ValueError region not included in cube. + """ # Make sure regions is a list of strings if isinstance(regions, str): @@ -671,7 +702,7 @@ def fix_coordinate_ordering(cube: Cube) -> Cube: Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. Returns @@ -756,9 +787,9 @@ def extract_shape( Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - shapefile: str or Path + shapefile: A shapefile defining the region(s) to extract. Also accepts the following strings to load special shapefiles: @@ -766,19 +797,19 @@ def extract_shape( 6 (https://doi.org/10.5281/zenodo.5176260). Should be used in combination with a :obj:`dict` for the argument `ids`, e.g., ``ids={'Acronym': ['GIC', 'WNA']}``. - method: str, optional + method: Select all points contained by the shape or select a single representative point. Choose either `'contains'` or `'representative'`. If `'contains'` is used, but not a single grid point is contained by the shape, a representative point will be selected. - crop: bool, optional + crop: In addition to masking, crop the resulting cube using :func:`~esmvalcore.preprocessor.extract_region`. Data on irregular grids will not be cropped. - decomposed: bool, optional + decomposed: If set to `True`, the output cube will have an additional dimension `shape_id` describing the requested regions. - ids: list or dict or None, optional + ids: Shapes to be read from the shapefile. Can be given as: * :obj:`list`: IDs are assigned from the attributes `name`, `NAME`, diff --git a/esmvalcore/preprocessor/_shared.py b/esmvalcore/preprocessor/_shared.py index 89b6d13b32..8b3c273c18 100644 --- a/esmvalcore/preprocessor/_shared.py +++ b/esmvalcore/preprocessor/_shared.py @@ -21,51 +21,54 @@ def guess_bounds(cube, coords): return cube -def get_iris_analysis_operation(operator): - """ - Determine the iris analysis operator from a string. +def get_iris_analysis_operation(operator: str) -> iris.analysis.Aggregator: + """Determine the iris analysis operator from a :obj:`str`. Map string to functional operator. Parameters ---------- - operator: str + operator: A named operator. Returns ------- - function: A function from iris.analysis + iris.analysis.Aggregator + Object that can be used within :meth:`iris.cube.Cube.collapsed`, + :meth:`iris.cube.Cube.aggregated_by`, or + :meth:`iris.cube.Cube.rolling_window`. Raises ------ ValueError - operator not in allowed operators list. - allowed operators: mean, median, std_dev, sum, variance, min, max, rms + An invalid operator is specified. Allowed options: `mean`, `median`, + `std_dev`, `sum`, `variance`, `min`, `max`, `rms`. """ operators = [ - 'mean', 'median', 'std_dev', 'sum', 'variance', 'min', 'max', 'rms' + 'mean', 'median', 'std_dev', 'sum', 'variance', 'min', 'max', 'rms', ] operator = operator.lower() if operator not in operators: - raise ValueError("operator {} not recognised. " - "Accepted values are: {}." - "".format(operator, ', '.join(operators))) + raise ValueError( + f"operator '{operator}' not recognised. Accepted values are: " + f"{', '.join(operators)}." + ) operation = getattr(iris.analysis, operator.upper()) return operation -def operator_accept_weights(operator): - """ - Get if operator support weights. +def operator_accept_weights(operator: str) -> bool: + """Get if operator support weights. Parameters ---------- - operator: str + operator: A named operator. Returns ------- - bool: True if operator support weights, False otherwise + bool + ``True`` if operator support weights, ``False`` otherwise. """ return operator.lower() in ('mean', 'sum', 'rms') diff --git a/esmvalcore/preprocessor/_time.py b/esmvalcore/preprocessor/_time.py index 5de60f35ac..265004a8cf 100644 --- a/esmvalcore/preprocessor/_time.py +++ b/esmvalcore/preprocessor/_time.py @@ -3,20 +3,23 @@ Allows for selecting data subsets using certain time bounds; constructing seasonal and area averages. """ +from __future__ import annotations + import copy import datetime import logging -from typing import Union +from typing import Iterable, Optional from warnings import filterwarnings import dask.array as da import iris import iris.coord_categorisation -import iris.cube import iris.exceptions import iris.util import isodate import numpy as np +from iris.coords import AuxCoord +from iris.cube import Cube, CubeList from iris.time import PartialDateTime from esmvalcore.cmor.check import _get_next_month, _get_time_bounds @@ -44,8 +47,15 @@ ) -def extract_time(cube, start_year, start_month, start_day, end_year, end_month, - end_day): +def extract_time( + cube: Cube, + start_year: int, + start_month: int, + start_day: int, + end_year: int, + end_month: int, + end_day: int, +) -> Cube: """Extract a time range from a cube. Given a time range passed in as a series of years, months and days, it @@ -54,20 +64,20 @@ def extract_time(cube, start_year, start_month, start_day, end_year, end_month, Parameters ---------- - cube: iris.cube.Cube - input cube. - start_year: int - start year - start_month: int - start month - start_day: int - start day - end_year: int - end year - end_month: int - end month - end_day: int - end day + cube: + Input cube. + start_year: + Start year. + start_month: + Start month. + start_day: + Start day. + end_year: + End year. + end_month: + End month. + end_day: + End day. Returns ------- @@ -77,7 +87,8 @@ def extract_time(cube, start_year, start_month, start_day, end_year, end_month, Raises ------ ValueError - if time ranges are outside the cube time limits + Time ranges are outside the cube time limits. + """ t_1 = PartialDateTime(year=int(start_year), month=int(start_month), @@ -136,10 +147,7 @@ def _duration_to_date(duration, reference, sign): return date -def _select_timeslice( - cube: iris.cube.Cube, - select: np.ndarray, -) -> Union[iris.cube.Cube, None]: +def _select_timeslice(cube: Cube, select: np.ndarray) -> Cube | None: """Slice a cube along its time axis.""" if select.any(): coord = cube.coord('time') @@ -157,10 +165,10 @@ def _select_timeslice( def _extract_datetime( - cube: iris.cube.Cube, + cube: Cube, start_datetime: PartialDateTime, end_datetime: PartialDateTime, -) -> iris.cube.Cube: +) -> Cube: """Extract a time range from a cube. Given a time range passed in as a datetime.datetime object, it @@ -169,12 +177,12 @@ def _extract_datetime( Parameters ---------- - cube: iris.cube.Cube - input cube. - start_datetime: PartialDateTime - start datetime - end_datetime: PartialDateTime - end datetime + cube: + Input cube. + start_datetime: + Start datetime + end_datetime: + End datetime Returns ------- @@ -221,14 +229,14 @@ def dt2str(time: PartialDateTime) -> str: return cube_slice -def clip_timerange(cube, timerange): +def clip_timerange(cube: Cube, timerange: str) -> Cube: """Extract time range with a resolution up to seconds. Parameters ---------- - cube : iris.cube.Cube + cube: Input cube. - timerange : str + timerange: str Time range in ISO 8601 format. Returns @@ -240,12 +248,10 @@ def clip_timerange(cube, timerange): ------ ValueError Time ranges are outside the cube's time limits. - """ - start_date = timerange.split('/')[0] - start_date = _parse_start_date(start_date) - end_date = timerange.split('/')[1] - end_date = _parse_end_date(end_date) + """ + start_date = _parse_start_date(timerange.split('/')[0]) + end_date = _parse_end_date(timerange.split('/')[1]) if isinstance(start_date, isodate.duration.Duration): start_date = _duration_to_date(start_date, end_date, sign=-1) @@ -280,14 +286,14 @@ def clip_timerange(cube, timerange): return _extract_datetime(cube, t_1, t_2) -def extract_season(cube, season): +def extract_season(cube: Cube, season: str) -> Cube: """Slice cube to get only the data belonging to a specific season. Parameters ---------- - cube: iris.cube.Cube + cube: Original data - season: str + season: Season to extract. Available: DJF, MAM, JJA, SON and all sequentially correct combinations: e.g. JJAS @@ -299,7 +305,8 @@ def extract_season(cube, season): Raises ------ ValueError - if requested season is not present in the cube + Requested season is not present in the cube. + """ season = season.upper() @@ -334,25 +341,26 @@ def extract_season(cube, season): return result -def extract_month(cube, month): +def extract_month(cube: Cube, month: int) -> Cube: """Slice cube to get only the data belonging to a specific month. Parameters ---------- - cube: iris.cube.Cube + cube: Original data - month: int - Month to extract as a number from 1 to 12 + month: + Month to extract as a number from 1 to 12. Returns ------- iris.cube.Cube - data cube for specified month. + Cube for specified month. Raises ------ ValueError - if requested month is not present in the cube + Requested month is not present in the cube. + """ if month not in range(1, 13): raise ValueError('Please provide a month number between 1 and 12.') @@ -366,18 +374,19 @@ def extract_month(cube, month): return result -def get_time_weights(cube): +def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array: """Compute the weighting of the time axis. Parameters ---------- - cube: iris.cube.Cube - input cube. + cube: + Input cube. Returns ------- - numpy.array + np.ndarray or da.core.Array Array of time weights for averaging. + """ time = cube.coord('time') coord_dims = cube.coord_dims('time') @@ -387,8 +396,8 @@ def get_time_weights(cube): if len(coord_dims) > 1: raise ValueError( f"Weighted statistical operations are not supported for " - f"{len(coord_dims):d}D time coordinates, expected " - f"0D or 1D") + f"{len(coord_dims):d}D time coordinates, expected 0D or 1D" + ) # Extract 1D time weights (= lengths of time intervals) time_weights = time.core_bounds()[:, 1] - time.core_bounds()[:, 0] @@ -425,28 +434,27 @@ def _aggregate_time_fx(result_cube, source_cube): ancillary_dims) -def hourly_statistics(cube, hours, operator='mean'): +def hourly_statistics(cube: Cube, hours: int, operator: str = 'mean') -> Cube: """Compute hourly statistics. Chunks time in x hours periods and computes statistics over them. Parameters ---------- - cube: iris.cube.Cube - input cube. - - hours: int - Number of hours per period. Must be a divisor of 24 - (1, 2, 3, 4, 6, 8, 12) - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', 'max' + cube: + Input cube. + hours: + Number of hours per period. Must be a divisor of 24, i.e., (1, 2, 3, 4, + 6, 8, 12). + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Hourly statistics cube + Hourly statistics cube. + """ if not cube.coords('hour_group'): iris.coord_categorisation.add_categorised_coord( @@ -471,25 +479,24 @@ def hourly_statistics(cube, hours, operator='mean'): return result -def daily_statistics(cube, operator='mean'): +def daily_statistics(cube: Cube, operator: str = 'mean') -> Cube: """Compute daily statistics. Chunks time in daily periods and computes statistics over them; Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + cube: + Input cube. + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Daily statistics cube + Daily statistics cube. + """ if not cube.coords('day_of_year'): iris.coord_categorisation.add_day_of_year(cube, 'time') @@ -504,25 +511,24 @@ def daily_statistics(cube, operator='mean'): return result -def monthly_statistics(cube, operator='mean'): +def monthly_statistics(cube: Cube, operator: str = 'mean') -> Cube: """Compute monthly statistics. Chunks time in monthly periods and computes statistics over them; Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + cube: + Input cube. + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Monthly statistics cube + Monthly statistics cube. + """ if not cube.coords('month_number'): iris.coord_categorisation.add_month_number(cube, 'time') @@ -535,24 +541,23 @@ def monthly_statistics(cube, operator='mean'): return result -def seasonal_statistics(cube, - operator='mean', - seasons=('DJF', 'MAM', 'JJA', 'SON')): +def seasonal_statistics( + cube: Cube, + operator: str = 'mean', + seasons: Iterable[str] = ('DJF', 'MAM', 'JJA', 'SON'), +) -> Cube: """Compute seasonal statistics. Chunks time seasons and computes statistics over them. Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' - - seasons: list or tuple of str, optional + cube: + Input cube. + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. + seasons: optional Seasons to build. Available: ('DJF', 'MAM', 'JJA', SON') (default) and all sequentially correct combinations holding every month of a year: e.g. ('JJAS','ONDJFMAM'), or less in case of prior season @@ -561,7 +566,8 @@ def seasonal_statistics(cube, Returns ------- iris.cube.Cube - Seasonal statistic cube + Seasonal statistic cube. + """ seasons = tuple(sea.upper() for sea in seasons) @@ -595,18 +601,19 @@ def seasonal_statistics(cube, # Ranging on [29, 31] days makes this calendar-independent # the only season this could not work is 'F' but this raises an # ValueError - def spans_full_season(cube): + def spans_full_season(cube: Cube) -> list[bool]: """Check for all month present in the season. Parameters ---------- - cube: iris.cube.Cube - input cube. + cube: + Input cube. Returns ------- - bool - truth statement if time bounds are within (month*29, month*31) + list[bool] + Truth statements if time bounds are within (month*29, month*31) + """ time = cube.coord('time') num_days = [(tt.bounds[0, 1] - tt.bounds[0, 0]) for tt in time] @@ -622,7 +629,7 @@ def spans_full_season(cube): return result -def annual_statistics(cube, operator='mean'): +def annual_statistics(cube: Cube, operator: str = 'mean') -> Cube: """Compute annual statistics. Note that this function does not weight the annual mean if @@ -631,18 +638,17 @@ def annual_statistics(cube, operator='mean'): Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + cube: + Input cube. + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Annual statistics cube + Annual statistics cube. + """ # TODO: Add weighting in time dimension. See iris issue 3290 # https://github.com/SciTools/iris/issues/3290 @@ -656,7 +662,7 @@ def annual_statistics(cube, operator='mean'): return result -def decadal_statistics(cube, operator='mean'): +def decadal_statistics(cube: Cube, operator: str = 'mean') -> Cube: """Compute decadal statistics. Note that this function does not weight the decadal mean if @@ -665,18 +671,17 @@ def decadal_statistics(cube, operator='mean'): Parameters ---------- - cube: iris.cube.Cube - input cube. - + cube: + Input cube. operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Decadal statistics cube + Decadal statistics cube. + """ # TODO: Add weighting in time dimension. See iris issue 3290 # https://github.com/SciTools/iris/issues/3290 @@ -697,31 +702,36 @@ def get_decade(coord, value): return result -def climate_statistics(cube, - operator='mean', - period='full', - seasons=('DJF', 'MAM', 'JJA', 'SON')): +def climate_statistics( + cube: Cube, + operator: str = 'mean', + period: str = 'full', + seasons: Iterable[str] = ('DJF', 'MAM', 'JJA', 'SON'), +) -> Cube: """Compute climate statistics with the specified granularity. Computes statistics for the whole dataset. It is possible to get them for the full period or with the data grouped by hour, day, month or season. + Note + ---- + The `mean`, `sum` and `rms` operations over the `full` period are weighted + by the time coordinate, i.e., the length of the time intervals. For `sum`, + the units of the resulting cube will be multiplied by corresponding time + units (e.g., days). + Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms'. - - period: str, optional - Period to compute the statistic over. - Available periods: 'full', 'season', 'seasonal', 'monthly', 'month', - 'mon', 'daily', 'day', 'hourly', 'hour', 'hr'. - - seasons: list or tuple of str, optional + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. + period: optional + Period to compute the statistic over. Available periods: `full`, + `season`, `seasonal`, `monthly`, `month`, `mon`, `daily`, `day`, + `hourly`, `hour`, `hr`. + seasons: Seasons to use if needed. Defaults to ('DJF', 'MAM', 'JJA', 'SON'). Returns @@ -733,19 +743,25 @@ def climate_statistics(cube, original_dtype = cube.dtype period = period.lower() + # Use Cube.collapsed when full period is requested if period in ('full', ): operator_method = get_iris_analysis_operation(operator) if operator_accept_weights(operator): - time_weights = get_time_weights(cube) - if time_weights.min() == time_weights.max(): - # No weighting needed. - clim_cube = cube.collapsed('time', operator_method) - else: - clim_cube = cube.collapsed('time', - operator_method, - weights=time_weights) + time_weights_coord = AuxCoord( + get_time_weights(cube), + long_name='time_weights', + units=cube.coord('time').units, + ) + cube.add_aux_coord(time_weights_coord, cube.coord_dims('time')) + clim_cube = cube.collapsed( + 'time', + operator_method, + weights=time_weights_coord, + ) else: clim_cube = cube.collapsed('time', operator_method) + + # Use Cube.aggregated_by for other periods else: clim_coord = _get_period_coord(cube, period, seasons) operator = get_iris_analysis_operation(operator) @@ -756,24 +772,28 @@ def climate_statistics(cube, iris.util.promote_aux_coord_to_dim_coord(clim_cube, clim_coord.name()) else: - clim_cube = iris.cube.CubeList( + clim_cube = CubeList( clim_cube.slices_over(clim_coord.name())).merge_cube() cube.remove_coord(clim_coord) + # Make sure that original dtype is preserved new_dtype = clim_cube.dtype if original_dtype != new_dtype: logger.debug( "climate_statistics changed dtype from " "%s to %s, changing back", original_dtype, new_dtype) clim_cube.data = clim_cube.core_data().astype(original_dtype) + return clim_cube -def anomalies(cube, - period, - reference=None, - standardize=False, - seasons=('DJF', 'MAM', 'JJA', 'SON')): +def anomalies( + cube: Cube, + period: str, + reference: Optional[dict] = None, + standardize: bool = False, + seasons: Iterable[str] = ('DJF', 'MAM', 'JJA', 'SON'), +) -> Cube: """Compute anomalies using a mean with the specified granularity. Computes anomalies based on hourly, daily, monthly, seasonal or yearly @@ -781,23 +801,19 @@ def anomalies(cube, Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - - period: str - Period to compute the statistic over. - Available periods: 'full', 'season', 'seasonal', 'monthly', 'month', - 'mon', 'daily', 'day', 'hourly', 'hour', 'hr'. - - reference: list int, optional, default: None - Period of time to use a reference, as needed for the 'extract_time' - preprocessor function. If ``None``, all available data is used as a - reference. - - standardize: bool, optional + period: + Period to compute the statistic over. Available periods: `full`, + `season`, `seasonal`, `monthly`, `month`, `mon`, `daily`, `day`, + `hourly`, `hour`, `hr`. + reference: optional + Period of time to use a reference, as needed for the + :func:`~esmvalcore.preprocessor.extract_time` preprocessor function. + If ``None``, all available data is used as a reference. + standardize: optional If ``True`` standardized anomalies are calculated. - - seasons: list or tuple of str, optional + seasons: optional Seasons to use if needed. Defaults to ('DJF', 'MAM', 'JJA', 'SON'). Returns @@ -891,7 +907,7 @@ def _get_period_coord(cube, period, seasons): raise ValueError(f"Period '{period}' not supported") -def regrid_time(cube, frequency): +def regrid_time(cube: Cube, frequency: str) -> Cube: """Align time axis for cubes so they can be subtracted. Operations on time units, time points and auxiliary @@ -903,15 +919,16 @@ def regrid_time(cube, frequency): Parameters ---------- - cube: iris.cube.Cube - input cube. - frequency: str - data frequency: mon, day, 1hr, 3hr or 6hr + cube: + Input cube. + frequency: + Data frequency: `mon`, `day`, `1hr`, `3hr` or `6hr`. Returns ------- iris.cube.Cube - cube with converted time axis and units. + Cube with converted time axis and units. + """ # standardize time points coord = cube.coord('time') @@ -1000,11 +1017,13 @@ def low_pass_weights(window, cutoff): return weights[1:-1] -def timeseries_filter(cube, - window, - span, - filter_type='lowpass', - filter_stats='sum'): +def timeseries_filter( + cube: Cube, + window: int, + span: int, + filter_type: str = 'lowpass', + filter_stats: str = 'sum', +) -> Cube: """Apply a timeseries filter. Method borrowed from `iris example @@ -1018,33 +1037,34 @@ def timeseries_filter(cube, Parameters ---------- - cube: iris.cube.Cube - input cube. - window: int + cube: + Input cube. + window: The length of the filter window (in units of cube time coordinate). - span: int + span: Number of months/days (depending on data frequency) on which weights should be computed e.g. 2-yearly: span = 24 (2 x 12 months). Span should have same units as cube time coordinate. - filter_type: str, optional + filter_type: optional Type of filter to be applied; default 'lowpass'. Available types: 'lowpass'. - filter_stats: str, optional - Type of statistic to aggregate on the rolling window; default 'sum'. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + filter_stats: optional + Type of statistic to aggregate on the rolling window; default: `sum`. + Allowed options: `mean`, `median`, `min`, `max`, `std_dev`, `sum`, + `variance`, `rms`. Returns ------- iris.cube.Cube - cube time-filtered using 'rolling_window'. + Cube time-filtered using 'rolling_window'. Raises ------ iris.exceptions.CoordinateNotFoundError: Cube does not have time coordinate. NotImplementedError: - If filter_type is not implemented. + `filter_type` is not implemented. + """ try: cube.coord('time') @@ -1059,6 +1079,8 @@ def timeseries_filter(cube, ] if filter_type in supported_filters: if filter_type == 'lowpass': + # These weights sum to one and are dimensionless (-> we do NOT need + # to consider units for sums) wgts = low_pass_weights(window, 1. / span) else: raise NotImplementedError( @@ -1075,7 +1097,7 @@ def timeseries_filter(cube, return cube -def resample_hours(cube, interval, offset=0): +def resample_hours(cube: Cube, interval: int, offset: int = 0) -> Cube: """Convert x-hourly data to y-hourly by eliminating extra timesteps. Convert x-hourly data to y-hourly (y > x) by eliminating the extra @@ -1094,11 +1116,11 @@ def resample_hours(cube, interval, offset=0): Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - interval: int + interval: The period (hours) of the desired data. - offset: int, optional + offset: optional The firs hour (hours) of the desired data. Returns @@ -1110,6 +1132,7 @@ def resample_hours(cube, interval, offset=0): ------ ValueError: The specified frequency is not a divisor of 24. + """ allowed_intervals = (1, 2, 3, 4, 6, 12) if interval not in allowed_intervals: @@ -1136,7 +1159,12 @@ def resample_hours(cube, interval, offset=0): return cube -def resample_time(cube, month=None, day=None, hour=None): +def resample_time( + cube: Cube, + month: Optional[int] = None, + day: Optional[int] = None, + hour: Optional[int] = None, +) -> Cube: """Change frequency of data by resampling it. Converts data from one frequency to another by extracting the timesteps @@ -1160,19 +1188,20 @@ def resample_time(cube, month=None, day=None, hour=None): Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - month: int, optional - Month to extract - day: int, optional - Day to extract - hour: int, optional - Hour to extract + month: optional + Month to extract. + day: optional + Day to extract. + hour: optional + Hour to extract. Returns ------- iris.cube.Cube Cube with the new frequency. + """ time = cube.coord('time') dates = time.units.num2date(time.points) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 52b9393235..08fde09828 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -3,12 +3,19 @@ Allows for selecting data subsets using certain volume bounds; selecting depth or height regions; constructing volumetric averages; """ +from __future__ import annotations + import logging +from typing import Iterable, Sequence import dask.array as da import iris import numpy as np +from iris.coords import AuxCoord, CellMeasure +from iris.cube import Cube +from iris.exceptions import CoordinateMultiDimError +from ._area import compute_area_weights from ._shared import get_iris_analysis_operation, operator_accept_weights from ._supplementary_vars import register_supplementaries @@ -16,12 +23,12 @@ def extract_volume( - cube, - z_min, - z_max, - interval_bounds='open', - nearest_value=False -): + cube: Cube, + z_min: float, + z_max: float, + interval_bounds: str = 'open', + nearest_value: bool = False, +) -> Cube: """Subset a cube based on a range of values in the z-coordinate. Function that subsets a cube on a box of (z_min, z_max), @@ -37,21 +44,23 @@ def extract_volume( Parameters ---------- - cube: iris.cube.Cube - input cube. - z_min: float - minimum depth to extract. - z_max: float - maximum depth to extract. - interval_bounds: str - sets left bound of the interval to either 'open', 'closed', + cube: + Input cube. + z_min: + Minimum depth to extract. + z_max: + Maximum depth to extract. + interval_bounds: + Sets left bound of the interval to either 'open', 'closed', 'left_closed' or 'right_closed'. - nearest_value: bool - extracts considering the nearest value of z-coord to z_min and z_max. + nearest_value: + Extracts considering the nearest value of z-coord to z_min and z_max. + Returns ------- iris.cube.Cube z-coord extracted cube. + """ if z_min > z_max: # minimum is below maximum, so switch them around @@ -87,10 +96,16 @@ def extract_volume( return cube.extract(z_constraint) -def calculate_volume(cube): +def calculate_volume(cube: Cube) -> da.core.Array: """Calculate volume from a cube. - This function is used when the volume ancillary variables can't be found. + This function is used when the 'ocean_volume' cell measure can't be found. + + Note + ---- + This only works if the grid cell areas can be calculated (i.e., latitude + and longitude are 1D) and if the depth coordinate is 1D or 4D with first + dimension 1. Parameters ---------- @@ -99,167 +114,199 @@ def calculate_volume(cube): Returns ------- - float - grid volume. + dask.array.core.Array + Grid volumes. + """ - # #### - # Load depth field and figure out which dim is which. + # Load depth field and figure out which dim is which depth = cube.coord(axis='z') - z_dim = cube.coord_dims(cube.coord(axis='z'))[0] + z_dim = cube.coord_dims(depth)[0] - # #### - # Load z direction thickness + # Calculate Z-direction thickness thickness = depth.bounds[..., 1] - depth.bounds[..., 0] - # #### - # Calculate grid volume: - area = da.array(iris.analysis.cartography.area_weights(cube)) + # Try to calculate grid cell area + try: + area = da.array(compute_area_weights(cube)) + except CoordinateMultiDimError: + logger.error( + "Supplementary variables are needed to calculate grid cell " + "areas for irregular grid of cube %s", + cube.summary(shorten=True), + ) + raise + + # Try to calculate grid cell volume as area * thickness if thickness.ndim == 1 and z_dim == 1: grid_volume = area * thickness[None, :, None, None] - if thickness.ndim == 4 and z_dim == 1: + elif thickness.ndim == 4 and z_dim == 1: grid_volume = area * thickness[:, :] + else: + raise ValueError( + f"Supplementary variables are needed to calculate grid cell " + f"volumes for cubes with {thickness.ndim:d}D depth coordinate, " + f"got cube {cube.summary(shorten=True)}" + ) return grid_volume +def _try_adding_calculated_ocean_volume(cube: Cube) -> None: + """Try to add calculated cell measure 'ocean_volume' to cube (in-place).""" + logger.debug( + "Found no cell measure 'ocean_volume' in cube %s. Check availability " + "of supplementary variables", + cube.summary(shorten=True), + ) + logger.debug("Attempting to calculate grid cell volume") + + grid_volume = calculate_volume(cube) + + cell_measure = CellMeasure( + grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume', + ) + cube.add_cell_measure(cell_measure, np.arange(cube.ndim)) + + @register_supplementaries( variables=['volcello'], required='prefer_at_least_one', ) -def volume_statistics(cube, operator): +def volume_statistics(cube: Cube, operator: str) -> Cube: """Apply a statistical operation over a volume. The volume average is weighted according to the cell volume. Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. The input cube should have a - :class:`iris.coords.CellMeasure` with standard name ``'ocean_volume'``, - unless it has regular 1D latitude and longitude coordinates so the cell - volumes can be computed by using - :func:`iris.analysis.cartography.area_weights` to compute the cell - areas and multiplying those by the cell thickness, computed from the - bounds of the vertical coordinate. - operator: str - The operation to apply to the cube, options are: 'mean'. + :class:`iris.coords.CellMeasure` named ``'ocean_volume'``, unless it + has regular 1D latitude and longitude coordinates so the cell volumes + can be computed by using :func:`iris.analysis.cartography.area_weights` + to compute the cell areas and multiplying those by the cell thickness, + computed from the bounds of the vertical coordinate. + operator: + The operation. Allowed options are: `mean`. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. - Raises - ------ - ValueError - if input cube shape differs from grid volume cube shape. """ # TODO: Test sigma coordinates. # TODO: Add other operations. - if operator != 'mean': - raise ValueError(f'Volume operator {operator} not recognised.') + raise ValueError(f"Volume operator {operator} not recognised.") - try: - grid_volume = cube.cell_measure('ocean_volume').core_data() - except iris.exceptions.CellMeasureNotFoundError: - logger.debug('Cell measure "ocean_volume" not found in cube. ' - 'Check fx_file availability.') - logger.debug('Attempting to calculate grid cell volume...') - grid_volume = calculate_volume(cube) - else: - grid_volume = da.broadcast_to(grid_volume, cube.shape) - - if cube.data.shape != grid_volume.shape: - raise ValueError('Cube shape ({}) doesn`t match grid volume shape ' - f'({cube.shape, grid_volume.shape})') + if not cube.cell_measures('ocean_volume'): + _try_adding_calculated_ocean_volume(cube) - masked_volume = da.ma.masked_where(da.ma.getmaskarray(cube.lazy_data()), - grid_volume) result = cube.collapsed( - [cube.coord(axis='Z'), - cube.coord(axis='Y'), - cube.coord(axis='X')], + [cube.coord(axis='Z'), cube.coord(axis='Y'), cube.coord(axis='X')], iris.analysis.MEAN, - weights=masked_volume) + weights='ocean_volume', + ) return result -def axis_statistics(cube, axis, operator): +def axis_statistics(cube: Cube, axis: str, operator: str) -> Cube: """Perform statistics along a given axis. - Operates over an axis direction. If weights are required, - they are computed using the coordinate bounds. + Operates over an axis direction. + + Note + ---- + The `mean`, `sum` and `rms` operations are weighted by the corresponding + coordinate bounds. For `sum`, the units of the resulting cube will be + multiplied by corresponding coordinate units. Arguments --------- - cube: iris.cube.Cube + cube: Input cube. - axis: str - Direction over where to apply the operator. Possible values - are 'x', 'y', 'z', 't'. - operator: str - Statistics to perform. Available operators are: - 'mean', 'median', 'std_dev', 'sum', 'variance', - 'min', 'max', 'rms'. + axis: + Direction over where to apply the operator. Possible values are `x`, + `y`, `z`, `t`. + operator: + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. + """ + operation = get_iris_analysis_operation(operator) + + # Check if a coordinate for the desired axis exists try: coord = cube.coord(axis=axis) except iris.exceptions.CoordinateNotFoundError as err: - raise ValueError(f'Axis {axis} not found in cube ' - f'{cube.summary(shorten=True)}') from err + raise ValueError( + f"Axis {axis} not found in cube {cube.summary(shorten=True)}" + ) from err + + # Multidimensional coordinates are currently not supported coord_dims = cube.coord_dims(coord) if len(coord_dims) > 1: - raise NotImplementedError('axis_statistics not implemented for ' - 'multidimensional coordinates.') - operation = get_iris_analysis_operation(operator) + raise NotImplementedError( + "axis_statistics not implemented for multidimensional " + "coordinates." + ) + + # For weighted operations, create a dummy weights coordinate using the + # bounds of the original coordinate (this handles units properly, e.g., for + # sums) if operator_accept_weights(operator): - coord_dim = coord_dims[0] - expand = list(range(cube.ndim)) - expand.remove(coord_dim) - bounds = coord.core_bounds() - weights = np.abs(bounds[..., 1] - bounds[..., 0]) - weights = np.expand_dims(weights, expand) - weights = da.broadcast_to(weights, cube.shape) - result = cube.collapsed(coord, operation, weights=weights) + weights_coord = AuxCoord( + np.abs(coord.core_bounds()[..., 1] - coord.core_bounds()[..., 0]), + long_name=f'{axis}_axis_statistics_weights', + units=coord.units, + ) + cube.add_aux_coord(weights_coord, coord_dims) + result = cube.collapsed(coord, operation, weights=weights_coord) else: result = cube.collapsed(coord, operation) return result -def depth_integration(cube): +def depth_integration(cube: Cube) -> Cube: """Determine the total sum over the vertical component. - Requires a 3D cube. The z-coordinate - integration is calculated by taking the sum in the z direction of the - cell contents multiplied by the cell thickness. + Requires a 3D cube. The z-coordinate integration is calculated by taking + the sum in the z direction of the cell contents multiplied by the cell + thickness. The units of the resulting cube are multiplied by the + z-coordinate units. Arguments --------- - cube: iris.cube.Cube - input cube. + cube: + Input cube. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. + """ result = axis_statistics(cube, axis='z', operator='sum') result.rename('Depth_integrated_' + str(cube.name())) - # result.units = Unit('m') * result.units # This doesn't work: - # TODO: Change units on cube to reflect 2D concentration (not 3D) - # Waiting for news from iris community. return result -def extract_transect(cube, latitude=None, longitude=None): +def extract_transect( + cube: Cube, + latitude: None | float | Iterable[float] = None, + longitude: None | float | Iterable[float] = None, +) -> Cube: """Extract data along a line of constant latitude or longitude. Both arguments, latitude and longitude, are treated identically. @@ -283,29 +330,30 @@ def extract_transect(cube, latitude=None, longitude=None): Parameters ---------- - cube: iris.cube.Cube - input cube. - latitude: None, float or [float, float], optional - transect latiude or range. - longitude: None, float or [float, float], optional - transect longitude or range. + cube: + Input cube. + latitude: optional + Transect latitude or range. + longitude: optional + Transect longitude or range. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. Raises ------ ValueError - slice extraction not implemented for irregular grids. + Slice extraction not implemented for irregular grids. ValueError - latitude and longitude are both floats or lists; not allowed - to slice on both axes at the same time. + Latitude and longitude are both floats or lists; not allowed to slice + on both axes at the same time. + """ # ### coord_dim2 = False - second_coord_range = False + second_coord_range: None | list = None lats = cube.coord('latitude') lons = cube.coord('longitude') @@ -343,13 +391,18 @@ def extract_transect(cube, latitude=None, longitude=None): slices = [slice(None) for i in cube.shape] slices[coord_dim] = coord_index - if second_coord_range: + if second_coord_range is not None: slices[coord_dim2] = slice(second_coord_range[0], second_coord_range[1]) return cube[tuple(slices)] -def extract_trajectory(cube, latitudes, longitudes, number_points=2): +def extract_trajectory( + cube: Cube, + latitudes: Sequence[float], + longitudes: Sequence[float], + number_points: int = 2, +) -> Cube: """Extract data along a trajectory. latitudes and longitudes are the pairs of coordinates for two points. @@ -369,24 +422,25 @@ def extract_trajectory(cube, latitudes, longitudes, number_points=2): Parameters ---------- - cube: iris.cube.Cube - input cube. - latitudes: list - list of latitude coordinates (floats). - longitudes: list - list of longitude coordinates (floats). - number_points: int - number of points to extrapolate (optional). + cube: + Input cube. + latitudes: + Latitude coordinates. + longitudes: + Longitude coordinates. + number_points: optional + Number of points to extrapolate. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. Raises ------ ValueError - if latitude and longitude have different dimensions. + Latitude and longitude have different dimensions. + """ from iris.analysis.trajectory import interpolate diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index cabaadf396..c002b8db80 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -8,6 +8,7 @@ import pytest from cf_units import Unit from iris.cube import Cube +from iris.exceptions import CoordinateMultiDimError from iris.fileformats.pp import EARTH_RADIUS from numpy.testing._private.utils import assert_raises from shapely.geometry import Polygon, mapping @@ -27,11 +28,16 @@ class Test(tests.Test): - """Test class for the :func:`esmvalcore.preprocessor._area_pp` module.""" + """Test class for the :func:`esmvalcore.preprocessor._area` module.""" def setUp(self): """Prepare tests.""" self.coord_sys = iris.coord_systems.GeogCS(EARTH_RADIUS) - data = np.ones((5, 5), dtype=np.float32) + data = np.ones((2, 5, 5), dtype=np.float32) + times = iris.coords.DimCoord( + [0, 1], + standard_name='time', + units='days since 2000-01-01', + ) lons = iris.coords.DimCoord( [i + .5 for i in range(5)], standard_name='longitude', @@ -45,8 +51,12 @@ def setUp(self): units='degrees_north', coord_system=self.coord_sys, ) - coords_spec = [(lats, 0), (lons, 1)] - self.grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec) + coords_spec = [(times, 0), (lats, 1), (lons, 2)] + self.grid = iris.cube.Cube( + data, + dim_coords_and_dims=coords_spec, + units='kg m-2 s-1', + ) ndata = np.ones((6, 6)) nlons = iris.coords.DimCoord( @@ -63,86 +73,117 @@ def setUp(self): coord_system=self.coord_sys, ) coords_spec = [(nlats, 0), (nlons, 1)] - self.negative_grid = iris.cube.Cube(ndata, - dim_coords_and_dims=coords_spec) + self.negative_grid = iris.cube.Cube( + ndata, + dim_coords_and_dims=coords_spec, + units='kg m-2 s-1', + ) + + def _add_cell_measure_to_grid(self): + """Add cell_area to self.grid.""" + cube = guess_bounds(self.grid, ['longitude', 'latitude']) + grid_areas = iris.analysis.cartography.area_weights(cube)[0] + measure = iris.coords.CellMeasure( + grid_areas, + standard_name='cell_area', + units='m2', + measure='area') + self.grid.add_cell_measure(measure, (1, 2)) def test_area_statistics_mean(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'mean') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_cell_measure_mean(self): """Test for area average of a 2D field. - The area measure is pre-loaded in the cube + The area measure is pre-loaded in the cube. """ - cube = guess_bounds(self.grid, ['longitude', 'latitude']) - grid_areas = iris.analysis.cartography.area_weights(cube) - measure = iris.coords.CellMeasure( - grid_areas, - standard_name='cell_area', - units='m2', - measure='area') - self.grid.add_cell_measure(measure, range(0, measure.ndim)) + self._add_cell_measure_to_grid() result = area_statistics(self.grid, 'mean') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_min(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'min') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_max(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'max') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_median(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'median') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_std_dev(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'std_dev') - expected = np.array([0.], dtype=np.float32) + expected = np.ma.array([0., 0.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_sum(self): """Test for sum of a 2D field.""" result = area_statistics(self.grid, 'sum') grid_areas = iris.analysis.cartography.area_weights(self.grid) - expected = np.sum(grid_areas).astype(np.float32) + grid_sum = np.sum(grid_areas[0]) + expected = np.array([grid_sum, grid_sum]).astype(np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg s-1') + + def test_area_statistics_cell_measure_sum(self): + """Test for area sum of a 2D field. + + The area measure is pre-loaded in the cube. + """ + self._add_cell_measure_to_grid() + grid_areas = iris.analysis.cartography.area_weights(self.grid) + result = area_statistics(self.grid, 'sum') + grid_sum = np.sum(grid_areas[0]) + expected = np.array([grid_sum, grid_sum]).astype(np.float32) + self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg s-1') def test_area_statistics_variance(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'variance') - expected = np.array([0.], dtype=np.float32) + expected = np.ma.array([0., 0.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg2 m-4 s-2') def test_area_statistics_neg_lon(self): """Test for area average of a 2D field.""" result = area_statistics(self.negative_grid, 'mean') expected = np.array([1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_rms(self): """Test for area rms of a 2D field.""" result = area_statistics(self.grid, 'rms') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_extract_region(self): """Test for extracting a region from a 2D field.""" result = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5) # expected outcome - expected = np.ones((2, 2)) + expected = np.ones((2, 2, 2)) self.assert_array_equal(result.data, expected) def test_extract_region_mean(self): @@ -158,10 +199,10 @@ def test_extract_region_mean(self): self.grid.add_cell_measure(measure, range(0, measure.ndim)) region = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5) # expected outcome - expected = np.ones((2, 2)) + expected = np.ones((2, 2, 2)) self.assert_array_equal(region.data, expected) result = area_statistics(region, 'mean') - expected_mean = np.array([1.]) + expected_mean = np.ma.array([1., 1.]) self.assert_array_equal(result.data, expected_mean) def test_extract_region_neg_lon(self): @@ -501,6 +542,55 @@ def test_area_statistics_rotated(case): np.testing.assert_array_equal(cube.data, expected) +def create_unstructured_grid_cube(): + """Create test cube with unstructured grid.""" + lat = iris.coords.AuxCoord( + [0, 1, 2], var_name='lat', standard_name='latitude', units='degrees', + ) + lon = iris.coords.AuxCoord( + [0, 1, 2], var_name='lon', standard_name='longitude', units='degrees', + ) + cube = iris.cube.Cube( + [0, 10, 20], + var_name='tas', + units='K', + aux_coords_and_dims=[(lat, 0), (lon, 0)], + ) + return cube + + +def test_area_statistics_max_irregular_grid(): + """Test ``area_statistics``.""" + values = np.arange(12).reshape(2, 2, 3) + cube = create_irregular_grid_cube(values, values[0, ...], values[0, ...]) + result = area_statistics(cube, 'max') + assert isinstance(result, Cube) + np.testing.assert_array_equal(result.data, [5, 11]) + + +def test_area_statistics_max_unstructured_grid(): + """Test ``area_statistics``.""" + cube = create_unstructured_grid_cube() + result = area_statistics(cube, 'max') + assert isinstance(result, Cube) + np.testing.assert_array_equal(result.data, 20) + + +def test_area_statistics_sum_irregular_grid_fail(): + """Test ``area_statistics``.""" + values = np.arange(12).reshape(2, 2, 3) + cube = create_irregular_grid_cube(values, values[0, ...], values[0, ...]) + with pytest.raises(CoordinateMultiDimError): + area_statistics(cube, 'sum') + + +def test_area_statistics_sum_unstructured_grid_fail(): + """Test ``area_statistics``.""" + cube = create_unstructured_grid_cube() + with pytest.raises(CoordinateMultiDimError): + area_statistics(cube, 'sum') + + @pytest.fixture def make_testcube(): """Create a test cube on a Cartesian grid.""" diff --git a/tests/unit/preprocessor/_time/test_time.py b/tests/unit/preprocessor/_time/test_time.py index ab9e086532..b847a2972f 100644 --- a/tests/unit/preprocessor/_time/test_time.py +++ b/tests/unit/preprocessor/_time/test_time.py @@ -532,7 +532,11 @@ def _create_cube(data, times, bounds): standard_name='time', units=Unit('days since 1950-01-01', calendar='gregorian')) - cube = iris.cube.Cube(data, dim_coords_and_dims=[(time, 0)]) + cube = iris.cube.Cube( + data, + dim_coords_and_dims=[(time, 0)], + units='kg m-2 s-1' + ) return cube def test_time_mean(self): @@ -545,6 +549,7 @@ def test_time_mean(self): result = climate_statistics(cube, operator='mean') expected = np.array([1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_mean_uneven(self): """Test for time average of a 1D field with uneven time boundaries.""" @@ -556,6 +561,7 @@ def test_time_mean_uneven(self): result = climate_statistics(cube, operator='mean') expected = np.array([4.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_mean_365_day(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -568,6 +574,7 @@ def test_time_mean_365_day(self): result = climate_statistics(cube, operator='mean') expected = np.array([1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_sum(self): """Test for time sum of a 1D field.""" @@ -577,8 +584,9 @@ def test_time_sum(self): cube = self._create_cube(data, times, bounds) result = climate_statistics(cube, operator='sum') - expected = np.array([4.], dtype=np.float32) + expected = np.array([120.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, '86400 kg m-2') def test_time_sum_weighted(self): """Test for time sum of a 1D field.""" @@ -590,6 +598,7 @@ def test_time_sum_weighted(self): result = climate_statistics(cube, operator='sum') expected = np.array([74.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, '86400 kg m-2') def test_time_sum_uneven(self): """Test for time sum of a 1D field with uneven time boundaries.""" @@ -601,6 +610,7 @@ def test_time_sum_uneven(self): result = climate_statistics(cube, operator='sum') expected = np.array([16.0], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, '86400 kg m-2') def test_time_sum_365_day(self): """Test for time sum of a realistic time axis and 365 day calendar.""" @@ -614,6 +624,7 @@ def test_time_sum_365_day(self): result = climate_statistics(cube, operator='sum') expected = np.array([211.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, '86400 kg m-2') def test_season_climatology(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -627,6 +638,7 @@ def test_season_climatology(self): result = climate_statistics(cube, operator='mean', period=period) expected = np.array([1., 1., 1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_custom_season_climatology(self): """Test for time avg of a realisitc time axis and 365 day calendar.""" @@ -643,6 +655,7 @@ def test_custom_season_climatology(self): seasons=('jfmamj', 'jasond')) expected = np.array([1., 1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_monthly(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -656,6 +669,7 @@ def test_monthly(self): result = climate_statistics(cube, operator='mean', period=period) expected = np.ones((6, ), dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_day(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -669,6 +683,7 @@ def test_day(self): result = climate_statistics(cube, operator='mean', period=period) expected = np.array([1, 1, 1], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_hour(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -684,6 +699,7 @@ def test_hour(self): assert_array_equal(result.data, expected) expected_hours = [0, 1, 2] assert_array_equal(result.coord('hour').points, expected_hours) + self.assertEqual(result.units, 'kg m-2 s-1') def test_period_not_supported(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -706,6 +722,7 @@ def test_time_max(self): result = climate_statistics(cube, operator='max') expected = np.array([2.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_min(self): """Test for time min of a 1D field.""" @@ -717,6 +734,7 @@ def test_time_min(self): result = climate_statistics(cube, operator='min') expected = np.array([0.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_median(self): """Test for time meadian of a 1D field.""" @@ -728,6 +746,7 @@ def test_time_median(self): result = climate_statistics(cube, operator='median') expected = np.array([1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_rms(self): """Test for time rms of a 1D field.""" @@ -739,6 +758,7 @@ def test_time_rms(self): result = climate_statistics(cube, operator='rms') expected = np.array([(5 / 3)**0.5], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_dependent_fx(self): """Test average time dimension in time-dependent fx vars.""" @@ -768,6 +788,7 @@ def test_time_dependent_fx(self): self.assertEqual(result.cell_measure('ocean_volume').ndim, 2) self.assertEqual( result.ancillary_variable('land_ice_area_fraction').ndim, 2) + self.assertEqual(result.units, 'kg m-2 s-1') class TestSeasonalStatistics(tests.Test): @@ -1844,7 +1865,11 @@ def _make_cube(): coord_system=coord_sys) lons = get_lon_coord() coords_spec4 = [(time, 0), (zcoord, 1), (lats, 2), (lons, 3)] - cube1 = iris.cube.Cube(data2, dim_coords_and_dims=coords_spec4) + cube1 = iris.cube.Cube( + data2, + dim_coords_and_dims=coords_spec4, + units='kg m-2 s-1', + ) return cube1 @@ -1928,12 +1953,13 @@ def test_climate_statistics_0d_time_1d_lon(): lons = get_lon_coord() cube = iris.cube.Cube([[1.0, -1.0, 42.0]], var_name='x', - units='K', + units='K day-1', dim_coords_and_dims=[(time, 0), (lons, 1)]) new_cube = climate_statistics(cube, operator='sum', period='full') assert cube.shape == (1, 3) assert new_cube.shape == (3, ) - np.testing.assert_allclose(new_cube.data, [1.0, -1.0, 42.0]) + np.testing.assert_allclose(new_cube.data, [2.0, -2.0, 84.0]) + assert new_cube.units == 'K' def test_climate_statistics_complex_cube_sum(): @@ -1943,6 +1969,7 @@ def test_climate_statistics_complex_cube_sum(): assert cube.shape == (2, 1, 1, 3) assert new_cube.shape == (1, 1, 3) np.testing.assert_allclose(new_cube.data, [[[45.0, 45.0, 45.0]]]) + assert new_cube.units == '86400 kg m-2' def test_climate_statistics_complex_cube_mean(): @@ -1952,6 +1979,7 @@ def test_climate_statistics_complex_cube_mean(): assert cube.shape == (2, 1, 1, 3) assert new_cube.shape == (1, 1, 3) np.testing.assert_allclose(new_cube.data, [[[1.0, 1.0, 1.0]]]) + assert new_cube.units == 'kg m-2 s-1' class TestResampleHours(tests.Test): diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index 45f4b286fd..94c37614d0 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -3,8 +3,10 @@ import unittest import iris +import iris.fileformats import numpy as np from cf_units import Unit +from iris.exceptions import CoordinateMultiDimError import tests from esmvalcore.preprocessor._volume import ( @@ -53,6 +55,16 @@ def setUp(self): [25., 250.]], units='m', attributes={'positive': 'down'}) + zcoord_4d = iris.coords.AuxCoord( + np.broadcast_to([[[[0.5]], [[5.]], [[50.]]]], (2, 3, 2, 2)), + long_name='zcoord', + bounds=np.broadcast_to( + [[[[[0., 2.5]]], [[[2.5, 25.]]], [[[25., 250.]]]]], + (2, 3, 2, 2, 2), + ), + units='m', + attributes={'positive': 'down'}, + ) lons2 = iris.coords.DimCoord([1.5, 2.5], standard_name='longitude', bounds=[[1., 2.], [2., 3.]], @@ -68,16 +80,31 @@ def setUp(self): self.grid_3d = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec3) coords_spec4 = [(time, 0), (zcoord, 1), (lats2, 2), (lons2, 3)] - self.grid_4d = iris.cube.Cube(data2, dim_coords_and_dims=coords_spec4) + self.grid_4d = iris.cube.Cube( + data2, + dim_coords_and_dims=coords_spec4, + units='kg m-3', + ) coords_spec5 = [(time2, 0), (zcoord, 1), (lats2, 2), (lons2, 3)] - self.grid_4d_2 = iris.cube.Cube(data3, - dim_coords_and_dims=coords_spec5) + self.grid_4d_2 = iris.cube.Cube( + data3, + dim_coords_and_dims=coords_spec5, + units='kg m-3', + ) + + self.grid_4d_z = iris.cube.Cube( + data2, + dim_coords_and_dims=[(time, 0), (lats2, 2), (lons2, 3)], + aux_coords_and_dims=[(zcoord_4d, (0, 1, 2, 3))], + units='kg m-3', + ) # allow iris to figure out the axis='z' coordinate iris.util.guess_coord_axis(self.grid_3d.coord('zcoord')) iris.util.guess_coord_axis(self.grid_4d.coord('zcoord')) iris.util.guess_coord_axis(self.grid_4d_2.coord('zcoord')) + iris.util.guess_coord_axis(self.grid_4d_z.coord('zcoord')) def test_axis_statistics_mean(self): """Test axis statistics with operator mean.""" @@ -88,6 +115,7 @@ def test_axis_statistics_mean(self): weights = (bounds[:, 1] - bounds[:, 0]) expected = np.average(data, axis=1, weights=weights) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_median(self): """Test axis statistics in with operator median.""" @@ -96,6 +124,7 @@ def test_axis_statistics_median(self): result = axis_statistics(self.grid_4d, 'z', 'median') expected = np.median(data, axis=1) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_min(self): """Test axis statistics with operator min.""" @@ -104,6 +133,7 @@ def test_axis_statistics_min(self): result = axis_statistics(self.grid_4d, 'z', 'min') expected = np.min(data, axis=1) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_max(self): """Test axis statistics with operator max.""" @@ -112,6 +142,7 @@ def test_axis_statistics_max(self): result = axis_statistics(self.grid_4d, 'z', 'max') expected = np.max(data, axis=1) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_rms(self): """Test axis statistics with operator rms.""" @@ -124,20 +155,23 @@ def test_axis_statistics_std(self): result = axis_statistics(self.grid_4d, 'z', 'std_dev') expected = np.ma.zeros((2, 2, 2)) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_variance(self): """Test axis statistics with operator variance.""" result = axis_statistics(self.grid_4d, 'z', 'variance') expected = np.ma.zeros((2, 2, 2)) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg2 m-6') def test_axis_statistics_sum(self): """Test axis statistics in multiple operators.""" result = axis_statistics(self.grid_4d, 'z', 'sum') expected = np.ma.ones((2, 2, 2)) * 250 self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2') - def test_wrong_axis_statistics(self): + def test_wrong_axis_statistics_fail(self): """Test raises error when axis is not found in cube.""" with self.assertRaises(ValueError) as err: axis_statistics(self.grid_3d, 't', 'mean') @@ -145,7 +179,7 @@ def test_wrong_axis_statistics(self): f'Axis t not found in cube {self.grid_3d.summary(shorten=True)}', str(err.exception)) - def test_multidimensional_axis_statistics(self): + def test_multidimensional_axis_statistics_fail(self): i_coord = iris.coords.DimCoord( [0, 1], long_name='cell index along first dimension', @@ -260,12 +294,14 @@ def test_extract_volume_mean(self): result_mean = volume_statistics(result, 'mean') expected_mean = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result_mean.data, expected_mean) + self.assertEqual(result_mean.units, 'kg m-3') def test_volume_statistics(self): """Test to take the volume weighted average of a (2,3,2,2) cube.""" result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_volume_statistics_cell_measure(self): """Test to take the volume weighted average of a (2,3,2,2) cube. @@ -281,6 +317,7 @@ def test_volume_statistics_cell_measure(self): result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_volume_statistics_long(self): """Test to take the volume weighted average of a (4,3,2,2) cube. @@ -291,6 +328,7 @@ def test_volume_statistics_long(self): result = volume_statistics(self.grid_4d_2, 'mean') expected = np.ma.array([1., 1., 1., 1.], mask=False) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_volume_statistics_masked_level(self): """Test to take the volume weighted average of a (2,3,2,2) cube where @@ -307,6 +345,7 @@ def test_volume_statistics_masked_timestep(self): result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1], mask=[True, False]) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_volume_statistics_weights(self): """Test to take the volume weighted average of a (2,3,2,2) cube. @@ -324,13 +363,46 @@ def test_volume_statistics_weights(self): expected = np.ma.array([8.333333333333334, 19.144144144144143], mask=[False, False]) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') - def test_volume_statistics_wrong_operator(self): + def test_volume_statistics_wrong_operator_fail(self): with self.assertRaises(ValueError) as err: volume_statistics(self.grid_4d, 'wrong') self.assertEqual('Volume operator wrong not recognised.', str(err.exception)) + def test_volume_statistics_2d_lat_fail(self): + # Create dummy 2D latitude from depth + new_lat_coord = self.grid_4d_z.coord('zcoord')[0, 0, :, :] + new_lat_coord.rename('latitude') + self.grid_4d_z.remove_coord('latitude') + self.grid_4d_z.add_aux_coord(new_lat_coord, (2, 3)) + with self.assertRaises(CoordinateMultiDimError): + volume_statistics(self.grid_4d_z, 'mean') + + def test_volume_statistics_4d_depth_fail(self): + # Fails because depth coord dims are (0, ...), but must be (1, ...) + with self.assertRaises(ValueError) as err: + volume_statistics(self.grid_4d_z, 'mean') + self.assertIn( + "Supplementary variables are needed to calculate grid cell " + "volumes for cubes with 4D depth coordinate, got cube ", + str(err.exception), + ) + + def test_volume_statistics_2d_depth_fail(self): + # Create new 2D depth coord + new_z_coord = self.grid_4d_z.coord('zcoord')[0, :, :, 0] + self.grid_4d_z.remove_coord('zcoord') + self.grid_4d_z.add_aux_coord(new_z_coord, (1, 2)) + with self.assertRaises(ValueError) as err: + volume_statistics(self.grid_4d_z, 'mean') + self.assertIn( + "Supplementary variables are needed to calculate grid cell " + "volumes for cubes with 2D depth coordinate, got cube ", + str(err.exception), + ) + def test_depth_integration_1d(self): """Test to take the depth integration of a 3 layer cube.""" result = depth_integration(self.grid_3d[:, 0, 0])