Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions doc/develop/fixing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,30 @@ strictness to the highest:
- ``strict``: fail if there are any warnings, this is the highest level of
strictness. Mostly useful for checking datasets that you have produced, to
be sure that future users will not be distracted by inoffensive warnings.


Natively supported non-CMOR datasets
====================================

Fixed datasets are supported through the ``native6`` project. Below is a list of
datasets currently supported.

ERA5
----

- Supported variables: ``clt``, ``evspsbl``, ``evspsblpot``, ``mrro``, ``pr``, ``prsn``, ``ps``, ``psl``, ``ptype``, ``rls``, ``rlds``, ``rsds``, ``rsdt``, ``rss``, ``uas``, ``vas``, ``tas``, ``tasmax``, ``tasmin``, ``tdps``, ``ts``, ``tsn`` (``E1hr``/``Amon``), ``orog`` (``fx``)
- Tier: 3

MSWEP
-----

- Supported variables: ``pr``
- Supported frequencies: ``mon``, ``day``, ``3hr``.
- Tier: 3

For example, for monthly data, place the files in the ``/Tier3/MSWEP/latestversion/mon/pr`` subdirectory of your ``native6`` project location.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a general structure now for all datasets or specific to MSWEP? As in: do we need the same for ERA5?

Also: would it make sense to use the actual version ("V220") instead of the latest version?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just put ERA5 there so the documentation is complete, I agree that this is best done in a separate PR for documentation fixes. Putting latestversion in the path always works, whereas putting V220 may go out of date at some point.

This is not meant as a general structure, just something that should be complete for now. When more datasets will be supported, we can think about how to better organize the documentation there.


.. note::
   For monthly data (V220), the file name must be suffixed with the date range, e.g. rename ``global_monthly_050deg.nc`` to ``global_monthly_050deg_197901-201710.nc``.

For more info: http://www.gloh2o.org/
129 changes: 129 additions & 0 deletions esmvalcore/cmor/_fixes/native6/mswep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""Fixes for MSWEP."""
from datetime import datetime

import cf_units
import numpy as np
from cf_units import Unit

from ..fix import Fix


def fix_time_month(cube):
    """Fix time coordinates for monthly values.

    Convert from months since 1899-12 to days since 1850 as per CMOR
    standard. Every time point is placed on the 15th of its month.

    The cube is modified in place; nothing is returned.

    Parameters
    ----------
    cube
        Cube with a ``time`` coordinate whose unit origin has the form
        ``'<unit> since YYYY-MM'``. A trailing day and/or time part in the
        reference date (e.g. ``'months since 1899-12-01 00:00:00'``) is
        accepted and ignored.
    """
    time_coord = cube.coord('time')

    # The third whitespace-separated token of the origin is the reference
    # date. Only its year and month are needed, so any day component is
    # dropped instead of breaking the two-value unpacking.
    reference_date = time_coord.units.origin.split()[2]
    origin_year, origin_month = (
        int(val) for val in reference_date.split('-')[:2]
    )

    dates = []
    for time_point in time_coord.points:
        # Work with zero-based months so that divmod yields the number of
        # whole years to add and the zero-based month within that year.
        years, month_index = divmod(origin_month - 1 + time_point, 12)
        dates.append(
            datetime(origin_year + int(years), int(month_index) + 1, 15))

    t_unit = cf_units.Unit("days since 1850-01-01", calendar="standard")

    time_coord.points = t_unit.date2num(dates)
    time_coord.units = t_unit


def fix_time_day(cube):
    """Fix time coordinates for daily (and sub-daily) values.

    Re-express the ``time`` coordinate of ``cube`` in days since 1850
    as per CMOR standard (the source data is documented as using days
    since 1899-12-31). The coordinate is converted in place.
    """
    cube.coord('time').convert_units('days since 1850-1-1 00:00:00.0')


def fix_longitude(cube):
    """Shift the longitude coordinate of ``cube`` from -180:180 to 0:360.

    Both the coordinate points and the data are rotated in place so that
    the block of negative longitudes ends up after the non-negative ones.

    Raises
    ------
    ValueError
        If the longitude coordinate is not monotonic.
    """
    lon_dims = cube.coord_dims('longitude')
    lon_coord = cube.coord(axis='X')

    if not lon_coord.is_monotonic():
        raise ValueError("Data must be monotonic to fix longitude.")

    # Wrapping with ``% 360`` alone would break monotonicity, which iris
    # does not accept for `lon.points`; rolling the negative block to the
    # end keeps the points increasing.
    n_negative = np.sum(lon_coord.points < 0)

    cube.data = np.roll(cube.core_data(), -n_negative, axis=lon_dims)
    lon_coord.points = np.roll(lon_coord.points, -n_negative) % 360


class Pr(Fix):
    """Fixes for pr."""

    def fix_metadata(self, cubes):
        """Fix metadata.

        Apply the name, unit, time, longitude and bounds fixes to every
        cube in ``cubes``. The cubes are modified in place and the same
        ``cubes`` object is returned.
        """
        for cube in cubes:
            self._fix_names(cube)
            self._fix_units(cube)
            self._fix_time(cube)
            fix_longitude(cube)
            self._fix_bounds(cube)

        return cubes

    def _fix_time(self, cube):
        """Fix time.

        Dispatch to the time fix matching ``self.vardef.frequency``.

        Raises
        ------
        ValueError
            If the frequency is not one of ``'mon'``, ``'day'``, ``'3hr'``.
        """
        frequency = self.vardef.frequency

        if frequency in ('day', '3hr'):
            fix_time_day(cube)
        elif frequency == 'mon':
            fix_time_month(cube)
        else:
            raise ValueError(f'Cannot fix time for frequency: {frequency!r}')

    def _fix_units(self, cube):
        """Convert units from mm/[t] to kg m-2 s-1 units.

        The data are accumulated amounts per time step ``[t]``, so they
        are divided by the number of seconds in one time step of the
        variable's frequency.

        Raises
        ------
        ValueError
            If the frequency is not one of ``'mon'``, ``'day'``, ``'3hr'``.
        """
        frequency = self.vardef.frequency

        if frequency == 'mon':
            # number of seconds in a month (approximated as 30 days)
            seconds_per_step = 60 * 60 * 24 * 30
        elif frequency == 'day':
            # number of seconds in a day
            seconds_per_step = 60 * 60 * 24
        elif frequency == '3hr':
            # number of seconds in a 3-hour time step
            seconds_per_step = 60 * 60 * 3
        else:
            raise ValueError(f'Cannot fix units for frequency: {frequency!r}')

        cube.units = Unit(self.vardef.units)
        # Divide by the precomputed total: writing ``x / 60 * 60 * 24``
        # would divide by 60 and then multiply by the remaining factors.
        cube.data = cube.core_data() / seconds_per_step

    def _fix_bounds(self, cube):
        """Add bounds to coords.

        Bounds are guessed for every coordinate that the variable
        definition marks as ``must_have_bounds == 'yes'`` and that does
        not have bounds yet.
        """
        for coord_def in self.vardef.coordinates.values():
            if coord_def.must_have_bounds != 'yes':
                continue

            coord = cube.coord(axis=coord_def.axis)

            if coord.bounds is None:
                coord.guess_bounds()

    def _fix_names(self, cube):
        """Fix miscellaneous.

        Set the cube's variable, standard and long names from the
        variable definition, and fill in the long name of each defined
        coordinate that does not have one.
        """
        cube.var_name = self.vardef.short_name
        cube.standard_name = self.vardef.standard_name
        cube.long_name = self.vardef.long_name

        for coord_def in self.vardef.coordinates.values():
            coord = cube.coord(axis=coord_def.axis)
            # Keep a long name that is already present on the coordinate.
            if not coord.long_name:
                coord.long_name = coord_def.long_name
Binary file not shown.
Binary file not shown.
133 changes: 133 additions & 0 deletions tests/integration/cmor/_fixes/native6/test_mswep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""Tests for the fixes of MSWEP."""
from pathlib import Path

import iris
import numpy as np
import pytest

from esmvalcore.cmor._fixes.native6.mswep import (
Pr,
fix_longitude,
fix_time_day,
fix_time_month,
)
from esmvalcore.cmor.fix import Fix
from esmvalcore.cmor.table import CMOR_TABLES


@pytest.mark.parametrize('mip_table', ('Amon', 'day'))
def test_get_pr_fix(mip_table):
    """Check that the MSWEP ``Pr`` fix is returned for ``mip_table``."""
    fixes = Fix.get_fixes('native6', 'MSWEP', mip_table, 'pr')
    first_fix = fixes[0]
    assert isinstance(first_fix, Pr)


@pytest.fixture
def cube_month():
    """Load the monthly MSWEP sample cube (shape 3x5x5).

    The sample file lives next to this test module and was extracted
    from the full dataset with::

        out = cube[0:3, 0:360:72, 0:720:144]
        iris.save(out, 'mswep_month.nc')
    """
    sample_file = Path(__file__).with_name('mswep_month.nc')
    cube = iris.load_cube(str(sample_file))
    return cube


@pytest.fixture
def cube_day():
    """Load the daily MSWEP sample cube (shape 3x5x5).

    The sample file lives next to this test module and was extracted
    from the full dataset with::

        out = cube[0:3, 0:360:72, 0:720:144]
        iris.save(out, 'mswep_day.nc')
    """
    sample_file = Path(__file__).with_name('mswep_day.nc')
    cube = iris.load_cube(str(sample_file))
    return cube


@pytest.fixture
def fix_month():
    """Build the ``Pr`` fix from the ``native6`` ``Amon`` ``pr`` definition."""
    return Pr(CMOR_TABLES['native6'].get_variable('Amon', 'pr'))


@pytest.fixture
def fix_day():
    """Build the ``Pr`` fix from the ``native6`` ``day`` ``pr`` definition."""
    return Pr(CMOR_TABLES['native6'].get_variable('day', 'pr'))


def test_fix_names(fix_month, cube_month):
    """Test `Pr._fix_names`.

    After the fix, the cube names and the long name of every defined
    coordinate must equal the values in the variable definition.
    """
    expected = fix_month.vardef

    fix_month._fix_names(cube_month)

    assert cube_month.var_name == expected.short_name
    assert cube_month.long_name == expected.long_name
    assert cube_month.standard_name == expected.standard_name

    for coord_def in expected.coordinates.values():
        fixed_coord = cube_month.coord(axis=coord_def.axis)
        assert fixed_coord.long_name == coord_def.long_name


def test_fix_units_month(fix_month, cube_month):
    """Test `Pr._fix_units` on monthly data: units match the definition."""
    expected_units = fix_month.vardef.units
    fix_month._fix_units(cube_month)
    assert cube_month.units == expected_units


def test_fix_units_day(fix_day, cube_day):
    """Test `Pr._fix_units` on daily data: units match the definition."""
    expected_units = fix_day.vardef.units
    fix_day._fix_units(cube_day)
    assert cube_day.units == expected_units


def test_fix_time_month(cube_month):
    """Test `fix_time_month`: the time unit becomes days since 1850."""
    fix_time_month(cube_month)

    fixed_units = cube_month.coord('time').units
    assert fixed_units == 'days since 1850-01-01'


def test_fix_time_day(cube_day):
    """Test `fix_time_day`: the time unit becomes days since 1850."""
    fix_time_day(cube_day)

    fixed_units = cube_day.coord('time').units
    assert fixed_units == 'days since 1850-01-01'


def test_fix_longitude(fix_month, cube_month):
    """Test `fix_longitude`.

    Checks that the fixed longitude is monotonic, lies within the valid
    range of the coordinate definition, and that the data were rolled
    along the longitude axis together with the points.
    """
    unfixed_data = cube_month.data.copy()
    unfixed_lon = cube_month.coord(axis='X')
    # Number of negative longitudes, i.e. how far the data get rolled.
    shift = (unfixed_lon.points < 0).sum()

    fix_longitude(cube_month)

    lon = cube_month.coord(axis='X')

    # `is_monotonic` is a method; asserting the bound method itself
    # would always pass.
    assert lon.is_monotonic()

    coord_def = fix_month.vardef.coordinates['longitude']
    valid_min = float(coord_def.valid_min)
    valid_max = float(coord_def.valid_max)

    assert lon.points.min() >= valid_min
    assert lon.points.max() <= valid_max

    # make sure that data are rolled correctly along lon axis
    assert np.all(unfixed_data[:, :, 0] == cube_month.data[:, :, -shift])


def test_fix_bounds(fix_month, cube_month):
    """Test `Pr._fix_bounds`: the X, Y and T coordinates all get bounds."""
    fix_month._fix_bounds(cube_month)

    for axis_name in ('X', 'Y', 'T'):
        assert cube_month.coord(axis=axis_name).has_bounds()