ESMValGroup · nielsdrost · Feb 2, 2021 · Jan 28, 2021 · Jan 28, 2021 · Jan 29, 2021
diff --git a/doc/develop/fixing_data.rst b/doc/develop/fixing_data.rst
@@ -305,3 +305,30 @@ strictness to the highest:
 - ``strict``: fail if there are any warnings, this is the highest level of
     strictness. Mostly useful for checking datasets that you have produced, to
     be sure that future users will not be distracted by inoffensive warnings.
+
+
+Natively supported non-CMOR datasets
+====================================
+
+Fixed datasets are supported through the ``native6`` project. Below is a list of
+datasets currently supported.
+
+ERA5
+----
+
+- Supported variables: ``clt``, ``evspsbl``, ``evspsblpot``, ``mrro``, ``pr``, ``prsn``, ``ps``, ``psl``, ``ptype``, ``rls``, ``rlds``, ``rsds``, ``rsdt``, ``rss``, ``uas``, ``vas``, ``tas``, ``tasmax``, ``tasmin``, ``tdps``, ``ts``, ``tsn`` (``E1hr``/``Amon``), ``orog`` (``fx``)
+- Tier: 3
+
+MSWEP
+-----
+
+- Supported variables: ``pr``
+- Supported frequencies: ``mon``, ``day``, ``3hr``.
+- Tier: 3
+
+For example, for monthly data, place the files in the ``/Tier3/MSWEP/latestversion/mon/pr`` subdirectory of your ``native6`` project location.
+
+.. note::
+  For monthly data (V220), the file name must be postfixed with the date range, e.g. rename ``global_monthly_050deg.nc`` to ``global_monthly_050deg_197901-201710.nc``.
+
+For more info: http://www.gloh2o.org/
diff --git a/esmvalcore/cmor/_fixes/native6/mswep.py b/esmvalcore/cmor/_fixes/native6/mswep.py
@@ -0,0 +1,129 @@
+"""Fixes for MSWEP."""
+from datetime import datetime
+
+import cf_units
+import numpy as np
+from cf_units import Unit
+
+from ..fix import Fix
+
+
+def fix_time_month(cube):
+    """Fix time coordinate for monthly values.
+
+    Convert the time points from months since a reference month (e.g.
+    ``months since 1899-12``) to days since 1850-01-01 in the standard
+    calendar, as per CMOR standard. Every monthly value is placed on the
+    15th day of its month. The time coordinate of ``cube`` is modified in
+    place; nothing is returned.
+    """
+    time_coord = cube.coord('time')
+    origin = time_coord.units.origin
+
+    # The third whitespace-separated token of e.g. "months since 1899-12"
+    # is the reference date. Only its year and month are used, so a
+    # reference date that also carries a day (e.g. "1899-12-01") is
+    # accepted instead of failing on the unpacking.
+    origin_year, origin_month = [
+        int(val) for val in origin.split()[2].split('-')[:2]
+    ]
+
+    dates = []
+
+    for time_point in time_coord.points:
+        # Count the months from January of the origin year, then split
+        # the count into whole years and a zero-based month index.
+        years, month_index = divmod(origin_month - 1 + time_point, 12)
+        dates.append(
+            datetime(origin_year + int(years), int(month_index) + 1, 15))
+
+    t_unit = cf_units.Unit("days since 1850-01-01", calendar="standard")
+
+    time_coord.points = t_unit.date2num(dates)
+    time_coord.units = t_unit
+
+
+def fix_time_day(cube):
+    """Fix time coordinate for daily (and sub-daily) values.
+
+    Convert the units of the time coordinate of ``cube`` in place to days
+    since 1850 as per CMOR standard. The input is expected to be given
+    relative to a day-based origin such as ``days since 1899-12-31``.
+    """
+    cube.coord('time').convert_units('days since 1850-1-1 00:00:00.0')
+
+
+def fix_longitude(cube):
+    """Shift the longitude axis of ``cube`` from -180:180 to 0:360.
+
+    The longitude points and the cube data are both rolled, in place, so
+    that the negative longitudes end up behind the non-negative ones.
+    A ``ValueError`` is raised if the longitude points are not monotonic.
+    """
+    lon_dims = cube.coord_dims('longitude')
+    lon_coord = cube.coord(axis='X')
+
+    if not lon_coord.is_monotonic():
+        raise ValueError("Data must be monotonic to fix longitude.")
+
+    # iris forces `lon_coord.points` to be strictly monotonic, so taking
+    # the points modulo 360 is not enough: the negative part of the axis is
+    # moved to the end first, and the data are rolled by the same amount to
+    # stay aligned with the points.
+    n_negative = (lon_coord.points < 0).sum()
+    wrapped_points = np.roll(lon_coord.points, -n_negative) % 360
+
+    cube.data = np.roll(cube.core_data(), -n_negative, axis=lon_dims)
+    lon_coord.points = wrapped_points
+
+
+class Pr(Fix):
+    """Fixes for pr."""
+
+    # Length in seconds of the accumulation period for every supported
+    # frequency. A month is approximated as 30 days.
+    _SECONDS_PER_PERIOD = {
+        '3hr': 3 * 60 * 60,
+        'day': 24 * 60 * 60,
+        'mon': 30 * 24 * 60 * 60,
+    }
+
+    def fix_metadata(self, cubes):
+        """Fix metadata.
+
+        For every cube the names, units, time coordinate, longitude
+        coordinate and coordinate bounds are fixed in place, in this order.
+        The ``cubes`` object that was passed in is returned.
+        """
+        for cube in cubes:
+            self._fix_names(cube)
+            self._fix_units(cube)
+            self._fix_time(cube)
+            fix_longitude(cube)
+            self._fix_bounds(cube)
+
+        return cubes
+
+    def _fix_time(self, cube):
+        """Fix time.
+
+        Dispatch on the frequency of the variable definition: ``day`` and
+        ``3hr`` use `fix_time_day`, ``mon`` uses `fix_time_month`. Any
+        other frequency raises a ``ValueError``.
+        """
+        frequency = self.vardef.frequency
+
+        if frequency in ('day', '3hr'):
+            fix_time_day(cube)
+        elif frequency == 'mon':
+            fix_time_month(cube)
+        else:
+            raise ValueError(f'Cannot fix time for frequency: {frequency!r}')
+
+    def _fix_units(self, cube):
+        """Convert units from mm/[t] to kg m-2 s-1 units.
+
+        The data are taken to be the precipitation in mm (equal to
+        kg m-2) accumulated over one period ``[t]`` of the variable's
+        frequency: 3 hours, a day, or a month. Dividing by the length of
+        that period in seconds yields the flux in kg m-2 s-1. A month is
+        approximated as 30 days.
+
+        Raises a ``ValueError`` for any other frequency; in that case the
+        cube is left untouched.
+        """
+        frequency = self.vardef.frequency
+
+        if frequency not in self._SECONDS_PER_PERIOD:
+            raise ValueError(f'Cannot fix units for frequency: {frequency!r}')
+
+        cube.units = Unit(self.vardef.units)
+        # The divisor is a single number on purpose: writing the product
+        # inline as `x / 60 * 60 * 24` would be evaluated left to right
+        # and multiply instead of divide.
+        cube.data = cube.core_data() / self._SECONDS_PER_PERIOD[frequency]
+
+    def _fix_bounds(self, cube):
+        """Add bounds to coords.
+
+        Only coordinates whose definition has ``must_have_bounds`` set to
+        ``'yes'`` are considered; bounds are guessed for those that do not
+        have any yet.
+        """
+        for coord_def in self.vardef.coordinates.values():
+            if coord_def.must_have_bounds != 'yes':
+                continue
+
+            coord = cube.coord(axis=coord_def.axis)
+
+            if coord.bounds is None:
+                coord.guess_bounds()
+
+    def _fix_names(self, cube):
+        """Fix the names of the cube and of its coordinates.
+
+        The cube gets the short, standard and long name of the variable
+        definition. Coordinates that have no long name yet get the one
+        from their coordinate definition.
+        """
+        cube.var_name = self.vardef.short_name
+        cube.standard_name = self.vardef.standard_name
+        cube.long_name = self.vardef.long_name
+
+        for coord_def in self.vardef.coordinates.values():
+            coord = cube.coord(axis=coord_def.axis)
+            if not coord.long_name:
+                coord.long_name = coord_def.long_name
diff --git a/tests/integration/cmor/_fixes/native6/mswep_day.nc b/tests/integration/cmor/_fixes/native6/mswep_day.nc
diff --git a/tests/integration/cmor/_fixes/native6/mswep_month.nc b/tests/integration/cmor/_fixes/native6/mswep_month.nc
diff --git a/tests/integration/cmor/_fixes/native6/test_mswep.py b/tests/integration/cmor/_fixes/native6/test_mswep.py
@@ -0,0 +1,133 @@
+"""Tests for the fixes of MSWEP."""
+from pathlib import Path
+
+import iris
+import numpy as np
+import pytest
+
+from esmvalcore.cmor._fixes.native6.mswep import (
+    Pr,
+    fix_longitude,
+    fix_time_day,
+    fix_time_month,
+)
+from esmvalcore.cmor.fix import Fix
+from esmvalcore.cmor.table import CMOR_TABLES
+
+
+@pytest.mark.parametrize('mip_table', ('Amon', 'day'))
+def test_get_pr_fix(mip_table):
+    """Check that the `Pr` fix is the first one found for MSWEP pr."""
+    found_fixes = Fix.get_fixes('native6', 'MSWEP', mip_table, 'pr')
+    first_fix = found_fixes[0]
+    assert isinstance(first_fix, Pr)
+
+
+@pytest.fixture
+def cube_month():
+    """Load the monthly MSWEP sample stored next to this file (3x5x5)."""
+    # The sample was extracted from the full monthly data with:
+    #   out = cube[0:3, 0:360:72, 0:720:144]
+    #   iris.save(out, 'mswep_month.nc')
+    sample = Path(__file__).parent / 'mswep_month.nc'
+    return iris.load_cube(str(sample))
+
+
+@pytest.fixture
+def cube_day():
+    """Load the daily MSWEP sample stored next to this file (3x5x5)."""
+    # The sample was extracted from the full daily data with:
+    #   out = cube[0:3, 0:360:72, 0:720:144]
+    #   iris.save(out, 'mswep_day.nc')
+    sample = Path(__file__).parent / 'mswep_day.nc'
+    return iris.load_cube(str(sample))
+
+
+@pytest.fixture
+def fix_month():
+    """Return the `Pr` fix built for the ``Amon`` pr variable of native6."""
+    return Pr(CMOR_TABLES['native6'].get_variable('Amon', 'pr'))
+
+
+@pytest.fixture
+def fix_day():
+    """Return the `Pr` fix built for the ``day`` pr variable of native6."""
+    return Pr(CMOR_TABLES['native6'].get_variable('day', 'pr'))
+
+
+def test_fix_names(fix_month, cube_month):
+    """Test `Pr._fix_names`."""
+    fix_month._fix_names(cube_month)
+
+    expected = fix_month.vardef
+
+    assert cube_month.var_name == expected.short_name
+    assert cube_month.long_name == expected.long_name
+    assert cube_month.standard_name == expected.standard_name
+
+    # Each coordinate of the variable definition is looked up by its axis
+    # and must carry the long name of its definition.
+    for coord_def in expected.coordinates.values():
+        coord = cube_month.coord(axis=coord_def.axis)
+        assert coord.long_name == coord_def.long_name
+
+
+def test_fix_units_month(fix_month, cube_month):
+    """Test `Pr._fix_units` on monthly data."""
+    expected_units = fix_month.vardef.units
+    fix_month._fix_units(cube_month)
+    assert cube_month.units == expected_units
+
+
+def test_fix_units_day(fix_day, cube_day):
+    """Test `Pr._fix_units` on daily data."""
+    expected_units = fix_day.vardef.units
+    fix_day._fix_units(cube_day)
+    assert cube_day.units == expected_units
+
+
+def test_fix_time_month(cube_month):
+    """Check that `fix_time_month` sets the CMOR time units."""
+    fix_time_month(cube_month)
+
+    assert cube_month.coord('time').units == 'days since 1850-01-01'
+
+
+def test_fix_time_day(cube_day):
+    """Check that `fix_time_day` sets the CMOR time units."""
+    fix_time_day(cube_day)
+
+    assert cube_day.coord('time').units == 'days since 1850-01-01'
+
+
+def test_fix_longitude(fix_month, cube_month):
+    """Test `fix_longitude`.
+
+    Checks that the fixed longitude is monotonic, that it lies within the
+    valid range of the coordinate definition, and that the data were
+    rolled along the longitude axis together with the points.
+    """
+    # Taken before the fix: the fix modifies the cube in place.
+    unfixed_data = cube_month.data.copy()
+    unfixed_lon = cube_month.coord(axis='X')
+    shift = (unfixed_lon.points < 0).sum()
+
+    fix_longitude(cube_month)
+
+    lon = cube_month.coord(axis='X')
+
+    # `is_monotonic` is a method: asserting the attribute itself would
+    # always pass, so it has to be called.
+    assert lon.is_monotonic()
+
+    coord_def = fix_month.vardef.coordinates['longitude']
+    valid_min = float(coord_def.valid_min)
+    valid_max = float(coord_def.valid_max)
+
+    assert lon.points.min() >= valid_min
+    assert lon.points.max() <= valid_max
+
+    # make sure that data are rolled correctly along lon axis: the first
+    # original column must now sit `shift` places before the end.
+    assert np.all(unfixed_data[:, :, 0] == cube_month.data[:, :, -shift])
+
+
+def test_fix_bounds(fix_month, cube_month):
+    """Check that `Pr._fix_bounds` leaves bounds on the X, Y and T axes."""
+    fix_month._fix_bounds(cube_month)
+
+    for axis in ('X', 'Y', 'T'):
+        assert cube_month.coord(axis=axis).has_bounds()