From b2109a7279e118d1d224aac2b21b62c55bddf3d3 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 26 Sep 2019 14:27:58 +0100 Subject: [PATCH 01/24] optional for multiple exps --- esmvalcore/preprocessor/_io.py | 76 ++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index f8c1bbbd9b..f1bb59e471 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -90,6 +90,16 @@ def concatenate(cubes): concatenated = iris.cube.CubeList(cubes).concatenate() if len(concatenated) == 1: return concatenated[0] + else: + if len(cubes) == 2: + if cubes[0].coord('time').points[0] <= \ + cubes[0].coord('time').points[0]: + cubes = [cubes[0], cubes[1]] + else: + cubes = [cubes[1], cubes[0]] + concatenated = _concatenate_two_overlapping_cubes(cubes) + if len(concatenated) == 1: + return concatenated logger.error('Can not concatenate cubes into a single one.') logger.error('Resulting cubes:') for cube in concatenated: @@ -274,3 +284,69 @@ def _write_ncl_metadata(output_dir, metadata): write_ncl_settings(info, filename) return filename + + +def _concatenate_two_overlapping_cubes(cubes): + """Concatenate time-overlapping cubes (two cubes only).""" + c1 = cubes[0] + c2 = cubes[1] + common_time_points = [t for t in c1.coord('time').points \ + if t in c2.coord('time').points] + if common_time_points: + time_units = c1.coord('time').units + cstart_year = time_units.num2date(common_time_points[0]).year + cstart_month = time_units.num2date(common_time_points[0]).month + cstart_day = time_units.num2date(common_time_points[0]).day + cend_year = time_units.num2date(common_time_points[-1]).year + cend_month = time_units.num2date(common_time_points[-1]).month + cend_day = time_units.num2date(common_time_points[-1]).day + c1start_year = time_units.num2date(common_time_points[0]).year + c1start_month = time_units.num2date(c1.coord('time').points[0]).month + c1start_day = time_units.num2date(c1.coord('time').points[0]).day + c1end_year = time_units.num2date(c1.coord('time').points[-1]).year + c1end_month = time_units.num2date(c1.coord('time').points[-1]).month + c1end_day = time_units.num2date(c1.coord('time').points[-1]).day + c2start_year = time_units.num2date(c2.coord('time').points[0]).year + c2start_month = time_units.num2date(c2.coord('time').points[0]).month + c2start_day = time_units.num2date(c2.coord('time').points[0]).day + c2end_year = time_units.num2date(c2.coord('time').points[-1]).year + c2end_month = time_units.num2date(c2.coord('time').points[-1]).month + c2end_day = time_units.num2date(c2.coord('time').points[-1]).day + overlap_data = extract_time(c1, cstart_year, cstart_month, + cstart_day, cend_year, cend_month, + cend_day) + + # c1 is to the left of c2 + if c1.coord('time').points[0] < c2.coord('time').points[0]: + c1_delta = extract_time(c1, c1start_year, c1start_month, + c1start_day, cstart_year, cstart_month, + cstart_day) + c2_delta = extract_time(c2, cend_year, cend_month, + cend_day, c2end_year, c2end_month, + c2end_day) + cubes = iris.cube.CubeList([c1_delta, overlap_data, c2_delta]) + try: + cube = iris.cube.CubeList(cubes).concatenate_cube() + return cube + except iris.exceptions.ConcatenateError as ex: + logger.error('Can not concatenate cubes: %s', ex) + logger.error('Cubes:') + for cube in cubes: + logger.error(cube) + raise ex + elif c1.coord('time').points[0] == c2.coord('time').points[0]: + c2_delta = extract_time(c2, cend_year, cend_month, + cend_day, c2end_year, c2end_month, + c2end_day) + cubes = iris.cube.CubeList([overlap_data, c2_delta]) + try: + cube = iris.cube.CubeList(cubes).concatenate_cube() + return cube + except iris.exceptions.ConcatenateError as ex: + logger.error('Can not concatenate cubes: %s', ex) + logger.error('Cubes:') + for cube in cubes: + logger.error(cube) + raise ex + else: + return cubes From 1b4ac2c564a64b7101cc918219f78b61384b0616 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 26 Sep 2019 14:43:59 +0100 Subject: [PATCH 02/24] cleaned up --- esmvalcore/preprocessor/_io.py | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index f1bb59e471..d4794c40e3 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -12,6 +12,7 @@ import numpy as np import yaml +from esmvalcore.preprocessor._time import extract_time from .._task import write_ncl_settings logger = logging.getLogger(__name__) @@ -90,16 +91,15 @@ def concatenate(cubes): concatenated = iris.cube.CubeList(cubes).concatenate() if len(concatenated) == 1: return concatenated[0] - else: - if len(cubes) == 2: - if cubes[0].coord('time').points[0] <= \ + if len(cubes) == 2: + if cubes[0].coord('time').points[0] <= \ cubes[0].coord('time').points[0]: - cubes = [cubes[0], cubes[1]] - else: - cubes = [cubes[1], cubes[0]] - concatenated = _concatenate_two_overlapping_cubes(cubes) - if len(concatenated) == 1: - return concatenated + cubes = [cubes[0], cubes[1]] + else: + cubes = [cubes[1], cubes[0]] + concatenated = _concatenate_two_overlapping_cubes(cubes) + if len(concatenated) == 1: + return concatenated logger.error('Can not concatenate cubes into a single one.') logger.error('Resulting cubes:') for cube in concatenated: @@ -290,7 +290,7 @@ def _concatenate_two_overlapping_cubes(cubes): """Concatenate time-overlapping cubes (two cubes only).""" c1 = cubes[0] c2 = cubes[1] - common_time_points = [t for t in c1.coord('time').points \ + common_time_points = [t for t in c1.coord('time').points if t in c2.coord('time').points] if common_time_points: time_units = c1.coord('time').units @@ -303,18 +303,12 @@ def _concatenate_two_overlapping_cubes(cubes): c1start_year = time_units.num2date(common_time_points[0]).year c1start_month = time_units.num2date(c1.coord('time').points[0]).month c1start_day = time_units.num2date(c1.coord('time').points[0]).day - c1end_year = time_units.num2date(c1.coord('time').points[-1]).year - c1end_month = time_units.num2date(c1.coord('time').points[-1]).month - c1end_day = time_units.num2date(c1.coord('time').points[-1]).day - c2start_year = time_units.num2date(c2.coord('time').points[0]).year - c2start_month = time_units.num2date(c2.coord('time').points[0]).month - c2start_day = time_units.num2date(c2.coord('time').points[0]).day c2end_year = time_units.num2date(c2.coord('time').points[-1]).year c2end_month = time_units.num2date(c2.coord('time').points[-1]).month c2end_day = time_units.num2date(c2.coord('time').points[-1]).day overlap_data = extract_time(c1, cstart_year, cstart_month, - cstart_day, cend_year, cend_month, - cend_day) + cstart_day, cend_year, cend_month, + cend_day) # c1 is to the left of c2 if c1.coord('time').points[0] < c2.coord('time').points[0]: @@ -327,7 +321,7 @@ def _concatenate_two_overlapping_cubes(cubes): cubes = iris.cube.CubeList([c1_delta, overlap_data, c2_delta]) try: cube = iris.cube.CubeList(cubes).concatenate_cube() - return cube + return [cube] except iris.exceptions.ConcatenateError as ex: logger.error('Can not concatenate cubes: %s', ex) logger.error('Cubes:') @@ -341,7 +335,7 @@ def _concatenate_two_overlapping_cubes(cubes): cubes = iris.cube.CubeList([overlap_data, c2_delta]) try: cube = iris.cube.CubeList(cubes).concatenate_cube() - return cube + return [cube] except iris.exceptions.ConcatenateError as ex: logger.error('Can not concatenate cubes: %s', ex) logger.error('Cubes:') From 745d7c34803c8bd404f7852f34316564ebd089f8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 26 Sep 2019 15:40:58 +0100 Subject: [PATCH 03/24] cleaned up more --- esmvalcore/preprocessor/_io.py | 44 ++++++++++++++-------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index d4794c40e3..fcd99d1420 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -92,14 +92,15 @@ def concatenate(cubes): if len(concatenated) == 1: return concatenated[0] if len(cubes) == 2: - if cubes[0].coord('time').points[0] <= \ - cubes[0].coord('time').points[0]: - cubes = [cubes[0], cubes[1]] - else: - cubes = [cubes[1], cubes[0]] - concatenated = _concatenate_two_overlapping_cubes(cubes) - if len(concatenated) == 1: - return concatenated + if 'time' in [crd.standard_name for crd in cubes[0].coords()]: + if cubes[0].coord('time').points[0] <= \ + cubes[0].coord('time').points[0]: + cubes = [cubes[0], cubes[1]] + else: + cubes = [cubes[1], cubes[0]] + concatenated = _concatenate_two_overlapping_cubes(cubes) + if len(concatenated) == 1: + return concatenated logger.error('Can not concatenate cubes into a single one.') logger.error('Resulting cubes:') for cube in concatenated: @@ -319,28 +320,19 @@ def _concatenate_two_overlapping_cubes(cubes): cend_day, c2end_year, c2end_month, c2end_day) cubes = iris.cube.CubeList([c1_delta, overlap_data, c2_delta]) - try: - cube = iris.cube.CubeList(cubes).concatenate_cube() - return [cube] - except iris.exceptions.ConcatenateError as ex: - logger.error('Can not concatenate cubes: %s', ex) - logger.error('Cubes:') - for cube in cubes: - logger.error(cube) - raise ex elif c1.coord('time').points[0] == c2.coord('time').points[0]: c2_delta = extract_time(c2, cend_year, cend_month, cend_day, c2end_year, c2end_month, c2end_day) cubes = iris.cube.CubeList([overlap_data, c2_delta]) - try: - cube = iris.cube.CubeList(cubes).concatenate_cube() - return [cube] - except iris.exceptions.ConcatenateError as ex: - logger.error('Can not concatenate cubes: %s', ex) - logger.error('Cubes:') - for cube in cubes: - logger.error(cube) - raise ex + try: + cube = iris.cube.CubeList(cubes).concatenate_cube() + return [cube] + except iris.exceptions.ConcatenateError as ex: + logger.error('Can not concatenate cubes: %s', ex) + logger.error('Cubes:') + for cube in cubes: + logger.error(cube) + raise ex else: return cubes From e594c0726c4b48f2483f9bd52c18b2db6022cc38 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 26 Sep 2019 17:56:59 +0200 Subject: [PATCH 04/24] Add test and refactor code --- esmvalcore/preprocessor/_io.py | 71 ++++++++----------- .../preprocessor/_io/test_concatenate.py | 39 ++++++---- 2 files changed, 57 insertions(+), 53 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index fcd99d1420..e99b42ca3d 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -91,16 +91,23 @@ def concatenate(cubes): concatenated = iris.cube.CubeList(cubes).concatenate() if len(concatenated) == 1: return concatenated[0] - if len(cubes) == 2: - if 'time' in [crd.standard_name for crd in cubes[0].coords()]: - if cubes[0].coord('time').points[0] <= \ - cubes[0].coord('time').points[0]: - cubes = [cubes[0], cubes[1]] + if len(concatenated) == 2: + print('Manage overlap') + try: + time_1 = concatenated[0].coord('time') + time_2 = concatenated[1].coord('time') + except iris.exceptions.CoordinateNotFoundError: + pass + else: + print('Order cubes') + if time_1.points[0] <= time_2.points[0]: + cubes = [concatenated[0], concatenated[1]] else: - cubes = [cubes[1], cubes[0]] + cubes = [concatenated[1], concatenated[0]] + print('Manage overlapping') concatenated = _concatenate_two_overlapping_cubes(cubes) if len(concatenated) == 1: - return concatenated + return concatenated[0] logger.error('Can not concatenate cubes into a single one.') logger.error('Resulting cubes:') for cube in concatenated: @@ -291,40 +298,24 @@ def _concatenate_two_overlapping_cubes(cubes): """Concatenate time-overlapping cubes (two cubes only).""" c1 = cubes[0] c2 = cubes[1] - common_time_points = [t for t in c1.coord('time').points - if t in c2.coord('time').points] + time_1 = c1.coord('time') + time_2 = c2.coord('time') + time_units = c1.coord('time').units + common_time_points = [ + time_units.num2date(t) for t in time_1.points + if t in time_2.points + ] + print(common_time_points) if common_time_points: - time_units = c1.coord('time').units - cstart_year = time_units.num2date(common_time_points[0]).year - cstart_month = time_units.num2date(common_time_points[0]).month - cstart_day = time_units.num2date(common_time_points[0]).day - cend_year = time_units.num2date(common_time_points[-1]).year - cend_month = time_units.num2date(common_time_points[-1]).month - cend_day = time_units.num2date(common_time_points[-1]).day - c1start_year = time_units.num2date(common_time_points[0]).year - c1start_month = time_units.num2date(c1.coord('time').points[0]).month - c1start_day = time_units.num2date(c1.coord('time').points[0]).day - c2end_year = time_units.num2date(c2.coord('time').points[-1]).year - c2end_month = time_units.num2date(c2.coord('time').points[-1]).month - c2end_day = time_units.num2date(c2.coord('time').points[-1]).day - overlap_data = extract_time(c1, cstart_year, cstart_month, - cstart_day, cend_year, cend_month, - cend_day) - - # c1 is to the left of c2 - if c1.coord('time').points[0] < c2.coord('time').points[0]: - c1_delta = extract_time(c1, c1start_year, c1start_month, - c1start_day, cstart_year, cstart_month, - cstart_day) - c2_delta = extract_time(c2, cend_year, cend_month, - cend_day, c2end_year, c2end_month, - c2end_day) - cubes = iris.cube.CubeList([c1_delta, overlap_data, c2_delta]) - elif c1.coord('time').points[0] == c2.coord('time').points[0]: - c2_delta = extract_time(c2, cend_year, cend_month, - cend_day, c2end_year, c2end_month, - c2end_day) - cubes = iris.cube.CubeList([overlap_data, c2_delta]) + data_start = time_1.cell(0).point + start_overlap = common_time_points[0] + + c1_delta = extract_time( + c1, + data_start.year, data_start.month, data_start.day, + start_overlap.year, start_overlap.month, start_overlap.day + ) + cubes = iris.cube.CubeList([c1_delta, c2]) try: cube = iris.cube.CubeList(cubes).concatenate_cube() return [cube] diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index 276ada273c..cb8fbc78af 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -14,28 +14,41 @@ class TestConcatenate(unittest.TestCase): def setUp(self): """Start tests.""" - coord = DimCoord([1, 2], var_name='coord') - second_coord = coord.copy([3, 4]) - third_coord = coord.copy([5, 6]) + self._model_coord = DimCoord( + [1., 2.], var_name='time', standard_name='time', + units='days since 1950-01-01' + ) self.raw_cubes = [] + self._add_cube([1., 2.], [1., 2.]) + self._add_cube([3., 4.], [3., 4.]) + self._add_cube([5., 6.], [5., 6.]) + + def _add_cube(self, data, coord): self.raw_cubes.append( - Cube([1, 2], var_name='sample', dim_coords_and_dims=((coord, - 0), ))) - self.raw_cubes.append( - Cube([3, 4], - var_name='sample', - dim_coords_and_dims=((second_coord, 0), ))) - self.raw_cubes.append( - Cube([5, 6], + Cube(data, var_name='sample', - dim_coords_and_dims=((third_coord, 0), ))) + dim_coords_and_dims=((self._model_coord.copy(coord), 0), ) + ) + ) def test_concatenate(self): """Test concatenation of two cubes.""" concatenated = _io.concatenate(self.raw_cubes) - self.assertTrue((concatenated.coord('coord').points == np.array( + self.assertTrue((concatenated.coord('time').points == np.array( [1, 2, 3, 4, 5, 6])).all()) + def test_concatenate_with_overlap(self): + self._add_cube([6.5, 7.5], [6., 7.]) + concatenated = _io.concatenate(self.raw_cubes) + self.assertTrue(np.allclose( + concatenated.coord('time').points, + np.array([1., 2., 3., 4., 5., 6., 7.]) + )) + self.assertTrue(np.allclose( + concatenated.data, + np.array([1., 2., 3., 4., 5., 6.5, 7.5]) + )) + def test_fail_with_duplicates(self): """Test exception raised if two cubes are overlapping.""" self.raw_cubes.append(self.raw_cubes[0].copy()) From 18d6a251ab2b9ee4309c78d94c4d12294f2081e3 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 26 Sep 2019 18:00:55 +0200 Subject: [PATCH 05/24] Remove debug prints --- esmvalcore/preprocessor/_io.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index e99b42ca3d..6bdaf2589c 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -92,19 +92,16 @@ def concatenate(cubes): if len(concatenated) == 1: return concatenated[0] if len(concatenated) == 2: - print('Manage overlap') try: time_1 = concatenated[0].coord('time') time_2 = concatenated[1].coord('time') except iris.exceptions.CoordinateNotFoundError: pass else: - print('Order cubes') if time_1.points[0] <= time_2.points[0]: cubes = [concatenated[0], concatenated[1]] else: cubes = [concatenated[1], concatenated[0]] - print('Manage overlapping') concatenated = _concatenate_two_overlapping_cubes(cubes) if len(concatenated) == 1: return concatenated[0] From f0bfcdd2a03b8a94b47eda41190a8ca0ba3f07a5 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 26 Sep 2019 18:11:57 +0200 Subject: [PATCH 06/24] Small refactor --- esmvalcore/preprocessor/_io.py | 25 ++++++++----------- .../preprocessor/_io/test_concatenate.py | 1 + 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 6bdaf2589c..296181e049 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -293,26 +293,21 @@ def _write_ncl_metadata(output_dir, metadata): def _concatenate_two_overlapping_cubes(cubes): """Concatenate time-overlapping cubes (two cubes only).""" - c1 = cubes[0] - c2 = cubes[1] - time_1 = c1.coord('time') - time_2 = c2.coord('time') - time_units = c1.coord('time').units - common_time_points = [ - time_units.num2date(t) for t in time_1.points - if t in time_2.points - ] - print(common_time_points) - if common_time_points: + time_1 = cubes[0].coord('time') + time_2 = cubes[1].coord('time') + start_overlap = next( + (time_1.units.num2date(t) for t in time_1.points + if t in time_2.points), + None + ) + if start_overlap: data_start = time_1.cell(0).point - start_overlap = common_time_points[0] - c1_delta = extract_time( - c1, + cubes[0], data_start.year, data_start.month, data_start.day, start_overlap.year, start_overlap.month, start_overlap.day ) - cubes = iris.cube.CubeList([c1_delta, c2]) + cubes = iris.cube.CubeList([c1_delta, cubes[1]]) try: cube = iris.cube.CubeList(cubes).concatenate_cube() return [cube] diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index cb8fbc78af..bfff5acf87 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -38,6 +38,7 @@ def test_concatenate(self): [1, 2, 3, 4, 5, 6])).all()) def test_concatenate_with_overlap(self): + """Test concatenation of time overalapping cubes""" self._add_cube([6.5, 7.5], [6., 7.]) concatenated = _io.concatenate(self.raw_cubes) self.assertTrue(np.allclose( From e25a37278a4ed29edd2757e4bb52635cdaec566a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 27 Sep 2019 13:24:23 +0100 Subject: [PATCH 07/24] removed test that fails due to overlapping cubes --- tests/integration/preprocessor/_io/test_concatenate.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index bfff5acf87..c2a2bb2ea2 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -50,12 +50,6 @@ def test_concatenate_with_overlap(self): np.array([1., 2., 3., 4., 5., 6.5, 7.5]) )) - def test_fail_with_duplicates(self): - """Test exception raised if two cubes are overlapping.""" - self.raw_cubes.append(self.raw_cubes[0].copy()) - with self.assertRaises(ValueError): - _io.concatenate(self.raw_cubes) - def test_fail_metadata_differs(self): """Test exception raised if two cubes have different metadata.""" self.raw_cubes[0].units = 'm' From 3ac8cd184911b3b934d8b338616bf5ab2b268336 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 27 Sep 2019 13:24:44 +0100 Subject: [PATCH 08/24] fixed proper the exceptional concatenation --- esmvalcore/preprocessor/_io.py | 70 ++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 296181e049..3435f1217f 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -91,7 +91,7 @@ def concatenate(cubes): concatenated = iris.cube.CubeList(cubes).concatenate() if len(concatenated) == 1: return concatenated[0] - if len(concatenated) == 2: + if len(concatenated) == 2 and concatenated[0] != concatenated[1]: try: time_1 = concatenated[0].coord('time') time_2 = concatenated[1].coord('time') @@ -293,29 +293,51 @@ def _write_ncl_metadata(output_dir, metadata): def _concatenate_two_overlapping_cubes(cubes): """Concatenate time-overlapping cubes (two cubes only).""" + # get time end points time_1 = cubes[0].coord('time') time_2 = cubes[1].coord('time') - start_overlap = next( - (time_1.units.num2date(t) for t in time_1.points - if t in time_2.points), - None - ) - if start_overlap: - data_start = time_1.cell(0).point - c1_delta = extract_time( - cubes[0], - data_start.year, data_start.month, data_start.day, - start_overlap.year, start_overlap.month, start_overlap.day - ) - cubes = iris.cube.CubeList([c1_delta, cubes[1]]) - try: - cube = iris.cube.CubeList(cubes).concatenate_cube() - return [cube] - except iris.exceptions.ConcatenateError as ex: - logger.error('Can not concatenate cubes: %s', ex) - logger.error('Cubes:') - for cube in cubes: - logger.error(cube) - raise ex + data_start_1 = time_1.cell(0).point + data_start_2 = time_2.cell(0).point + data_end_1 = time_1.cell(-1).point + data_end_2 = time_2.cell(-1).point + + # remember we have arranged [cube1, cube2] + # so that cube1.start <= cube2.start + + # case 1: both cubes start at the same time + if data_start_1 == data_start_2: + if data_end_1 < data_end_2: + cubes = [cubes[1]] + else: + cubes = [cubes[0]] + + # case 2: cube1 starts before cube2 else: - return cubes + # find time overlap, if any + start_overlap = next( + (time_1.units.num2date(t) for t in time_1.points + if t in time_2.points), + None + ) + # case 2.1: cube1 ends before cube2 + if start_overlap and data_end_1 <= data_end_2: + c1_delta = extract_time( + cubes[0], + data_start_1.year, data_start_1.month, data_start_1.day, + start_overlap.year, start_overlap.month, start_overlap.day + ) + cubes = iris.cube.CubeList([c1_delta, cubes[1]]) + try: + cubes = [iris.cube.CubeList(cubes).concatenate_cube()] + except iris.exceptions.ConcatenateError as ex: + logger.error('Can not concatenate cubes: %s', ex) + logger.error('Cubes:') + for cube in cubes: + logger.error(cube) + raise ex + # case 2.2: cube1 ends after cube2 + if start_overlap and data_end_1 > data_end_2: + cubes = [cubes[0]] + # case 2.3: there is no overlap: return original cubes + + return cubes From 706146ac5300bab4fbae80d3e74edb3fd6ec7a5a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 4 Oct 2019 15:18:26 +0100 Subject: [PATCH 09/24] adjusted according to comments --- esmvalcore/preprocessor/_io.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 3435f1217f..76795c1de9 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -91,18 +91,14 @@ def concatenate(cubes): concatenated = iris.cube.CubeList(cubes).concatenate() if len(concatenated) == 1: return concatenated[0] - if len(concatenated) == 2 and concatenated[0] != concatenated[1]: + if len(concatenated) == 2: try: - time_1 = concatenated[0].coord('time') - time_2 = concatenated[1].coord('time') + concatenated[0].coord('time') + concatenated[1].coord('time') except iris.exceptions.CoordinateNotFoundError: pass else: - if time_1.points[0] <= time_2.points[0]: - cubes = [concatenated[0], concatenated[1]] - else: - cubes = [concatenated[1], concatenated[0]] - concatenated = _concatenate_two_overlapping_cubes(cubes) + concatenated = _concatenate_two_overlapping_cubes(concatenated) if len(concatenated) == 1: return concatenated[0] logger.error('Can not concatenate cubes into a single one.') @@ -293,6 +289,12 @@ def _write_ncl_metadata(output_dir, metadata): def _concatenate_two_overlapping_cubes(cubes): """Concatenate time-overlapping cubes (two cubes only).""" + # we arrange [cube1, cube2] so that cube1.start <= cube2.start + if cubes[0].coord('time').points[0] <= cubes[1].coord('time').points[0]: + cubes = [cubes[0], cubes[1]] + else: + cubes = [cubes[1], cubes[0]] + # get time end points time_1 = cubes[0].coord('time') time_2 = cubes[1].coord('time') @@ -301,10 +303,7 @@ def _concatenate_two_overlapping_cubes(cubes): data_end_1 = time_1.cell(-1).point data_end_2 = time_2.cell(-1).point - # remember we have arranged [cube1, cube2] - # so that cube1.start <= cube2.start - - # case 1: both cubes start at the same time + # case 1: both cubes start at the same time -> return longer cube if data_start_1 == data_start_2: if data_end_1 < data_end_2: cubes = [cubes[1]] @@ -319,7 +318,7 @@ def _concatenate_two_overlapping_cubes(cubes): if t in time_2.points), None ) - # case 2.1: cube1 ends before cube2 + # case 2.1: cube1 ends before cube2 -> use full cube2 and shorten cube1 if start_overlap and data_end_1 <= data_end_2: c1_delta = extract_time( cubes[0], @@ -335,7 +334,7 @@ def _concatenate_two_overlapping_cubes(cubes): for cube in cubes: logger.error(cube) raise ex - # case 2.2: cube1 ends after cube2 + # case 2.2: cube1 ends after cube2 -> return cube1 if start_overlap and data_end_1 > data_end_2: cubes = [cubes[0]] # case 2.3: there is no overlap: return original cubes From 8f2a1cb67b10539ff2a1f18f002356009070d7d9 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 15 Nov 2019 15:12:36 +0000 Subject: [PATCH 10/24] added fail case straight in func --- esmvalcore/preprocessor/_io.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index a31d9eed78..ff582f9cce 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -313,6 +313,11 @@ def _concatenate_two_overlapping_cubes(cubes): if t in time_2.points), None ) + # case 0: no overlap (new iris implementaion does allow + # concatenation of cubes with no overlap) + if not start_overlap: + raise ValueError(f"Attempting to concatenate cubes that are" + f"separated in time.") # case 2.1: cube1 ends before cube2 -> use full cube2 and shorten cube1 if start_overlap and data_end_1 <= data_end_2: c1_delta = extract_time( From 53b7efafaf014a2c5635cf8a4573a1494fcce2f1 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 15 Nov 2019 15:12:59 +0000 Subject: [PATCH 11/24] added moar tests --- .../preprocessor/_io/test_concatenate.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index c2a2bb2ea2..c26ee98afa 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -2,6 +2,7 @@ import unittest +from cf_units import Unit import numpy as np from iris.coords import DimCoord from iris.cube import Cube @@ -50,6 +51,67 @@ def test_concatenate_with_overlap(self): np.array([1., 2., 3., 4., 5., 6.5, 7.5]) )) + def test_concatenate_with_overlap_2(self): + """Test a more generic case.""" + self._add_cube([65., 75.], [3., 200.]) + self._add_cube([65., 75.], [1000., 7000.]) + concatenated = _io.concatenate(self.raw_cubes) + self.assertTrue(np.allclose( + concatenated.coord('time').points, + np.array([1., 2., 3., 4., 5., 6., 1000., 7000.]) + )) + + def test_fail_on_calendar_concatenate_with_overlap(self): + """Test fail of concatenation with overlap.""" + time_coord = DimCoord( + [3., 7000.], var_name='time', standard_name='time', + units=Unit('days since 1950-01-01', calendar='360_day') + ) + self.raw_cubes.append( + Cube([33., 55.], + var_name='sample', + dim_coords_and_dims=((time_coord, 0), ) + ) + ) + with self.assertRaises(TypeError): + _io.concatenate(self.raw_cubes) + + def test_fail_on_units_concatenate_with_overlap(self): + """Test fail of concatenation with overlap.""" + time_coord_1 = DimCoord( + [3., 7000.], var_name='time', standard_name='time', + units=Unit('days since 1950-01-01', calendar='360_day') + ) + time_coord_2 = DimCoord( + [3., 9000.], var_name='time', standard_name='time', + units=Unit('days since 1950-01-01', calendar='360_day') + ) + time_coord_3 = DimCoord( + [3., 9000.], var_name='time', standard_name='time', + units=Unit('days since 1850-01-01', calendar='360_day') + ) + raw_cubes = [] + raw_cubes.append( + Cube([33., 55.], + var_name='sample', + dim_coords_and_dims=((time_coord_1, 0), ) + ) + ) + raw_cubes.append( + Cube([33., 55.], + var_name='sample', + dim_coords_and_dims=((time_coord_2, 0), ) + ) + ) + raw_cubes.append( + Cube([33., 55.], + var_name='sample', + dim_coords_and_dims=((time_coord_3, 0), ) + ) + ) + with self.assertRaises(ValueError): + _io.concatenate(raw_cubes) + def test_fail_metadata_differs(self): """Test exception raised if two cubes have different metadata.""" self.raw_cubes[0].units = 'm' From 4497a4f1ff3ae336ebc0765a5ab0925c57f108d6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 15 Nov 2019 15:25:58 +0000 Subject: [PATCH 12/24] pinning python --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index aa4772edec..3fd49094eb 100644 --- a/environment.yml +++ b/environment.yml @@ -9,5 +9,5 @@ dependencies: - graphviz - gcc_linux-64 - libunwind # Needed for Python3.7+ - - python>=3.6 + - python>=3.6,<3.8 - python-stratify From 32ca777988667cdd3466cd191bf30b4b954ed42a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 15 Nov 2019 15:26:15 +0000 Subject: [PATCH 13/24] pinning python in meta --- meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meta.yaml b/meta.yaml index 54a711edfe..e69fa5efc9 100644 --- a/meta.yaml +++ b/meta.yaml @@ -29,13 +29,13 @@ build: requirements: build: - git - - python>=3.6 + - python>=3.6,<3.8 # Normally installed via pip: - pytest-runner - setuptools_scm run: # esmvaltool - - python>=3.6 + - python>=3.6,<3.8 - libunwind # specifically for Python3.7+ - graphviz - iris>=2.2.1 From bda670cef750fcdb63dfe78a954054dcf9e976dc Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 15 Nov 2019 16:02:31 +0000 Subject: [PATCH 14/24] one moar test --- .../preprocessor/_io/test_concatenate.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index c26ee98afa..82c59554b1 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -61,6 +61,26 @@ def test_concatenate_with_overlap_2(self): np.array([1., 2., 3., 4., 5., 6., 1000., 7000.]) )) + def test_concatenate_with_overlap_same_start(self): + """Test a more generic case.""" + cube1 = self.raw_cubes[0] + raw_cubes = [cube1, ] + time_coord = DimCoord( + [1., 7.], var_name='time', standard_name='time', + units='days since 1950-01-01' + ) + raw_cubes.append( + Cube([33., 55.], + var_name='sample', + dim_coords_and_dims=((time_coord, 0), ) + ) + ) + concatenated = _io.concatenate(raw_cubes) + self.assertTrue(np.allclose( + concatenated.coord('time').points, + np.array([1., 7.]) + )) + def test_fail_on_calendar_concatenate_with_overlap(self): """Test fail of concatenation with overlap.""" time_coord = DimCoord( From 7992312515a216c62a207033f8ea9948a708f66d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 19 Nov 2019 17:23:05 +0000 Subject: [PATCH 15/24] added doc on concatenation --- doc/esmvalcore/datafinder.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/doc/esmvalcore/datafinder.rst b/doc/esmvalcore/datafinder.rst index 206b3d1766..1b016194f9 100644 --- a/doc/esmvalcore/datafinder.rst +++ b/doc/esmvalcore/datafinder.rst @@ -257,3 +257,22 @@ Since observational data are organized in Tiers depending on their level of public availability, the ``default`` directory must be structured accordingly with sub-directories ``TierX`` (``Tier1``, ``Tier2`` or ``Tier3``), even when ``drs: default``. + +Data concatenation from multiple sources +======================================== + +Oftentimes data retrieving results in assembling a continuous data stream from +multiple files or even, multiple experiments. The internal mechanism through which +the assembly is done is via cube concatenation. One peculiarity of iris concatenation +(see `iris cube concatenation `_) +is that it doesn't allow for concatenating time-overlapping cubes; this case is rather +frequent with data from models overlapping in time, and is accounted for by a function that performs a +flexible concatenation between two cubes, depending on the particular setup: + +* cubes overlap time: resulting cube is made up of the overlapping data plus left and +right hand sides on each side of the overlapping data; +* cubes don't overlap in time: data from the two cubes is bolted together; + +Note that two cube concatenation is the base operation of an iterative process of reducing multiple cubes +from multiple data segments via cube concatenation ie if there is no time-overlapping data, the +cubes concatenation is performed in one step. From fc788ec4dfecd2147adb32c41343d23c27a61413 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 20 Nov 2019 16:00:05 +0000 Subject: [PATCH 16/24] fixed indentation --- doc/esmvalcore/datafinder.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/esmvalcore/datafinder.rst b/doc/esmvalcore/datafinder.rst index 1b016194f9..ae0eab19ce 100644 --- a/doc/esmvalcore/datafinder.rst +++ b/doc/esmvalcore/datafinder.rst @@ -270,7 +270,7 @@ frequent with data from models overlapping in time, and is accounted for by a fu flexible concatenation between two cubes, depending on the particular setup: * cubes overlap time: resulting cube is made up of the overlapping data plus left and -right hand sides on each side of the overlapping data; + right hand sides on each side of the overlapping data; * cubes don't overlap in time: data from the two cubes is bolted together; Note that two cube concatenation is the base operation of an iterative process of reducing multiple cubes From 8e5709dad230747342e05beeb17233a37a570ebf Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 28 Nov 2019 14:40:51 +0000 Subject: [PATCH 17/24] full coverage for concatenate smart --- .../preprocessor/_io/test_concatenate.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index 82c59554b1..acc0faee8a 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -6,6 +6,7 @@ import numpy as np from iris.coords import DimCoord from iris.cube import Cube +import iris.exceptions as Exc from esmvalcore.preprocessor import _io @@ -80,6 +81,57 @@ def test_concatenate_with_overlap_same_start(self): concatenated.coord('time').points, np.array([1., 7.]) )) + raw_cubes.reverse() + concatenated = _io.concatenate(raw_cubes) + self.assertTrue(np.allclose( + concatenated.coord('time').points, + np.array([1., 7.]) + )) + + def test_concatenate_with_iris_exception(self): + """Test a more generic case.""" + time_coord_1 = DimCoord( + [1.5, 5., 7.], var_name='time', standard_name='time', + units='days since 1950-01-01') + cube1 = Cube([33., 55., 77.], + var_name='sample', + dim_coords_and_dims=((time_coord_1, 0), )) + time_coord_2 = DimCoord( + [1., 5., 7.], var_name='time', standard_name='time', + units='days since 1950-01-01') + cube2 = Cube([33., 55., 77.], + var_name='sample', + dim_coords_and_dims=((time_coord_2, 0), )) + cubes_single_ovlp = [cube2, cube1] + with self.assertRaises(Exc.ConcatenateError): + _io.concatenate(cubes_single_ovlp) + + def test_concatenate_with_order(self): + """Test a more generic case.""" + time_coord_1 = DimCoord( + [1.5, 2., 5., 7.], var_name='time', standard_name='time', + units='days since 1950-01-01') + cube1 = Cube([33., 44., 55., 77.], + var_name='sample', + dim_coords_and_dims=((time_coord_1, 0), )) + time_coord_2 = DimCoord( + [1., 2., 5., 7., 100.], var_name='time', standard_name='time', + units='days since 1950-01-01') + cube2 = Cube([33., 44., 55., 77., 1000.], + var_name='sample', + dim_coords_and_dims=((time_coord_2, 0), )) + cubes_ordered = [cube2, cube1] + concatenated = _io.concatenate(cubes_ordered) + self.assertTrue(np.allclose( + concatenated.coord('time').points, + np.array([1., 2., 5., 7., 100.]) + )) + cubes_reverse = [cube1, cube2] + concatenated = _io.concatenate(cubes_reverse) + self.assertTrue(np.allclose( + concatenated.coord('time').points, + np.array([1., 2., 5., 7., 100.]) + )) def test_fail_on_calendar_concatenate_with_overlap(self): """Test fail of concatenation with overlap.""" From 8801029af96a187a5fc91cdf327af6d819f2dad2 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 28 Nov 2019 15:02:58 +0000 Subject: [PATCH 18/24] explained the cube overlap case for multiple experiments --- doc/esmvalcore/datafinder.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/esmvalcore/datafinder.rst b/doc/esmvalcore/datafinder.rst index ae0eab19ce..ea5e6ae1ff 100644 --- a/doc/esmvalcore/datafinder.rst +++ b/doc/esmvalcore/datafinder.rst @@ -269,8 +269,12 @@ is that it doesn't allow for concatenating time-overlapping cubes; this case is frequent with data from models overlapping in time, and is accounted for by a function that performs a flexible concatenation between two cubes, depending on the particular setup: -* cubes overlap time: resulting cube is made up of the overlapping data plus left and - right hand sides on each side of the overlapping data; +* cubes overlap in time: resulting cube is made up of the overlapping data plus left and + right hand sides on each side of the overlapping data; note that in the case of the cubes + coming from different experiments the resulting concatenated cube will have composite data + made up from multiple experiments: assume [cube1: exp1, cube2: exp2] and cube1 starts before cube2, + and cube2 finishes after cube1, then the concatenated cube will be made up of cube2: exp2 plus the + section of cube1: exp1 that contains data not provided in cube2: exp2; * cubes don't overlap in time: data from the two cubes is bolted together; Note that two cube concatenation is the base operation of an iterative process of reducing multiple cubes From 0651780017819a484ee4db66543bfee0a61a1154 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Dec 2019 10:38:46 +0100 Subject: [PATCH 19/24] Fix minor style issues --- esmvalcore/preprocessor/_io.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index ff582f9cce..9d6e5809ab 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -88,9 +88,8 @@ def _fix_cube_attributes(cubes): def concatenate(cubes): """Concatenate all cubes after fixing metadata.""" _fix_cube_attributes(cubes) + concatenated = iris.cube.CubeList(cubes).concatenate() - if len(concatenated) == 1: - return concatenated[0] if len(concatenated) == 2: try: concatenated[0].coord('time') @@ -98,9 +97,11 @@ def concatenate(cubes): except iris.exceptions.CoordinateNotFoundError: pass else: - concatenated = _concatenate_two_overlapping_cubes(concatenated) - if len(concatenated) == 1: - return concatenated[0] + concatenated = _concatenate_overlapping_cubes(concatenated) + + if len(concatenated) == 1: + return concatenated[0] + logger.error('Can not concatenate cubes into a single one.') logger.error('Resulting cubes:') for cube in concatenated: @@ -282,7 +283,7 @@ def _write_ncl_metadata(output_dir, metadata): return filename -def _concatenate_two_overlapping_cubes(cubes): +def _concatenate_overlapping_cubes(cubes): """Concatenate time-overlapping cubes (two cubes only).""" # we arrange [cube1, cube2] so that cube1.start <= cube2.start if cubes[0].coord('time').points[0] <= cubes[1].coord('time').points[0]: From 63bccfef0ac8e6fc96d5bd24fb392451e23a9629 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Dec 2019 10:43:19 +0100 Subject: [PATCH 20/24] Fix imports --- esmvalcore/preprocessor/_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 9d6e5809ab..44a0063344 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -12,8 +12,8 @@ import numpy as np import yaml -from esmvalcore.preprocessor._time import extract_time from .._task import write_ncl_settings +from ._time import extract_time logger = logging.getLogger(__name__) From 53f0949f75aebfd289afccd751cbbb4ffa2669bc Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 27 Jan 2020 13:30:54 +0000 Subject: [PATCH 21/24] added bunch of debug message to please the Bouwe gods heh --- esmvalcore/preprocessor/_io.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 44a0063344..b3f3ba9e17 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -288,8 +288,12 @@ def _concatenate_overlapping_cubes(cubes): # we arrange [cube1, cube2] so that cube1.start <= cube2.start if cubes[0].coord('time').points[0] <= cubes[1].coord('time').points[0]: cubes = [cubes[0], cubes[1]] + logger.debug(f"Will attempt to concatenate cubes %s " + f"and %s in this order", cubes[0], cubes[1]) else: cubes = [cubes[1], cubes[0]] + logger.debug(f"Will attempt to concatenate cubes %s " + f"and %s in this order", cubes[1], cubes[0]) # get time end points time_1 = cubes[0].coord('time') @@ -302,8 +306,16 @@ def _concatenate_overlapping_cubes(cubes): # case 1: both cubes start at the same time -> return longer cube if data_start_1 == data_start_2: if data_end_1 < data_end_2: + logger.debug(f"Both cubes start at the same time but cube %s " + f"ends before %s", cubes[0], cubes[1]) + logger.debug(f"Cube %s contains all needed data " + f"so using it fully", cubes[1]) cubes = [cubes[1]] else: + logger.debug(f"Both cubes start at the same time but cube %s " + f"ends before %s", cubes[1], cubes[0]) + logger.debug(f"Cube %s contains all needed data " + f"so using it fully", cubes[0]) cubes = [cubes[0]] # case 2: cube1 starts before cube2 @@ -321,12 +333,24 @@ def _concatenate_overlapping_cubes(cubes): f"separated in time.") # case 2.1: cube1 ends before cube2 -> use full cube2 and shorten cube1 if start_overlap and data_end_1 <= data_end_2: + logger.debug(f"Extracting time slice between %s and %s " + f"from cube %s to use it for concatenation " + f"with cube %s", + "-".join([str(data_start_1.year), + str(data_start_1.month), + str(data_start_1.day)]), + "-".join([str(start_overlap.year), + str(start_overlap.month), + str(start_overlap.day)]), + cubes[0], cubes[1]) c1_delta = extract_time( cubes[0], data_start_1.year, data_start_1.month, data_start_1.day, start_overlap.year, start_overlap.month, start_overlap.day ) cubes = iris.cube.CubeList([c1_delta, cubes[1]]) + logger.debug("Attemptong concatenatenation of %s with %s", + c1_delta, cubes[1]) try: cubes = [iris.cube.CubeList(cubes).concatenate_cube()] except iris.exceptions.ConcatenateError as ex: @@ -338,6 +362,7 @@ def _concatenate_overlapping_cubes(cubes): # case 2.2: cube1 ends after cube2 -> return cube1 if start_overlap and data_end_1 > data_end_2: cubes = [cubes[0]] + logger.debug("Using only data from %s", cubes[0]) # case 2.3: there is no overlap: return original cubes return cubes From 7e7b17b956791ea0ecedb5b391691686312ec3d5 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 3 Feb 2020 12:14:53 +0000 Subject: [PATCH 22/24] Bouwe suggestion Co-Authored-By: Bouwe Andela --- esmvalcore/preprocessor/_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index b3f3ba9e17..b49a3a4ca5 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -306,7 +306,7 @@ def _concatenate_overlapping_cubes(cubes): # case 1: both cubes start at the same time -> return longer cube if data_start_1 == data_start_2: if data_end_1 < data_end_2: - logger.debug(f"Both cubes start at the same time but cube %s " + logger.debug("Both cubes start at the same time but cube %s " f"ends before %s", cubes[0], cubes[1]) logger.debug(f"Cube %s contains all needed data " f"so using it fully", cubes[1]) From 0736cf6bdaa1889475e65091bbe56e7cdb15d152 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 3 Feb 2020 12:15:22 +0000 Subject: [PATCH 23/24] Bouwe suggestion Co-Authored-By: Bouwe Andela --- esmvalcore/preprocessor/_io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index b49a3a4ca5..7554433116 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -329,7 +329,8 @@ def _concatenate_overlapping_cubes(cubes): # case 0: no overlap (new iris implementaion does allow # concatenation of cubes with no overlap) if not start_overlap: - raise ValueError(f"Attempting to concatenate cubes that are" + logger.debug("Unable to concatenate non-overlapping cubes\n%s\nand\n%s" + return cubes f"separated in time.") # case 2.1: cube1 ends before cube2 -> use full cube2 and shorten cube1 if start_overlap and data_end_1 <= data_end_2: From f0475dd6fac6d3b24e2dfd02321946fabebafdfd Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Fri, 7 Feb 2020 12:16:19 +0100 Subject: [PATCH 24/24] Fix syntax error and clean up a bit --- doc/esmvalcore/datafinder.rst | 12 +- esmvalcore/preprocessor/_io.py | 91 +++++----- .../preprocessor/_io/test_concatenate.py | 163 ++++++++---------- 3 files changed, 127 insertions(+), 139 deletions(-) diff --git a/doc/esmvalcore/datafinder.rst b/doc/esmvalcore/datafinder.rst index ea5e6ae1ff..cf7caf0791 100644 --- a/doc/esmvalcore/datafinder.rst +++ b/doc/esmvalcore/datafinder.rst @@ -50,7 +50,7 @@ section. Data retrieval ============== Data retrieval in ESMValTool has two main aspects from the user's point of -view: +view: * data can be found by the tool, subject to availability on disk; * it is the user's responsibility to set the correct data retrieval parameters; @@ -73,7 +73,7 @@ set the paths are ``rootpath`` and ``drs``. ``rootpath`` contains pointers to of directory structure the root paths are structured by. It is important to first discuss the ``drs`` parameter: as we've seen in the previous section, the DRS as a standard is used for both file naming conventions and for directory -structures. +structures. .. _config-user-drs: @@ -81,7 +81,7 @@ Explaining ``config-user/drs: CMIP5:`` or ``config-user/drs: CMIP6:`` --------------------------------------------------------------------- Whreas ESMValTool will **always** use the CMOR standard for file naming (please refer above), by setting the ``drs`` parameter the user tells the tool what -type of root paths they need the data from, e.g.: +type of root paths they need the data from, e.g.: .. code-block:: yaml @@ -156,7 +156,7 @@ Explaining ``config-user/rootpath:`` * ``default``: this is the `root` path(s) to where files are stored without any DRS-like directory structure; in a nutshell, this is a single directory that should contain all the files needed by the run, without any sub-directory - structure. + structure. * ``RAWOBS``: this is the `root` path(s) to where the raw observational data files are stored; this is used by ``cmorize_obs``. @@ -168,7 +168,7 @@ information on the specific datasets that are needed for the analysis. This information, together with the CMOR convention for naming files (see CMOR-DRS_) will allow the tool to search and find the right files. The specific datasets are listed in any recipe, under either the ``datasets`` and/or -``additional_datasets`` sections, e.g. +``additional_datasets`` sections, e.g. .. code-block:: yaml @@ -217,7 +217,7 @@ CMOR-DRS_ and establish the path to the files as: /badc/cmip6/data/CMIP6/CMIP/MOHC/UKESM1-0-LL/historical/r1i1p1f2/Amon then look for variable ``ta`` and specifically the latest version of the data -file: +file: .. code-block:: diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 7554433116..468ef023b7 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -288,12 +288,14 @@ def _concatenate_overlapping_cubes(cubes): # we arrange [cube1, cube2] so that cube1.start <= cube2.start if cubes[0].coord('time').points[0] <= cubes[1].coord('time').points[0]: cubes = [cubes[0], cubes[1]] - logger.debug(f"Will attempt to concatenate cubes %s " - f"and %s in this order", cubes[0], cubes[1]) + logger.debug( + "Will attempt to concatenate cubes %s " + "and %s in this order", cubes[0], cubes[1]) else: cubes = [cubes[1], cubes[0]] - logger.debug(f"Will attempt to concatenate cubes %s " - f"and %s in this order", cubes[1], cubes[0]) + logger.debug( + "Will attempt to concatenate cubes %s " + "and %s in this order", cubes[1], cubes[0]) # get time end points time_1 = cubes[0].coord('time') @@ -305,52 +307,56 @@ def _concatenate_overlapping_cubes(cubes): # case 1: both cubes start at the same time -> return longer cube if data_start_1 == data_start_2: - if data_end_1 < data_end_2: - logger.debug("Both cubes start at the same time but cube %s " - f"ends before %s", cubes[0], cubes[1]) - logger.debug(f"Cube %s contains all needed data " - f"so using it fully", cubes[1]) + if data_end_1 <= data_end_2: + logger.debug( + "Both cubes start at the same time but cube %s " + "ends before %s", cubes[0], cubes[1]) + logger.debug("Cube %s contains all needed data so using it fully", + cubes[1]) cubes = [cubes[1]] else: - logger.debug(f"Both cubes start at the same time but cube %s " - f"ends before %s", cubes[1], cubes[0]) - logger.debug(f"Cube %s contains all needed data " - f"so using it fully", cubes[0]) + logger.debug( + "Both cubes start at the same time but cube %s " + "ends before %s", cubes[1], cubes[0]) + logger.debug("Cube %s contains all needed data so using it fully", + cubes[0]) cubes = [cubes[0]] # case 2: cube1 starts before cube2 else: # find time overlap, if any - start_overlap = next( - (time_1.units.num2date(t) for t in time_1.points - if t in time_2.points), - None - ) - # case 0: no overlap (new iris implementaion does allow + start_overlap = next((time_1.units.num2date(t) + for t in time_1.points if t in time_2.points), + None) + # case 2.0: no overlap (new iris implementaion does allow # concatenation of cubes with no overlap) if not start_overlap: - logger.debug("Unable to concatenate non-overlapping cubes\n%s\nand\n%s" - return cubes - f"separated in time.") - # case 2.1: cube1 ends before cube2 -> use full cube2 and shorten cube1 - if start_overlap and data_end_1 <= data_end_2: - logger.debug(f"Extracting time slice between %s and %s " - f"from cube %s to use it for concatenation " - f"with cube %s", - "-".join([str(data_start_1.year), - str(data_start_1.month), - str(data_start_1.day)]), - "-".join([str(start_overlap.year), - str(start_overlap.month), - str(start_overlap.day)]), - cubes[0], cubes[1]) - c1_delta = extract_time( - cubes[0], - data_start_1.year, data_start_1.month, data_start_1.day, - start_overlap.year, start_overlap.month, start_overlap.day - ) + logger.debug( + "Unable to concatenate non-overlapping cubes\n%s\nand\n%s" + "separated in time.", cubes[0], cubes[1]) + # case 2.1: cube1 ends after cube2 -> return cube1 + elif data_end_1 > data_end_2: + cubes = [cubes[0]] + logger.debug("Using only data from %s", cubes[0]) + # case 2.2: cube1 ends before cube2 -> use full cube2 and shorten cube1 + else: + logger.debug( + "Extracting time slice between %s and %s from cube %s to use " + "it for concatenation with cube %s", "-".join([ + str(data_start_1.year), + str(data_start_1.month), + str(data_start_1.day) + ]), "-".join([ + str(start_overlap.year), + str(start_overlap.month), + str(start_overlap.day) + ]), cubes[0], cubes[1]) + c1_delta = extract_time(cubes[0], data_start_1.year, + data_start_1.month, data_start_1.day, + start_overlap.year, start_overlap.month, + start_overlap.day) cubes = iris.cube.CubeList([c1_delta, cubes[1]]) - logger.debug("Attemptong concatenatenation of %s with %s", + logger.debug("Attempting concatenatenation of %s with %s", c1_delta, cubes[1]) try: cubes = [iris.cube.CubeList(cubes).concatenate_cube()] @@ -360,10 +366,5 @@ def _concatenate_overlapping_cubes(cubes): for cube in cubes: logger.error(cube) raise ex - # case 2.2: cube1 ends after cube2 -> return cube1 - if start_overlap and data_end_1 > data_end_2: - cubes = [cubes[0]] - logger.debug("Using only data from %s", cubes[0]) - # case 2.3: there is no overlap: return original cubes return cubes diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index acc0faee8a..2ee24bae0d 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -2,24 +2,23 @@ import unittest -from cf_units import Unit import numpy as np +from cf_units import Unit from iris.coords import DimCoord from iris.cube import Cube -import iris.exceptions as Exc +from iris.exceptions import ConcatenateError from esmvalcore.preprocessor import _io class TestConcatenate(unittest.TestCase): """Tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" - def setUp(self): """Start tests.""" - self._model_coord = DimCoord( - [1., 2.], var_name='time', standard_name='time', - units='days since 1950-01-01' - ) + self._model_coord = DimCoord([1., 2.], + var_name='time', + standard_name='time', + units='days since 1950-01-01') self.raw_cubes = [] self._add_cube([1., 2.], [1., 2.]) self._add_cube([3., 4.], [3., 4.]) @@ -29,158 +28,146 @@ def _add_cube(self, data, coord): self.raw_cubes.append( Cube(data, var_name='sample', - dim_coords_and_dims=((self._model_coord.copy(coord), 0), ) - ) - ) + dim_coords_and_dims=((self._model_coord.copy(coord), 0), ))) def test_concatenate(self): """Test concatenation of two cubes.""" concatenated = _io.concatenate(self.raw_cubes) - self.assertTrue((concatenated.coord('time').points == np.array( - [1, 2, 3, 4, 5, 6])).all()) + np.testing.assert_array_equal( + concatenated.coord('time').points, np.array([1, 2, 3, 4, 5, 6])) def test_concatenate_with_overlap(self): """Test concatenation of time overalapping cubes""" self._add_cube([6.5, 7.5], [6., 7.]) concatenated = _io.concatenate(self.raw_cubes) - self.assertTrue(np.allclose( + np.testing.assert_array_equal( concatenated.coord('time').points, - np.array([1., 2., 3., 4., 5., 6., 7.]) - )) - self.assertTrue(np.allclose( - concatenated.data, - np.array([1., 2., 3., 4., 5., 6.5, 7.5]) - )) + np.array([1., 2., 3., 4., 5., 6., 7.])) + np.testing.assert_array_equal(concatenated.data, + np.array([1., 2., 3., 4., 5., 6.5, 7.5])) def test_concatenate_with_overlap_2(self): """Test a more generic case.""" self._add_cube([65., 75.], [3., 200.]) self._add_cube([65., 75.], [1000., 7000.]) concatenated = _io.concatenate(self.raw_cubes) - self.assertTrue(np.allclose( + np.testing.assert_array_equal( concatenated.coord('time').points, - np.array([1., 2., 3., 4., 5., 6., 1000., 7000.]) - )) + np.array([1., 2., 3., 4., 5., 6., 1000., 7000.])) def test_concatenate_with_overlap_same_start(self): """Test a more generic case.""" cube1 = self.raw_cubes[0] - raw_cubes = [cube1, ] - time_coord = DimCoord( - [1., 7.], var_name='time', standard_name='time', - units='days since 1950-01-01' - ) + raw_cubes = [ + cube1, + ] + time_coord = DimCoord([1., 7.], + var_name='time', + standard_name='time', + units='days since 1950-01-01') raw_cubes.append( Cube([33., 55.], var_name='sample', - dim_coords_and_dims=((time_coord, 0), ) - ) - ) + dim_coords_and_dims=((time_coord, 0), ))) concatenated = _io.concatenate(raw_cubes) - self.assertTrue(np.allclose( - concatenated.coord('time').points, - np.array([1., 7.]) - )) + np.testing.assert_array_equal( + concatenated.coord('time').points, np.array([1., 7.])) raw_cubes.reverse() concatenated = _io.concatenate(raw_cubes) - self.assertTrue(np.allclose( - concatenated.coord('time').points, - np.array([1., 7.]) - )) + np.testing.assert_array_equal( + concatenated.coord('time').points, np.array([1., 7.])) def test_concatenate_with_iris_exception(self): """Test a more generic case.""" - time_coord_1 = DimCoord( - [1.5, 5., 7.], var_name='time', standard_name='time', - units='days since 1950-01-01') + time_coord_1 = DimCoord([1.5, 5., 7.], + var_name='time', + standard_name='time', + units='days since 1950-01-01') cube1 = Cube([33., 55., 77.], var_name='sample', dim_coords_and_dims=((time_coord_1, 0), )) - time_coord_2 = DimCoord( - [1., 5., 7.], var_name='time', standard_name='time', - units='days since 1950-01-01') + time_coord_2 = DimCoord([1., 5., 7.], + var_name='time', + standard_name='time', + units='days since 1950-01-01') cube2 = Cube([33., 55., 77.], var_name='sample', dim_coords_and_dims=((time_coord_2, 0), )) cubes_single_ovlp = [cube2, cube1] - with self.assertRaises(Exc.ConcatenateError): + with self.assertRaises(ConcatenateError): _io.concatenate(cubes_single_ovlp) def test_concatenate_with_order(self): """Test a more generic case.""" - time_coord_1 = DimCoord( - [1.5, 2., 5., 7.], var_name='time', standard_name='time', - units='days since 1950-01-01') + time_coord_1 = DimCoord([1.5, 2., 5., 7.], + var_name='time', + standard_name='time', + units='days since 1950-01-01') cube1 = Cube([33., 44., 55., 77.], var_name='sample', dim_coords_and_dims=((time_coord_1, 0), )) - time_coord_2 = DimCoord( - [1., 2., 5., 7., 100.], var_name='time', standard_name='time', - units='days since 1950-01-01') + time_coord_2 = DimCoord([1., 2., 5., 7., 100.], + var_name='time', + standard_name='time', + units='days since 1950-01-01') cube2 = Cube([33., 44., 55., 77., 1000.], var_name='sample', dim_coords_and_dims=((time_coord_2, 0), )) cubes_ordered = [cube2, cube1] concatenated = _io.concatenate(cubes_ordered) - self.assertTrue(np.allclose( - concatenated.coord('time').points, - np.array([1., 2., 5., 7., 100.]) - )) + np.testing.assert_array_equal( + concatenated.coord('time').points, np.array([1., 2., 5., 7., + 100.])) cubes_reverse = [cube1, cube2] concatenated = _io.concatenate(cubes_reverse) - self.assertTrue(np.allclose( - concatenated.coord('time').points, - np.array([1., 2., 5., 7., 100.]) - )) + np.testing.assert_array_equal( + concatenated.coord('time').points, np.array([1., 2., 5., 7., + 100.])) def test_fail_on_calendar_concatenate_with_overlap(self): """Test fail of concatenation with overlap.""" - time_coord = DimCoord( - [3., 7000.], var_name='time', standard_name='time', - units=Unit('days since 1950-01-01', calendar='360_day') - ) + time_coord = DimCoord([3., 7000.], + var_name='time', + standard_name='time', + units=Unit('days since 1950-01-01', + calendar='360_day')) self.raw_cubes.append( Cube([33., 55.], var_name='sample', - dim_coords_and_dims=((time_coord, 0), ) - ) - ) + dim_coords_and_dims=((time_coord, 0), ))) with self.assertRaises(TypeError): _io.concatenate(self.raw_cubes) def test_fail_on_units_concatenate_with_overlap(self): """Test fail of concatenation with overlap.""" - time_coord_1 = DimCoord( - [3., 7000.], var_name='time', standard_name='time', - units=Unit('days since 1950-01-01', calendar='360_day') - ) - time_coord_2 = DimCoord( - [3., 9000.], var_name='time', standard_name='time', - units=Unit('days since 1950-01-01', calendar='360_day') - ) - time_coord_3 = DimCoord( - [3., 9000.], var_name='time', standard_name='time', - units=Unit('days since 1850-01-01', calendar='360_day') - ) + time_coord_1 = DimCoord([3., 7000.], + var_name='time', + standard_name='time', + units=Unit('days since 1950-01-01', + calendar='360_day')) + time_coord_2 = DimCoord([3., 9000.], + var_name='time', + standard_name='time', + units=Unit('days since 1950-01-01', + calendar='360_day')) + time_coord_3 = DimCoord([3., 9000.], + var_name='time', + standard_name='time', + units=Unit('days since 1850-01-01', + calendar='360_day')) raw_cubes = [] raw_cubes.append( Cube([33., 55.], var_name='sample', - dim_coords_and_dims=((time_coord_1, 0), ) - ) - ) + dim_coords_and_dims=((time_coord_1, 0), ))) raw_cubes.append( Cube([33., 55.], var_name='sample', - dim_coords_and_dims=((time_coord_2, 0), ) - ) - ) + dim_coords_and_dims=((time_coord_2, 0), ))) raw_cubes.append( Cube([33., 55.], var_name='sample', - dim_coords_and_dims=((time_coord_3, 0), ) - ) - ) + dim_coords_and_dims=((time_coord_3, 0), ))) with self.assertRaises(ValueError): _io.concatenate(raw_cubes) @@ -253,4 +240,4 @@ def test_fix_attributes(self): self.raw_cubes[idx].attributes.update(differing_attrs[idx]) _io._fix_cube_attributes(self.raw_cubes) # noqa for cube in self.raw_cubes: - self.assertTrue(cube.attributes == resulting_attrs) + self.assertEqual(cube.attributes, resulting_attrs)