From baba357f95d801c6a3aff1536b67560ce10282b2 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 21 May 2014 14:45:04 -0700 Subject: [PATCH 1/4] Add keep_attrs to reduction methods --- test/test_data_array.py | 17 ++++++++++++++++- test/test_dataset.py | 16 ++++++++++++++++ test/test_variable.py | 17 ++++++++++++++++- xray/data_array.py | 9 +++++++-- xray/dataset.py | 13 +++++++++++-- xray/variable.py | 15 +++++++++++++-- 6 files changed, 79 insertions(+), 8 deletions(-) diff --git a/test/test_data_array.py b/test/test_data_array.py index cbbab21774b..a8b0e5d23cb 100644 --- a/test/test_data_array.py +++ b/test/test_data_array.py @@ -2,16 +2,20 @@ import pandas as pd from copy import deepcopy from textwrap import dedent +from collections import OrderedDict -from xray import Dataset, DataArray, Variable, align +from xray import Dataset, DataArray, Variable, align, utils from xray.pycompat import iteritems from . import TestCase, ReturnItem, source_ndarray +_attrs = {'units': 'test', 'long_name': 'testing'} + class TestDataArray(TestCase): def setUp(self): self.x = np.random.random((10, 20)) self.v = Variable(['x', 'y'], self.x) + self.va = Variable(['x', 'y'], self.x, _attrs) self.ds = Dataset({'foo': self.v}) self.dv = self.ds['foo'] @@ -262,6 +266,17 @@ def test_reduce(self): # needs more... 
# should check which extra dimensions are dropped + def test_reduce_keep_attrs(self): + # Test dropped attrs + vm = self.va.mean() + self.assertEqual(len(vm.attrs), 0) + self.assertTrue(utils.dict_equal(vm.attrs, OrderedDict())) + + # Test kept attrs + vm = self.va.mean(keep_attrs=True) + self.assertEqual(len(vm.attrs), len(_attrs)) + self.assertTrue(utils.dict_equal(vm.attrs, _attrs)) + def test_unselect(self): with self.assertRaisesRegexp(ValueError, 'cannot unselect the name'): self.dv.unselect('foo') diff --git a/test/test_dataset.py b/test/test_dataset.py index 9f259d81d45..0e0c440acfe 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -20,6 +20,7 @@ 'var2': ['dim1', 'dim2'], 'var3': ['dim3', 'dim1'], } +_attrs = {'attr1': 'value1', 'attr2': 2929} _testvar = sorted(_vars.keys())[0] _testdim = sorted(_dims.keys())[0] @@ -698,3 +699,18 @@ def test_reduce_non_numeric(self): self.assertDatasetEqual(data1.mean(), data2.mean()) self.assertDatasetEqual(data1.mean(dimension='dim1'), data2.mean(dimension='dim1')) + + def test_reduce_keep_attrs(self): + data = create_test_data() + attrs = OrderedDict(_attrs) + data.attrs = attrs + + # Test dropped attrs + ds = data.mean() + self.assertEqual(len(ds.attrs), 0) + self.assertTrue(utils.dict_equal(ds.attrs, OrderedDict())) + + # Test kept attrs + ds = data.mean(keep_attrs=True) + self.assertEqual(len(ds.attrs), len(_attrs)) + self.assertTrue(utils.dict_equal(ds.attrs, attrs)) diff --git a/test/test_variable.py b/test/test_variable.py index 632db73a0a0..11469b20f65 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -6,13 +6,15 @@ import numpy as np import pandas as pd -from xray import Variable, Dataset, DataArray, indexing +from xray import Variable, Dataset, DataArray, indexing, utils from xray.variable import (Coordinate, as_variable, NumpyArrayAdapter, PandasIndexAdapter, _as_compatible_data) from xray.pycompat import PY3 from . 
import TestCase, source_ndarray +_attrs = {'units': 'test', 'long_name': 'testing'} + class VariableSubclassTestCases(object): def test_properties(self): @@ -530,6 +532,19 @@ def test_reduce(self): with self.assertRaisesRegexp(ValueError, 'cannot supply both'): v.mean(dimension='x', axis=0) + def test_reduce_keep_attrs(self): + v = Variable(['x', 'y'], self.d, _attrs) + + # Test dropped attrs + vm = v.mean() + self.assertEqual(len(vm.attrs), 0) + self.assertTrue(utils.dict_equal(vm.attrs, OrderedDict())) + + # Test kept attrs + vm = v.mean(keep_attrs=True) + self.assertEqual(len(vm.attrs), len(_attrs)) + self.assertTrue(utils.dict_equal(vm.attrs, _attrs)) + class TestCoordinate(TestCase, VariableSubclassTestCases): cls = staticmethod(Coordinate) diff --git a/xray/data_array.py b/xray/data_array.py index 2a393cee75e..b8fbd81656a 100644 --- a/xray/data_array.py +++ b/xray/data_array.py @@ -465,7 +465,8 @@ def squeeze(self, dimension=None): ds = self.dataset.squeeze(dimension) return ds[self.name] - def reduce(self, func, dimension=None, axis=None, **kwargs): + def reduce(self, func, dimension=None, axis=None, keep_attrs=False, + **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -481,6 +482,10 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): 'dimension' and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `f(x)` without an axis argument). + keep_attrs : bool, optional + If True, the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -490,7 +495,7 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): DataArray with this object's array replaced with an array with summarized data and the indicated dimension(s) removed. 
""" - var = self.variable.reduce(func, dimension, axis, **kwargs) + var = self.variable.reduce(func, dimension, axis, keep_attrs, **kwargs) drop = set(self.dimensions) - set(var.dimensions) # For now, take an aggressive strategy of removing all variables # associated with any dropped dimensions diff --git a/xray/dataset.py b/xray/dataset.py index 27a80c4cd0f..3dbc88294be 100644 --- a/xray/dataset.py +++ b/xray/dataset.py @@ -1005,7 +1005,7 @@ def func(self, dimension=None, **kwargs): cls=cls.__name__) return func - def reduce(self, func, dimension=None, **kwargs): + def reduce(self, func, dimension=None, keep_attrs=False, **kwargs): """Reduce this dataset by applying `func` along some dimension(s). Parameters @@ -1017,6 +1017,10 @@ def reduce(self, func, dimension=None, **kwargs): dimension : str or sequence of str, optional Dimension(s) over which to apply `func`. By default `func` is applied over all dimensions. + keep_attrs : bool, optional + If True, the dataset's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -1027,6 +1031,11 @@ def reduce(self, func, dimension=None, **kwargs): of summarized data and the indicated dimension(s) removed. 
""" + if keep_attrs: + attrs = self.attrs + else: + attrs = OrderedDict() + if isinstance(dimension, basestring): dims = set([dimension]) elif dimension is None: @@ -1052,7 +1061,7 @@ def reduce(self, func, dimension=None, **kwargs): pass else: variables[name] = var - return Dataset(variables=variables) + return Dataset(variables=variables, attributes=attrs) @classmethod def concat(cls, datasets, dimension='concat_dimension', indexers=None, diff --git a/xray/variable.py b/xray/variable.py index 97b43e0ebb5..6a0b05f8a17 100644 --- a/xray/variable.py +++ b/xray/variable.py @@ -457,7 +457,8 @@ def squeeze(self, dimension=None): dimensions = dict(zip(self.dimensions, self.shape)) return utils.squeeze(self, dimensions, dimension) - def reduce(self, func, dimension=None, axis=None, **kwargs): + def reduce(self, func, dimension=None, axis=None, keep_attrs=False, + **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -473,6 +474,10 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `func(x)` without an axis argument). + keep_attrs : bool, optional + If True, the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -482,6 +487,12 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. 
""" + + if keep_attrs: + attrs = self.attrs + else: + attrs = OrderedDict() + if dimension is not None and axis is not None: raise ValueError("cannot supply both 'axis' and 'dimension' " "arguments") @@ -495,7 +506,7 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): dims = [dim for n, dim in enumerate(self.dimensions) if n not in removed_axes] - return Variable(dims, data) + return Variable(dims, data, attributes=attrs) @classmethod def concat(cls, variables, dimension='stacked_dimension', From 3a503747f09728e0ec3131cc468956878ada2b8c Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 21 May 2014 15:10:39 -0700 Subject: [PATCH 2/4] simplify copying of attributes and keep_attrs tests --- test/test_data_array.py | 6 +++--- test/test_dataset.py | 4 ++-- test/test_variable.py | 6 +++--- xray/dataset.py | 8 +++----- xray/variable.py | 7 ++----- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/test/test_data_array.py b/test/test_data_array.py index a8b0e5d23cb..2e51e72bcab 100644 --- a/test/test_data_array.py +++ b/test/test_data_array.py @@ -4,7 +4,7 @@ from textwrap import dedent from collections import OrderedDict -from xray import Dataset, DataArray, Variable, align, utils +from xray import Dataset, DataArray, Variable, align from xray.pycompat import iteritems from . 
import TestCase, ReturnItem, source_ndarray @@ -270,12 +270,12 @@ def test_reduce_keep_attrs(self): # Test dropped attrs vm = self.va.mean() self.assertEqual(len(vm.attrs), 0) - self.assertTrue(utils.dict_equal(vm.attrs, OrderedDict())) + self.assertEqual(vm.attrs, OrderedDict()) # Test kept attrs vm = self.va.mean(keep_attrs=True) self.assertEqual(len(vm.attrs), len(_attrs)) - self.assertTrue(utils.dict_equal(vm.attrs, _attrs)) + self.assertEqual(vm.attrs, _attrs) def test_unselect(self): with self.assertRaisesRegexp(ValueError, 'cannot unselect the name'): diff --git a/test/test_dataset.py b/test/test_dataset.py index 0e0c440acfe..304d3a1d704 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -708,9 +708,9 @@ def test_reduce_keep_attrs(self): # Test dropped attrs ds = data.mean() self.assertEqual(len(ds.attrs), 0) - self.assertTrue(utils.dict_equal(ds.attrs, OrderedDict())) + self.assertEqual(ds.attrs, OrderedDict()) # Test kept attrs ds = data.mean(keep_attrs=True) self.assertEqual(len(ds.attrs), len(_attrs)) - self.assertTrue(utils.dict_equal(ds.attrs, attrs)) + self.assertTrue(ds.attrs, attrs) diff --git a/test/test_variable.py b/test/test_variable.py index 11469b20f65..57ec7c19724 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -from xray import Variable, Dataset, DataArray, indexing, utils +from xray import Variable, Dataset, DataArray, indexing from xray.variable import (Coordinate, as_variable, NumpyArrayAdapter, PandasIndexAdapter, _as_compatible_data) from xray.pycompat import PY3 @@ -538,12 +538,12 @@ def test_reduce_keep_attrs(self): # Test dropped attrs vm = v.mean() self.assertEqual(len(vm.attrs), 0) - self.assertTrue(utils.dict_equal(vm.attrs, OrderedDict())) + self.assertEqual(vm.attrs, OrderedDict()) # Test kept attrs vm = v.mean(keep_attrs=True) self.assertEqual(len(vm.attrs), len(_attrs)) - self.assertTrue(utils.dict_equal(vm.attrs, _attrs)) + 
self.assertEqual(vm.attrs, _attrs) class TestCoordinate(TestCase, VariableSubclassTestCases): diff --git a/xray/dataset.py b/xray/dataset.py index 3dbc88294be..72b724ee8e5 100644 --- a/xray/dataset.py +++ b/xray/dataset.py @@ -1031,11 +1031,6 @@ def reduce(self, func, dimension=None, keep_attrs=False, **kwargs): of summarized data and the indicated dimension(s) removed. """ - if keep_attrs: - attrs = self.attrs - else: - attrs = OrderedDict() - if isinstance(dimension, basestring): dims = set([dimension]) elif dimension is None: @@ -1061,6 +1056,9 @@ def reduce(self, func, dimension=None, keep_attrs=False, **kwargs): pass else: variables[name] = var + + attrs = self.attrs if keep_attrs else {} + return Dataset(variables=variables, attributes=attrs) @classmethod diff --git a/xray/variable.py b/xray/variable.py index 6a0b05f8a17..402997731f3 100644 --- a/xray/variable.py +++ b/xray/variable.py @@ -488,11 +488,6 @@ def reduce(self, func, dimension=None, axis=None, keep_attrs=False, removed. 
""" - if keep_attrs: - attrs = self.attrs - else: - attrs = OrderedDict() - if dimension is not None and axis is not None: raise ValueError("cannot supply both 'axis' and 'dimension' " "arguments") @@ -506,6 +501,8 @@ def reduce(self, func, dimension=None, axis=None, keep_attrs=False, dims = [dim for n, dim in enumerate(self.dimensions) if n not in removed_axes] + attrs = self.attrs if keep_attrs else {} + return Variable(dims, data, attributes=attrs) @classmethod From da8f2dd2906d7d6449815570bc176a3b328216d6 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 21 May 2014 15:35:28 -0700 Subject: [PATCH 3/4] clean up test style and add a bit of documentation --- test/test_data_array.py | 9 ++++----- test/test_dataset.py | 3 ++- test/test_variable.py | 4 ++-- xray/common.py | 4 ++++ xray/data_array.py | 4 ++++ xray/dataset.py | 4 ++++ 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/test/test_data_array.py b/test/test_data_array.py index 2e51e72bcab..edac6926039 100644 --- a/test/test_data_array.py +++ b/test/test_data_array.py @@ -8,14 +8,13 @@ from xray.pycompat import iteritems from . 
import TestCase, ReturnItem, source_ndarray -_attrs = {'units': 'test', 'long_name': 'testing'} - class TestDataArray(TestCase): def setUp(self): + self._attrs = {'attr1': 'value1', 'attr2': 2929} self.x = np.random.random((10, 20)) self.v = Variable(['x', 'y'], self.x) - self.va = Variable(['x', 'y'], self.x, _attrs) + self.va = Variable(['x', 'y'], self.x, self._attrs) self.ds = Dataset({'foo': self.v}) self.dv = self.ds['foo'] @@ -274,8 +273,8 @@ def test_reduce_keep_attrs(self): # Test kept attrs vm = self.va.mean(keep_attrs=True) - self.assertEqual(len(vm.attrs), len(_attrs)) - self.assertEqual(vm.attrs, _attrs) + self.assertEqual(len(vm.attrs), len(self._attrs)) + self.assertEqual(vm.attrs, self._attrs) def test_unselect(self): with self.assertRaisesRegexp(ValueError, 'cannot unselect the name'): diff --git a/test/test_dataset.py b/test/test_dataset.py index 304d3a1d704..8aa602bd76f 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -20,7 +20,6 @@ 'var2': ['dim1', 'dim2'], 'var3': ['dim3', 'dim1'], } -_attrs = {'attr1': 'value1', 'attr2': 2929} _testvar = sorted(_vars.keys())[0] _testdim = sorted(_dims.keys())[0] @@ -702,6 +701,8 @@ def test_reduce_non_numeric(self): def test_reduce_keep_attrs(self): data = create_test_data() + _attrs = {'attr1': 'value1', 'attr2': 2929} + attrs = OrderedDict(_attrs) data.attrs = attrs diff --git a/test/test_variable.py b/test/test_variable.py index 57ec7c19724..30250378d21 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -13,8 +13,6 @@ from . 
import TestCase, source_ndarray -_attrs = {'units': 'test', 'long_name': 'testing'} - class VariableSubclassTestCases(object): def test_properties(self): @@ -533,6 +531,8 @@ def test_reduce(self): v.mean(dimension='x', axis=0) def test_reduce_keep_attrs(self): + _attrs = {'units': 'test', 'long_name': 'testing'} + v = Variable(['x', 'y'], self.d, _attrs) # Test dropped attrs diff --git a/xray/common.py b/xray/common.py index 3575d9d4115..bf27b7ab9a2 100644 --- a/xray/common.py +++ b/xray/common.py @@ -96,6 +96,10 @@ def _get_axis_num(self, dim): and 'axis' arguments can be supplied. If neither are supplied, then `{name}` is calculated over the flattened array (by calling `{name}(x)` without an axis argument). + keep_attrs : bool, optional + If True, the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to `{name}`. diff --git a/xray/data_array.py b/xray/data_array.py index b8fbd81656a..dff43651689 100644 --- a/xray/data_array.py +++ b/xray/data_array.py @@ -504,6 +504,10 @@ def reduce(self, func, dimension=None, axis=None, keep_attrs=False, if any(dim in drop for dim in v.dimensions)} ds = self.dataset.unselect(*drop) ds[self.name] = var + + if keep_attrs: + ds.attrs = self.dataset.attrs + return ds[self.name] @classmethod diff --git a/xray/dataset.py b/xray/dataset.py index 72b724ee8e5..462499b4097 100644 --- a/xray/dataset.py +++ b/xray/dataset.py @@ -983,6 +983,10 @@ def squeeze(self, dimension=None): dimension : str or sequence of str, optional Dimension(s) over which to apply `func`. By default `func` is applied over all dimensions. + keep_attrs : bool, optional + If True, the dataset's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. 
**kwargs : dict Additional keyword arguments passed on to `{name}`. From 555def48f18e75246a91decd4a3b3c951e247ff1 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 21 May 2014 17:24:04 -0700 Subject: [PATCH 4/4] update signatures of numpy reduction functions --- xray/common.py | 4 ++-- xray/dataset.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xray/common.py b/xray/common.py index bf27b7ab9a2..549dee1102b 100644 --- a/xray/common.py +++ b/xray/common.py @@ -5,8 +5,8 @@ class ImplementsReduce(object): @classmethod def _reduce_method(cls, f, name=None, module=None): - def func(self, dimension=None, axis=None, **kwargs): - return self.reduce(f, dimension, axis, **kwargs) + def func(self, dimension=None, axis=None, keep_attrs=False, **kwargs): + return self.reduce(f, dimension, axis, keep_attrs, **kwargs) if name is None: name = f.__name__ func.__name__ = name diff --git a/xray/dataset.py b/xray/dataset.py index 462499b4097..76e3f6761c1 100644 --- a/xray/dataset.py +++ b/xray/dataset.py @@ -999,8 +999,8 @@ def squeeze(self, dimension=None): @classmethod def _reduce_method(cls, f, name=None, module=None): - def func(self, dimension=None, **kwargs): - return self.reduce(f, dimension, **kwargs) + def func(self, dimension=None, keep_attrs=False, **kwargs): + return self.reduce(f, dimension, keep_attrs, **kwargs) if name is None: name = f.__name__ func.__name__ = name