From f3680469793e1212295eb5ca7089bd9d1121a566 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@climate.com>
Date: Thu, 3 Dec 2015 17:22:08 -0800
Subject: [PATCH] Rework DataArray internals

Fixes GH367
Fixes GH634

The internal data model used by xray.DataArray has been rewritten to fix
several outstanding issues (GH367, GH634, and the Stack Overflow report at
http://stackoverflow.com/questions/33158558/python-xray-extract-first-and-last-time-value-within-each-month-of-a-timeseries).
Internally, DataArray is now implemented in terms of ._variable and ._coords
attributes instead of holding variables in a Dataset object.
---
 doc/whats-new.rst           |  58 ++++++
 xray/core/alignment.py      |  10 +
 xray/core/combine.py        |  25 ++-
 xray/core/common.py         |   4 +-
 xray/core/coordinates.py    |  91 +++++----
 xray/core/dataarray.py      | 391 +++++++++++++++++++-----------------
 xray/core/dataset.py        | 220 +++++---------------
 xray/core/groupby.py        |  24 +--
 xray/core/merge.py          | 170 ++++++++++++++++
 xray/core/variable.py       |  22 +-
 xray/test/__init__.py       |   3 +-
 xray/test/test_backends.py  |   4 +-
 xray/test/test_combine.py   |   9 +-
 xray/test/test_dask.py      |   2 +-
 xray/test/test_dataarray.py |  86 ++++++--
 xray/test/test_dataset.py   |  19 +-
 xray/test/test_plot.py      |   2 +-
 xray/test/test_variable.py  |  18 +-
 18 files changed, 702 insertions(+), 456 deletions(-)
 create mode 100644 xray/core/merge.py

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4d405c7e4b2..3461433f9a2 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -9,6 +9,64 @@ What's New
     import xray
     np.random.seed(123456)
 
+v0.7.0 (unreleased)
+-------------------
+
+.. _v0.7.0.breaking:
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+- The internal data model used by :py:class:`~xray.DataArray` has been
+  rewritten to fix several outstanding issues (:issue:`367`, :issue:`634`,
+  `this stackoverflow report`_). Internally, ``DataArray`` is now implemented
+  in terms of ``._variable`` and ``._coords`` attributes instead of holding
+  variables in a ``Dataset`` object.
+
+  This refactor ensures that if a DataArray has the
+  same name as one of its coordinates, the array and the coordinate no longer
+  share the same data.
+
+  In practice, this means that creating a DataArray with the same ``name`` as
+  one of its dimensions no longer automatically uses that array to label the
+  corresponding coordinate. You will now need to provide coordinate labels
+  explicitly. Here's the old behavior:
+
+  .. ipython::
+    :verbatim:
+
+    In [2]: xray.DataArray([4, 5, 6], dims='x', name='x')
+    Out[2]:
+    <xray.DataArray 'x' (x: 3)>
+    array([4, 5, 6])
+    Coordinates:
+      * x        (x) int64 4 5 6
+
+  and the new behavior (compare the values of the ``x`` coordinate):
+
+  .. ipython::
+    :verbatim:
+
+    In [2]: xray.DataArray([4, 5, 6], dims='x', name='x')
+    Out[2]:
+    <xray.DataArray 'x' (x: 3)>
+    array([4, 5, 6])
+    Coordinates:
+      * x        (x) int64 0 1 2
+
+- It is no longer possible to convert an unnamed DataArray to a Dataset with
+  :py:meth:`xray.DataArray.to_dataset` unless you supply the ``name``
+  argument. Calling it on an unnamed array without ``name`` now raises
+  ``ValueError``.
+
+.. _this stackoverflow report: http://stackoverflow.com/questions/33158558/python-xray-extract-first-and-last-time-value-within-each-month-of-a-timeseries
+
+Bug fixes
+~~~~~~~~~
+
+- Fixes for several issues found on ``DataArray`` objects with the same name
+  as one of their coordinates (see :ref:`v0.7.0.breaking` for more details).
+
 v0.6.2 (unreleased)
 -------------------
 
diff --git a/xray/core/alignment.py b/xray/core/alignment.py
index 9afb04feac9..737fc4494cb 100644
--- a/xray/core/alignment.py
+++ b/xray/core/alignment.py
@@ -100,6 +100,16 @@ def partial_align(*objects, **kwargs):
     return tuple(obj.reindex(copy=copy, **joined_indexes) for obj in objects)
 
 
+def align_variables(variables, join='outer', copy=False):
+    """Align all DataArrays in the provided dict, leaving other values alone.
+    """
+    alignable = [k for k, v in variables.items() if hasattr(v, 'indexes')]
+    aligned = align(*[variables[a] for a in alignable], join=join, copy=copy)
+    new_variables = OrderedDict(variables)
+    new_variables.update(zip(alignable, aligned))
+    return new_variables
+
+
 def reindex_variables(variables, indexes, indexers, method=None,
                       tolerance=None, copy=True):
     """Conform a dictionary of aligned variables onto a new set of variables,
diff --git a/xray/core/combine.py b/xray/core/combine.py
index b331232c1bd..6946981da57 100644
--- a/xray/core/combine.py
+++ b/xray/core/combine.py
@@ -4,7 +4,7 @@
 
 from . import utils
 from .pycompat import iteritems, reduce, OrderedDict, basestring
-from .variable import Variable
+from .variable import Variable, as_variable, Coordinate
 
 
 def concat(objs, dim=None, data_vars='all', coords='different',
@@ -120,8 +120,6 @@ def _calc_concat_dim_coord(dim):
     Infer the dimension name and 1d coordinate variable (if appropriate)
     for concatenating along the new dimension.
     """
-    from .dataarray import DataArray
-
     if isinstance(dim, basestring):
         coord = None
     elif not hasattr(dim, 'dims'):
@@ -129,8 +127,11 @@ def _calc_concat_dim_coord(dim):
         dim_name = getattr(dim, 'name', None)
         if dim_name is None:
             dim_name = 'concat_dim'
-        coord = DataArray(dim, dims=dim_name, name=dim_name)
+        coord = Coordinate(dim_name, dim)
         dim = dim_name
+    elif not hasattr(dim, 'name'):
+        coord = as_variable(dim).to_coord()
+        dim, = coord.dims
     else:
         coord = dim
         dim, = coord.dims
@@ -207,6 +208,7 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
     concat_over = _calc_concat_over(datasets, dim, data_vars, coords)
 
     def insert_result_variable(k, v):
+        assert isinstance(v, Variable)
         if k in datasets[0].coords:
             result_coord_names.add(k)
         result_vars[k] = v
@@ -267,22 +269,19 @@ def ensure_common_dims(vars):
         combined = Variable.concat(vars, dim, positions)
         insert_result_variable(k, combined)
 
-    # result._coord_names.update(datasets[0].coords)
+    result = Dataset(result_vars, attrs=result_attrs)
+    result = result.set_coords(result_coord_names)
 
     if coord is not None:
         # add concat dimension last to ensure that its in the final Dataset
-        insert_result_variable(coord.name, coord)
-        # result[coord.name] = coord
-
-    result = Dataset(result_vars, attrs=result_attrs)
-    result = result.set_coords(result_coord_names)
+        result[coord.name] = coord
 
     return result
 
 
 def _dataarray_concat(arrays, dim, data_vars, coords, compat,
                       positions):
-    from .dataarray import DataArray
+    arrays = list(arrays)
 
     if data_vars != 'all':
         raise ValueError('data_vars is not a valid argument when '
@@ -297,11 +296,11 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
                 raise ValueError('array names not identical')
             else:
                 arr = arr.rename(name)
-        datasets.append(arr._dataset)
+        datasets.append(arr._to_temp_dataset())
 
     ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
                          positions)
-    return DataArray._new_from_dataset_no_copy(ds, name)
+    return arrays[0]._from_temp_dataset(ds, name)
 
 
 def _auto_concat(datasets, dim=None):
diff --git a/xray/core/common.py b/xray/core/common.py
index 6d031c1d577..d2067fdfa85 100644
--- a/xray/core/common.py
+++ b/xray/core/common.py
@@ -186,7 +186,7 @@ def assign_coords(self, **kwargs):
         Dataset.assign
         """
         data = self.copy(deep=False)
-        results = data._calc_assign_results(kwargs)
+        results = self._calc_assign_results(kwargs)
         data.coords.update(results)
         return data
 
@@ -333,7 +333,7 @@ def resample(self, freq, dim, how='mean', skipna=None, closed=None,
         RESAMPLE_DIM = '__resample_dim__'
         if isinstance(dim, basestring):
             dim = self[dim]
-        group = DataArray(dim, name=RESAMPLE_DIM)
+        group = DataArray(dim, [(RESAMPLE_DIM, dim)], name=RESAMPLE_DIM)
         time_grouper = pd.TimeGrouper(freq=freq, how=how, closed=closed,
                                       label=label, base=base)
         gb = self.groupby_cls(self, group, grouper=time_grouper)
diff --git a/xray/core/coordinates.py b/xray/core/coordinates.py
index f7906ac78c7..3436b3e8b61 100644
--- a/xray/core/coordinates.py
+++ b/xray/core/coordinates.py
@@ -2,8 +2,9 @@
 from contextlib import contextmanager
 import pandas as pd
 
-from .pycompat import iteritems, basestring, OrderedDict
 from . import formatting
+from .merge import merge_dataarray_coords
+from .pycompat import iteritems, basestring, OrderedDict
 
 
 def _coord_merge_finalize(target, other, target_conflicts, other_conflicts,
@@ -37,16 +38,12 @@ def _dim_shape(var):
 
 
 class AbstractCoordinates(Mapping):
-    @property
-    def _names(self):
-        return self._dataset._coord_names
-
     def __getitem__(self, key):
         if (key in self._names or
             (isinstance(key, basestring) and
              key.split('.')[0] in self._names)):
             # allow indexing current coordinates or components
-            return self._dataset[key]
+            return self._data[key]
         else:
             raise KeyError(key)
 
@@ -55,7 +52,7 @@ def __setitem__(self, key, value):
 
     def __iter__(self):
         # needs to be in the same order as the dataset variables
-        for k in self._dataset._variables:
+        for k in self._variables:
             if k in self._names:
                 yield k
 
@@ -65,30 +62,19 @@ def __len__(self):
     def __contains__(self, key):
         return key in self._names
 
-    def __delitem__(self, key):
-        if key in self:
-            del self._dataset[key]
-        else:
-            raise KeyError(key)
-
     def __repr__(self):
         return formatting.coords_repr(self)
 
     @property
     def dims(self):
-        return self._dataset.dims
-
-    def to_dataset(self):
-        """Convert these coordinates into a new Dataset
-        """
-        return self._dataset._copy_listed(self._names)
+        return self._data.dims
 
     def to_index(self, ordered_dims=None):
         """Convert all index coordinates into a :py:class:`pandas.MultiIndex`
         """
         if ordered_dims is None:
             ordered_dims = self.dims
-        indexes = [self._dataset._variables[k].to_index() for k in ordered_dims]
+        indexes = [self._variables[k].to_index() for k in ordered_dims]
         return pd.MultiIndex.from_product(indexes, names=list(ordered_dims))
 
     def _merge_validate(self, other):
@@ -100,7 +86,7 @@ def _merge_validate(self, other):
         promote_dims = {}
         for k in self:
             if k in other:
-                self_var = self._dataset._variables[k]
+                self_var = self._variables[k]
                 other_var = other[k].variable
                 if not self_var.broadcast_equals(other_var):
                     if k in self.dims and k in other.dims:
@@ -165,12 +151,31 @@ class DatasetCoordinates(AbstractCoordinates):
     objects.
     """
     def __init__(self, dataset):
-        self._dataset = dataset
+        self._data = dataset
+
+    @property
+    def _names(self):
+        return self._data._coord_names
+
+    @property
+    def _variables(self):
+        return self._data._variables
+
+    def to_dataset(self):
+        """Convert these coordinates into a new Dataset
+        """
+        return self._data._copy_listed(self._names)
 
     def update(self, other):
-        self._dataset.update(other)
+        self._data.update(other)
         self._names.update(other.keys())
 
+    def __delitem__(self, key):
+        if key in self:
+            del self._data[key]
+        else:
+            raise KeyError(key)
+
 
 class DataArrayCoordinates(AbstractCoordinates):
     """Dictionary like container for DataArray coordinates.
@@ -180,20 +185,38 @@ class DataArrayCoordinates(AbstractCoordinates):
     objects.
     """
     def __init__(self, dataarray):
-        self._dataarray = dataarray
-        self._dataset = dataarray._dataset
+        self._data = dataarray
 
-    def update(self, other):
-        with self._dataarray._set_new_dataset() as ds:
-            ds.coords.update(other)
-            bad_dims = [d for d in ds.dims if d not in self.dims]
-            if bad_dims:
-                raise ValueError('DataArray does not include all coordinate '
-                                 'dimensions: %s' % bad_dims)
+    @property
+    def _names(self):
+        return set(self._data._coords)
 
     @property
-    def dims(self):
-        return self._dataarray.dims
+    def _variables(self):
+        return self._data._coords
+
+    def _to_dataset(self, shallow_copy=True):
+        from .dataset import Dataset
+        coords = OrderedDict((k, v.copy(deep=False) if shallow_copy else v)
+                             for k, v in self._data._coords.items())
+        dims = dict(zip(self.dims, self._data.shape))
+        return Dataset._construct_direct(coords, coord_names=set(self._names),
+                                         dims=dims, attrs=None)
+
+    def to_dataset(self):
+        return self._to_dataset()
+
+    def update(self, other):
+        new_vars = merge_dataarray_coords(
+            self._data.indexes, self._data._coords, other)
+
+        self._data._coords = new_vars
+
+    def __delitem__(self, key):
+        if key in self.dims:
+            raise ValueError('cannot delete a coordinate corresponding to a '
+                             'DataArray dimension')
+        del self._data._coords[key]
 
 
 class Indexes(Mapping):
diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py
index c905f886917..7c7584d7338 100644
--- a/xray/core/dataarray.py
+++ b/xray/core/dataarray.py
@@ -11,13 +11,13 @@
 from . import groupby
 from . import ops
 from . import utils
-from . import variable
 from .alignment import align
-from .common import AbstractArray, BaseDataObject
+from .common import AbstractArray, BaseDataObject, squeeze
 from .coordinates import DataArrayCoordinates, Indexes
 from .dataset import Dataset
 from .pycompat import iteritems, basestring, OrderedDict, zip
-from .variable import as_variable, _as_compatible_data, Coordinate
+from .variable import (as_variable, Variable, as_compatible_data, Coordinate,
+                       default_index_coordinate)
 from .formatting import format_item
 
 
@@ -31,7 +31,7 @@ def _infer_coords_and_dims(shape, coords, dims):
                          'data' % (len(coords), len(shape)))
 
     if isinstance(dims, basestring):
-        dims = [dims]
+        dims = (dims,)
 
     if dims is None:
         dims = ['dim_%s' % n for n in range(len(shape))]
@@ -41,24 +41,36 @@ def _infer_coords_and_dims(shape, coords, dims):
                 dims = list(coords.keys())
             else:
                 for n, (dim, coord) in enumerate(zip(dims, coords)):
-                    if getattr(coord, 'name', None) is None:
-                        coord = as_variable(coord, key=dim).to_coord()
+                    coord = as_variable(coord, key=dim).to_coord()
                     dims[n] = coord.name
+        dims = tuple(dims)
     else:
         for d in dims:
             if not isinstance(d, basestring):
                 raise TypeError('dimension %s is not a string' % d)
-        if coords is not None and not utils.is_dict_like(coords):
-            # ensure coordinates have the right dimensions
-            coords = [Coordinate(dim, coord, getattr(coord, 'attrs', {}))
-                      for dim, coord in zip(dims, coords)]
 
-    if coords is None:
-        coords = {}
-    elif not utils.is_dict_like(coords):
-        coords = OrderedDict(zip(dims, coords))
+    new_coords = OrderedDict()
 
-    return coords, dims
+    if utils.is_dict_like(coords):
+        for k, v in coords.items():
+            new_coords[k] = as_variable(v, key=k, copy=True)
+    elif coords is not None:
+        for dim, coord in zip(dims, coords):
+            var = as_variable(coord, key=dim, copy=True)
+            var.dims = (dim,)
+            new_coords[dim] = var
+
+    for dim, size in zip(dims, shape):
+        if dim not in new_coords:
+            new_coords[dim] = default_index_coordinate(dim, size)
+
+    for k, v in new_coords.items():
+        if any(d not in dims for d in v.dims):
+            raise ValueError('coordinate %s has dimensions %s, but these '
+                             'are not a subset of the DataArray '
+                             'dimensions %s' % (k, v.dims, dims))
+
+    return new_coords, dims
 
 
 class _LocIndexer(object):
@@ -86,6 +98,14 @@ def __setitem__(self, key, value):
         self.data_array[self._remap_key(key)] = value
 
 
+class _ThisArray(object):
+    """An instance of this object is used as the key corresponding to the
+    variable when converting arbitrary DataArray objects to datasets
+    """
+    def __repr__(self):
+        return '<this-array>'
+
+
 class DataArray(AbstractArray, BaseDataObject):
     """N-dimensional array with labeled coordinates and dimensions.
 
@@ -127,7 +147,7 @@ class DataArray(AbstractArray, BaseDataObject):
     groupby_cls = groupby.DataArrayGroupBy
 
     def __init__(self, data, coords=None, dims=None, name=None,
-                 attrs=None, encoding=None):
+                 attrs=None, encoding=None, fastpath=False):
         """
         Parameters
         ----------
@@ -159,67 +179,71 @@ def __init__(self, data, coords=None, dims=None, name=None,
             'units' and 'calendar' (the later two only for datetime arrays).
             Unrecognized keys are ignored.
         """
-        # try to fill in arguments from data if they weren't supplied
-        if coords is None:
-            coords = getattr(data, 'coords', None)
-            if isinstance(data, pd.Series):
-                coords = [data.index]
-            elif isinstance(data, pd.DataFrame):
-                coords = [data.index, data.columns]
-            elif isinstance(data, (pd.Index, variable.Coordinate)):
-                coords = [data]
-            elif isinstance(data, pd.Panel):
-                coords = [data.items, data.major_axis, data.minor_axis]
-        if dims is None:
-            dims = getattr(data, 'dims', getattr(coords, 'dims', None))
-        if name is None:
-            name = getattr(data, 'name', None)
-        if attrs is None:
-            attrs = getattr(data, 'attrs', None)
-        if encoding is None:
-            encoding = getattr(data, 'encoding', None)
-
-        data = _as_compatible_data(data)
-        coords, dims = _infer_coords_and_dims(data.shape, coords, dims)
-        dataset = Dataset(coords=coords)
-        # insert data afterwards in case of redundant coords/data
-        dataset[name] = (dims, data, attrs, encoding)
-
-        for k, v in iteritems(dataset.coords):
-            if any(d not in dims for d in v.dims):
-                raise ValueError('coordinate %s has dimensions %s, but these '
-                                 'are not a subset of the DataArray '
-                                 'dimensions %s' % (k, v.dims, dims))
-
-        # these fully describe a DataArray
-        self._dataset = dataset
+        if fastpath:
+            variable = data
+            assert dims is None
+            assert attrs is None
+            assert encoding is None
+        else:
+            # try to fill in arguments from data if they weren't supplied
+            if coords is None:
+                coords = getattr(data, 'coords', None)
+                if isinstance(data, pd.Series):
+                    coords = [data.index]
+                elif isinstance(data, pd.DataFrame):
+                    coords = [data.index, data.columns]
+                elif isinstance(data, (pd.Index, Coordinate)):
+                    coords = [data]
+                elif isinstance(data, pd.Panel):
+                    coords = [data.items, data.major_axis, data.minor_axis]
+            if dims is None:
+                dims = getattr(data, 'dims', getattr(coords, 'dims', None))
+            if name is None:
+                name = getattr(data, 'name', None)
+            if attrs is None:
+                attrs = getattr(data, 'attrs', None)
+            if encoding is None:
+                encoding = getattr(data, 'encoding', None)
+
+            data = as_compatible_data(data)
+            coords, dims = _infer_coords_and_dims(data.shape, coords, dims)
+            variable = Variable(dims, data, attrs, encoding, fastpath=True)
+
+        # These fully describe a DataArray
+        self._variable = variable
+        self._coords = coords
         self._name = name
 
-    @classmethod
-    def _new_from_dataset(cls, original_dataset, name):
-        """Private constructor for the benefit of Dataset.__getitem__ (skips
-        all validation)
-        """
-        dataset = original_dataset._copy_listed([name], keep_attrs=False)
-        if name not in dataset:
-            # handle virtual variables
-            try:
-                _, name = name.split('.', 1)
-            except Exception:
-                raise KeyError(name)
-        if name not in dataset._dims:
-            dataset._coord_names.discard(name)
-        return cls._new_from_dataset_no_copy(dataset, name)
+    __default = object()
 
-    @classmethod
-    def _new_from_dataset_no_copy(cls, dataset, name):
-        obj = object.__new__(cls)
-        obj._dataset = dataset
-        obj._name = name
-        return obj
+    def _replace(self, variable=None, coords=None, name=__default):
+        if variable is None:
+            variable = self.variable
+        if coords is None:
+            coords = self._coords
+        if name is self.__default:
+            name = self.name
+        return type(self)(variable, coords, name=name, fastpath=True)
+
+    def _replace_maybe_drop_dims(self, variable, name=__default):
+        if variable.dims == self.dims:
+            coords = None
+        else:
+            allowed_dims = set(variable.dims)
+            coords = OrderedDict((k, v) for k, v in self._coords.items()
+                                 if set(v.dims) <= allowed_dims)
+        return self._replace(variable, coords, name)
+
+    __this_array = _ThisArray()
 
-    def _with_replaced_dataset(self, dataset):
-        return self._new_from_dataset_no_copy(dataset, self.name)
+    def _to_temp_dataset(self):
+        return self._to_dataset_whole(name=self.__this_array,
+                                      shallow_copy=False)
+
+    def _from_temp_dataset(self, dataset, name=__default):
+        variable = dataset._variables.pop(self.__this_array)
+        coords = dataset._variables
+        return self._replace(variable, coords, name)
 
     def _to_dataset_split(self, dim):
         def subset(dim, label):
@@ -233,11 +257,18 @@ def subset(dim, label):
         del coords[dim]
         return Dataset(variables, coords, self.attrs)
 
-    def _to_dataset_whole(self, name):
+    def _to_dataset_whole(self, name=None, shallow_copy=True):
         if name is None:
-            return self._dataset.copy()
-        else:
-            return self.rename(name)._dataset
+            name = self.name
+        if name is None:
+            raise ValueError('unable to convert unnamed DataArray to a '
+                             'Dataset without providing an explicit name')
+        if name in self.coords:
+            raise ValueError('cannot create a Dataset from a DataArray with '
+                             'the same name as one of its coordinates')
+        dataset = self.coords._to_dataset(shallow_copy=shallow_copy)
+        dataset[name] = self.variable
+        return dataset
 
     def to_dataset(self, dim=None, name=None):
         """Convert a DataArray to a Dataset.
@@ -277,24 +308,13 @@ def name(self):
         """
         return self._name
 
-    @contextlib.contextmanager
-    def _set_new_dataset(self):
-        """Context manager to use for modifying _dataset, in a manner that
-        can be safely rolled back if an error is encountered.
-        """
-        ds = self._dataset.copy(deep=False)
-        yield ds
-        self._dataset = ds
-
     @name.setter
     def name(self, value):
-        with self._set_new_dataset() as ds:
-            ds.rename({self.name: value}, inplace=True)
         self._name = value
 
     @property
     def variable(self):
-        return self._dataset._variables[self.name]
+        return self._variable
 
     @property
     def dtype(self):
@@ -366,7 +386,14 @@ def _item_key_to_dict(self, key):
 
     def __getitem__(self, key):
         if isinstance(key, basestring):
-            return self.coords[key]
+            from .dataset import _get_virtual_variable
+
+            try:
+                var = self._coords[key]
+            except KeyError:
+                _, key, var = _get_virtual_variable(self._coords, key)
+
+            return self._replace_maybe_drop_dims(var, name=key)
         else:
             # orthogonal array indexing
             return self.isel(**self._item_key_to_dict(key))
@@ -379,7 +406,7 @@ def __setitem__(self, key, value):
             self.variable[key] = value
 
     def __delitem__(self, key):
-        del self._dataset[key]
+        del self.coords[key]
 
     @property
     def _attr_sources(self):
@@ -387,7 +414,7 @@ def _attr_sources(self):
         return [self.coords, self.attrs]
 
     def __contains__(self, key):
-        return key in self._dataset
+        return key in self._coords
 
     @property
     def loc(self):
@@ -449,10 +476,19 @@ def reset_coords(self, names=None, drop=False, inplace=False):
             raise ValueError('cannot reset coordinates in-place on a '
                              'DataArray without ``drop == True``')
         if names is None:
-            names = (self._dataset._coord_names - set(self.dims) -
-                     set([self.name]))
-        ds = self._dataset.reset_coords(names, drop, inplace)
-        return ds[self.name] if drop else ds
+            names = set(self.coords) - set(self.dims)
+        dataset = self.coords.to_dataset().reset_coords(names, drop)
+        if drop:
+            if inplace:
+                self._coords = dataset._variables
+            else:
+                return self._replace(coords=dataset._variables)
+        else:
+            if self.name is None:
+                raise ValueError('cannot reset_coords with drop=False '
+                                 'on an unnamed DataArrray')
+            dataset[self.name] = self.variable
+            return dataset
 
     def load(self):
         """Manually trigger loading of this array's data from disk or a
@@ -463,7 +499,10 @@ def load(self):
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
         """
-        self._dataset.load()
+        ds = self._to_temp_dataset().load()
+        new = self._from_temp_dataset(ds)
+        self._variable = new._variable
+        self._coords = new._coords
         return self
 
     def load_data(self):  # pragma: no cover
@@ -479,8 +518,10 @@ def copy(self, deep=True):
         dataset. Otherwise, a shallow copy is made, so each variable in the new
         array's dataset is also a variable in this array's dataset.
         """
-        ds = self._dataset.copy(deep=deep)
-        return self._with_replaced_dataset(ds)
+        variable = self.variable.copy(deep=deep)
+        coords = OrderedDict((k, v.copy(deep=deep))
+                             for k, v in self._coords.items())
+        return self._replace(variable, coords)
 
     def __copy__(self):
         return self.copy(deep=False)
@@ -524,8 +565,8 @@ def chunk(self, chunks=None):
         if isinstance(chunks, (list, tuple)):
             chunks = dict(zip(self.dims, chunks))
 
-        ds = self._dataset.chunk(chunks)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().chunk(chunks)
+        return self._from_temp_dataset(ds)
 
     def isel(self, **indexers):
         """Return a new DataArray whose dataset is given by integer indexing
@@ -536,8 +577,8 @@ def isel(self, **indexers):
         Dataset.isel
         DataArray.sel
         """
-        ds = self._dataset.isel(**indexers)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().isel(**indexers)
+        return self._from_temp_dataset(ds)
 
     def sel(self, method=None, tolerance=None, **indexers):
         """Return a new DataArray whose dataset is given by selecting
@@ -559,8 +600,8 @@ def isel_points(self, dim='points', **indexers):
         --------
         Dataset.isel_points
         """
-        ds = self._dataset.isel_points(dim=dim, **indexers)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().isel_points(dim=dim, **indexers)
+        return self._from_temp_dataset(ds)
 
     def sel_points(self, dim='points', method=None, tolerance=None,
                    **indexers):
@@ -571,9 +612,9 @@ def sel_points(self, dim='points', method=None, tolerance=None,
         --------
         Dataset.sel_points
         """
-        ds = self._dataset.sel_points(dim=dim, method=method,
-                                      tolerance=tolerance, **indexers)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().sel_points(
+            dim=dim, method=method, tolerance=tolerance, **indexers)
+        return self._from_temp_dataset(ds)
 
     def reindex_like(self, other, method=None, tolerance=None, copy=True):
         """Conform this object onto the indexes of another object, filling
@@ -660,9 +701,9 @@ def reindex(self, method=None, tolerance=None, copy=True, **indexers):
         DataArray.reindex_like
         align
         """
-        ds = self._dataset.reindex(method=method, tolerance=tolerance,
-                                   copy=copy, **indexers)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().reindex(
+            method=method, tolerance=tolerance, copy=copy, **indexers)
+        return self._from_temp_dataset(ds)
 
     def rename(self, new_name_or_name_dict):
         """Returns a new DataArray with renamed coordinates and/or a new name.
@@ -686,13 +727,12 @@ def rename(self, new_name_or_name_dict):
         DataArray.swap_dims
         """
         if utils.is_dict_like(new_name_or_name_dict):
-            name_dict = new_name_or_name_dict
-            new_name = name_dict.get(self.name, self.name)
+            name_dict = new_name_or_name_dict.copy()
+            name = name_dict.pop(self.name, self.name)
+            dataset = self._to_temp_dataset().rename(name_dict)
+            return self._from_temp_dataset(dataset, name)
         else:
-            new_name = new_name_or_name_dict
-            name_dict = {self.name: new_name}
-        renamed_dataset = self._dataset.rename(name_dict)
-        return renamed_dataset[new_name]
+            return self._replace(name=new_name_or_name_dict)
 
     def swap_dims(self, dims_dict):
         """Returns a new DataArray with swapped dimensions.
@@ -717,8 +757,8 @@ def swap_dims(self, dims_dict):
         DataArray.rename
         Dataset.swap_dims
         """
-        ds = self._dataset.swap_dims(dims_dict)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().swap_dims(dims_dict)
+        return self._from_temp_dataset(ds)
 
     def transpose(self, *dims):
         """Return a new DataArray object with transposed dimensions.
@@ -744,9 +784,8 @@ def transpose(self, *dims):
         numpy.transpose
         Dataset.transpose
         """
-        ds = self._dataset.copy()
-        ds[self.name] = self.variable.transpose(*dims)
-        return self._with_replaced_dataset(ds)
+        variable = self.variable.transpose(*dims)
+        return self._replace(variable)
 
     def squeeze(self, dim=None):
         """Return a new DataArray object with squeezed data.
@@ -773,8 +812,7 @@ def squeeze(self, dim=None):
         --------
         numpy.squeeze
         """
-        ds = self._dataset.squeeze(dim)
-        return self._with_replaced_dataset(ds)
+        return squeeze(self, dict(zip(self.dims, self.shape)), dim)
 
     def drop(self, labels, dim=None):
         """Drop coordinates or index labels from this DataArray.
@@ -793,10 +831,8 @@ def drop(self, labels, dim=None):
         """
         if utils.is_scalar(labels):
             labels = [labels]
-        if dim is None and self.name in labels:
-            raise ValueError('cannot drop this DataArray from itself')
-        ds = self._dataset.drop(labels, dim)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().drop(labels, dim)
+        return self._from_temp_dataset(ds)
 
     def dropna(self, dim, how='any', thresh=None):
         """Returns a new array with dropped labels for missing values along
@@ -817,8 +853,8 @@ def dropna(self, dim, how='any', thresh=None):
         -------
         DataArray
         """
-        ds = self._dataset.dropna(dim, how=how, thresh=thresh)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().dropna(dim, how=how, thresh=thresh)
+        return self._from_temp_dataset(ds)
 
     def fillna(self, value):
         """Fill missing values in this object.
@@ -874,9 +910,7 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
             summarized data and the indicated dimension(s) removed.
         """
         var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
-        ds = self._dataset.drop(set(self.dims) - set(var.dims))
-        ds[self.name] = var
-        return self._with_replaced_dataset(ds)
+        return self._replace_maybe_drop_dims(var)
 
     def to_pandas(self):
         """Convert this array into a pandas object with the same shape.
@@ -905,7 +939,7 @@ def to_pandas(self):
                              'pandas objects' % self.ndim)
         return constructor(self.values, *self.indexes.values())
 
-    def to_dataframe(self):
+    def to_dataframe(self, name=None):
         """Convert this array and its coordinates into a tidy pandas.DataFrame.
 
         The DataFrame is indexed by the Cartesian product of index coordinates
@@ -913,9 +947,24 @@ def to_dataframe(self):
 
         Other coordinates are included as columns in the DataFrame.
         """
-        # TODO: add a 'name' parameter
+        if name is None:
+            name = self.name
+        if name is None:
+            raise ValueError('cannot convert an unnamed DataArray to a '
+                             'DataFrame: use the ``name`` parameter')
+
         dims = OrderedDict(zip(self.dims, self.shape))
-        return self._dataset._to_dataframe(dims)
+        # By using a unique name, we can convert a DataArray into a DataFrame
+        # even if it shares a name with one of its coordinates.
+        # I would normally use unique_name = object() but that results in a
+        # dataframe with columns in the wrong order, for reasons I have not
+        # been able to debug (possibly a pandas bug?).
+        unique_name = '__unique_name_identifier_z98xfz98xugfg73ho__'
+        ds = self._to_dataset_whole(name=unique_name)
+        df = ds._to_dataframe(dims)
+        df.columns = [name if c == unique_name else c
+                      for c in df.columns]
+        return df
 
     def to_series(self):
         """Convert this array into a pandas.Series.
@@ -953,9 +1002,10 @@ def from_series(cls, series):
         method.
         """
         # TODO: add a 'name' parameter
-        df = pd.DataFrame({series.name: series})
+        name = series.name
+        df = pd.DataFrame({name: series})
         ds = Dataset.from_dataframe(df)
-        return cls._new_from_dataset_no_copy(ds, series.name)
+        return ds[name]
 
     def to_cdms2(self):
         """Convert this array into a cdms2.Variable
@@ -1030,36 +1080,17 @@ def identical(self, other):
     __default_name = object()
 
     def _result_name(self, other=None):
-
-        if self.name in self.dims:
-            # these names match dimension, so if we preserve them we will also
-            # rename indexes
-            return None
-
-        if other is None:
-            # shortcut
-            return self.name
-
-        other_name = getattr(other, 'name', self.__default_name)
-        other_dims = getattr(other, 'dims', ())
-
-        if other_name in other_dims:
-            # same trouble as above
-            return None
-
         # use the same naming heuristics as pandas:
         # https://github.com/ContinuumIO/blaze/issues/458#issuecomment-51936356
+        other_name = getattr(other, 'name', self.__default_name)
         if other_name is self.__default_name or other_name == self.name:
             return self.name
-
-        return None
+        else:
+            return None
 
     def __array_wrap__(self, obj, context=None):
         new_var = self.variable.__array_wrap__(obj, context)
-        ds = self.coords.to_dataset()
-        name = self._result_name()
-        ds[name] = new_var
-        return self._new_from_dataset_no_copy(ds, name)
+        return self._replace(new_var)
 
     @staticmethod
     def _unary_op(f):
@@ -1081,15 +1112,16 @@ def func(self, other):
                 if empty_indexes:
                     raise ValueError('no overlapping labels for some '
                                      'dimensions: %s' % empty_indexes)
-            other_coords = getattr(other, 'coords', None)
             other_variable = getattr(other, 'variable', other)
-            ds = self.coords.merge(other_coords)
-            name = self._result_name(other)
-            ds[name] = (f(self.variable, other_variable)
+            other_coords = getattr(other, 'coords', None)
+
+            variable = (f(self.variable, other_variable)
                         if not reflexive
                         else f(other_variable, self.variable))
-            result = self._new_from_dataset_no_copy(ds, name)
-            return result
+            coords = self.coords.merge(other_coords)._variables
+            name = self._result_name(other)
+
+            return self._replace(variable, coords, name)
         return func
 
     @staticmethod
@@ -1108,7 +1140,7 @@ def func(self, other):
 
     @property
     def plot(self):
-        '''
+        """
         Access plotting functions
 
         >>> d = DataArray([[1, 2], [3, 4]])
@@ -1120,11 +1152,11 @@ def plot(self):
         DataArray methods
         >>> d.plot.imshow()  # equivalent to xray.plot.imshow(d)
 
-        '''
+        """
         return _PlotMethods(self)
 
     def _title_for_slice(self, truncate=50):
-        '''
+        """
         If the dataarray has 1 dimensional coordinates or comes from a slice
         we can show that info in the title
 
@@ -1138,7 +1170,7 @@ def _title_for_slice(self, truncate=50):
         title : string
             Can be used for plot titles
 
-        '''
+        """
         one_dims = []
         for dim, coord in iteritems(self.coords):
             if coord.size == 1:
@@ -1186,8 +1218,8 @@ def diff(self, dim, n=1, label='upper'):
         * x        (x) int64 3 4
 
         """
-        ds = self._dataset.diff(n=n, dim=dim, label=label)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().diff(n=n, dim=dim, label=label)
+        return self._from_temp_dataset(ds)
 
     def shift(self, **shifts):
         """Shift this array by an offset along one or more dimensions.
@@ -1223,9 +1255,8 @@ def shift(self, **shifts):
         Coordinates:
           * x        (x) int64 0 1 2
         """
-        ds = self._dataset.copy()
-        ds[self.name] = self.variable.shift(**shifts)
-        return self._with_replaced_dataset(ds)
+        variable = self.variable.shift(**shifts)
+        return self._replace(variable)
 
     def roll(self, **shifts):
         """Roll this array by an offset along one or more dimensions.
@@ -1258,16 +1289,16 @@ def roll(self, **shifts):
         Coordinates:
           * x        (x) int64 2 0 1
         """
-        ds = self._dataset.roll(**shifts)
-        return self._with_replaced_dataset(ds)
+        ds = self._to_temp_dataset().roll(**shifts)
+        return self._from_temp_dataset(ds)
 
     @property
     def real(self):
-        return self._with_replaced_dataset(self._dataset.real)
+        return self._replace(self.variable.real)
 
     @property
     def imag(self):
-        return self._with_replaced_dataset(self._dataset.imag)
+        return self._replace(self.variable.imag)
 
 # priority most be higher than Variable to properly work with binary ufuncs
 ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)
diff --git a/xray/core/dataset.py b/xray/core/dataset.py
index 9e873358ca8..964b298c1be 100644
--- a/xray/core/dataset.py
+++ b/xray/core/dataset.py
@@ -14,11 +14,13 @@
 from . import alignment
 from . import formatting
 from .. import conventions
-from .alignment import align, partial_align
+from .alignment import align, align_variables
 from .coordinates import DatasetCoordinates, Indexes
 from .common import ImplementsDatasetReduce, BaseDataObject
+from .merge import merge_datasets, expand_variables
 from .utils import Frozen, SortedKeysDict, ChainMap, maybe_wrap_array, hashable
-from .variable import as_variable, Variable, Coordinate, broadcast_variables
+from .variable import (as_variable, Variable, Coordinate, broadcast_variables,
+                       default_index_coordinate)
 from .pycompat import (iteritems, basestring, OrderedDict,
                        dask_array_type)
 from .combine import concat
@@ -61,92 +63,6 @@ def _get_virtual_variable(variables, key):
     return ref_name, var_name, Variable(ref_var.dims, data)
 
 
-def _as_dataset_variable(name, var):
-    """Prepare a variable for adding it to a Dataset
-    """
-    try:
-        var = as_variable(var, key=name)
-    except TypeError:
-        raise TypeError('Dataset variables must be an array or a tuple of '
-                        'the form (dims, data[, attrs, encoding])')
-    if name in var.dims:
-        # convert the into an Index
-        if var.ndim != 1:
-            raise ValueError('the variable %r has the same name as one of its '
-                             'dimensions %r, but it is not 1-dimensional and '
-                             'thus it is not a valid index' % (name, var.dims))
-        var = var.to_coord()
-    return var
-
-
-def _align_variables(variables, join='outer'):
-    """Align all DataArrays in the provided dict, leaving other values alone.
-    """
-    alignable = [k for k, v in variables.items() if hasattr(v, 'indexes')]
-    aligned = align(*[variables[a] for a in alignable],
-                    join=join, copy=False)
-    new_variables = OrderedDict(variables)
-    new_variables.update(zip(alignable, aligned))
-    return new_variables
-
-
-def _expand_variables(raw_variables, old_variables=None, compat='identical'):
-    """Expand a dictionary of variables.
-
-    Returns a dictionary of Variable objects suitable for inserting into a
-    Dataset._variables dictionary.
-
-    This includes converting tuples (dims, data) into Variable objects,
-    converting coordinate variables into Coordinate objects and expanding
-    DataArray objects into Variables plus coordinates.
-
-    Raises ValueError if any conflicting values are found, between any of the
-    new or old variables.
-    """
-    if old_variables is None:
-        old_variables = {}
-    new_variables = OrderedDict()
-    new_coord_names = set()
-    variables = ChainMap(new_variables, old_variables)
-
-    def maybe_promote_or_replace(name, var):
-        existing_var = variables[name]
-        if name not in existing_var.dims:
-            if name in var.dims:
-                variables[name] = var
-            else:
-                common_dims = OrderedDict(zip(existing_var.dims,
-                                              existing_var.shape))
-                common_dims.update(zip(var.dims, var.shape))
-                variables[name] = existing_var.expand_dims(common_dims)
-                new_coord_names.update(var.dims)
-
-    def add_variable(name, var):
-        var = _as_dataset_variable(name, var)
-        if name not in variables:
-            variables[name] = var
-            new_coord_names.update(variables[name].dims)
-        else:
-            if not getattr(variables[name], compat)(var):
-                raise ValueError('conflicting value for variable %s:\n'
-                                 'first value: %r\nsecond value: %r'
-                                 % (name, variables[name], var))
-            if compat == 'broadcast_equals':
-                maybe_promote_or_replace(name, var)
-
-    for name, var in iteritems(raw_variables):
-        if hasattr(var, 'coords'):
-            # it's a DataArray
-            new_coord_names.update(var.coords)
-            for dim, coord in iteritems(var.coords):
-                if dim != name:
-                    add_variable(dim, coord.variable)
-            var = var.variable
-        add_variable(name, var)
-
-    return new_variables, new_coord_names
-
-
 def _calculate_dims(variables):
     """Calculate the dimensions corresponding to a set of variables.
 
@@ -171,40 +87,6 @@ def _calculate_dims(variables):
     return dims
 
 
-def _merge_expand(aligned_self, other, overwrite_vars, compat):
-    possible_conflicts = dict((k, v) for k, v in aligned_self._variables.items()
-                              if k not in overwrite_vars)
-    new_vars, new_coord_names = _expand_variables(other, possible_conflicts, compat)
-    replace_vars = aligned_self._variables.copy()
-    replace_vars.update(new_vars)
-    return replace_vars, new_vars, new_coord_names
-
-
-def _merge_dataset(self, other, overwrite_vars, compat, join):
-    aligned_self, other = partial_align(self, other, join=join, copy=False)
-
-    replace_vars, new_vars, new_coord_names = _merge_expand(
-        aligned_self, other._variables, overwrite_vars, compat)
-    new_coord_names.update(other._coord_names)
-
-    return replace_vars, new_vars, new_coord_names
-
-
-def _merge_dict(self, other, overwrite_vars, compat, join):
-    other = _align_variables(other, join='outer')
-
-    alignable = [k for k, v in other.items() if hasattr(v, 'indexes')]
-    aligned = partial_align(self, *[other[a] for a in alignable],
-                            join=join, copy=False, exclude=overwrite_vars)
-
-    aligned_self = aligned[0]
-
-    other = OrderedDict(other)
-    other.update(zip(alignable, aligned[1:]))
-
-    return _merge_expand(aligned_self, other, overwrite_vars, compat)
-
-
 def _assert_empty(args, msg='%s'):
     if args:
         raise ValueError(msg % args)
@@ -213,16 +95,17 @@ def _assert_empty(args, msg='%s'):
 def as_dataset(obj):
     """Cast the given object to a Dataset.
 
-    Handles DataArrays, Datasets and dictionaries of variables. A new Dataset
-    object is only created in the last case.
+    Handles Datasets, DataArrays and dictionaries of variables. A new Dataset
+    object is only created if the provided object is not already one.
     """
-    obj = getattr(obj, '_dataset', obj)
+    if hasattr(obj, 'to_dataset'):
+        obj = obj.to_dataset()
     if not isinstance(obj, Dataset):
         obj = Dataset(obj)
     return obj
 
 
-class Variables(Mapping):
+class DataVariables(Mapping):
     def __init__(self, dataset):
         self._dataset = dataset
 
@@ -332,11 +215,7 @@ def _add_missing_coords_inplace(self):
         """
         for dim, size in iteritems(self.dims):
             if dim not in self._variables:
-                # This is equivalent to np.arange(size), but
-                # waits to create the array until its actually accessed.
-                data = indexing.LazyIntegerRange(size)
-                coord = Coordinate(dim, data)
-                self._variables[dim] = coord
+                self._variables[dim] = default_index_coordinate(dim, size)
 
     def _update_vars_and_coords(self, new_variables, new_coord_names=None,
                                 needs_copy=True, check_coord_names=True):
@@ -375,9 +254,9 @@ def _set_init_vars_and_dims(self, vars, coords, compat):
                       'redundant variables and coordinates: %s')
         variables = ChainMap(vars, coords)
 
-        aligned = _align_variables(variables)
-        new_variables, new_coord_names = _expand_variables(aligned,
-                                                           compat=compat)
+        aligned = align_variables(variables)
+        new_variables, new_coord_names = expand_variables(aligned,
+                                                          compat=compat)
 
         new_coord_names.update(coords)
         self._update_vars_and_coords(new_variables, new_coord_names,
@@ -549,7 +428,19 @@ def copy(self, deep=False):
         return self._construct_direct(variables, self._coord_names.copy(),
                                       self._dims.copy(), self._attrs_copy())
 
-    def _copy_listed(self, names, keep_attrs=True):
+    def _subset_with_all_valid_coords(self, variables, coord_names, attrs):
+        needed_dims = set()
+        for v in variables.values():
+            needed_dims.update(v.dims)
+        for k in self._coord_names:
+            if set(self.variables[k].dims) <= needed_dims:
+                variables[k] = self._variables[k]
+                coord_names.add(k)
+        dims = dict((k, self._dims[k]) for k in needed_dims)
+
+        return self._construct_direct(variables, coord_names, dims, attrs)
+
+    def _copy_listed(self, names):
         """Create a new Dataset with the listed variables from this dataset and
         the all relevant coordinates. Skips all validation.
         """
@@ -566,19 +457,26 @@ def _copy_listed(self, names, keep_attrs=True):
                 if ref_name in self._coord_names:
                     coord_names.add(var_name)
 
-        needed_dims = set()
-        for v in variables.values():
-            needed_dims.update(v._dims)
-        for k in self._coord_names:
-            if set(self._variables[k]._dims) <= needed_dims:
-                variables[k] = self._variables[k]
-                coord_names.add(k)
+        return self._subset_with_all_valid_coords(variables, coord_names,
+                                                  attrs=self.attrs.copy())
 
-        dims = dict((k, self._dims[k]) for k in needed_dims)
+    def _construct_dataarray(self, name):
+        """Construct a DataArray by indexing this dataset
+        """
+        from .dataarray import DataArray
 
-        attrs = self.attrs.copy() if keep_attrs else None
+        try:
+            variable = self._variables[name]
+        except KeyError:
+            _, name, variable = _get_virtual_variable(self._variables, name)
 
-        return self._construct_direct(variables, coord_names, dims, attrs)
+        coords = OrderedDict()
+        needed_dims = set(variable.dims)
+        for k in self.coords:
+            if set(self.variables[k].dims) <= needed_dims:
+                coords[k] = self.variables[k]
+
+        return DataArray(variable, coords, name=name, fastpath=True)
 
     def __copy__(self):
         return self.copy(deep=False)
@@ -617,13 +515,11 @@ def __getitem__(self, key):
 
         Indexing with a list of names will return a new ``Dataset`` object.
         """
-        from .dataarray import DataArray
-
         if utils.is_dict_like(key):
             return self.isel(**key)
 
         if hashable(key):
-            return DataArray._new_from_dataset(self, key)
+            return self._construct_dataarray(key)
         else:
             return self._copy_listed(np.asarray(key))
 
@@ -745,7 +641,7 @@ def coords(self):
     def data_vars(self):
         """Dictionary of xray.DataArray objects corresponding to data variables
         """
-        return Variables(self)
+        return DataVariables(self)
 
     @property
     def vars(self):  # pragma: no cover
@@ -1334,10 +1230,13 @@ def rename(self, name_dict, inplace=False):
         Dataset.swap_dims
         DataArray.rename
         """
-        for k in name_dict:
+        for k, v in name_dict.items():
             if k not in self:
                 raise ValueError("cannot rename %r because it is not a "
                                  "variable in this dataset" % k)
+            if v in self:
+                raise ValueError('the new name %r already exists' % v)
+
         variables = OrderedDict()
         coord_names = set()
         for k, v in iteritems(self._variables):
@@ -1472,27 +1371,8 @@ def merge(self, other, inplace=False, overwrite_vars=set(),
         ValueError
             If any variables conflict (see ``compat``).
         """
-        if compat not in ['broadcast_equals', 'equals', 'identical']:
-            raise ValueError("compat=%r invalid: must be 'broadcast_equals', "
-                             "'equals' or 'identical'" % compat)
-
-        if isinstance(overwrite_vars, basestring):
-            overwrite_vars = [overwrite_vars]
-        overwrite_vars = set(overwrite_vars)
-
-        merge = _merge_dataset if isinstance(other, Dataset) else _merge_dict
-
-        replace_vars, new_vars, new_coord_names = merge(
+        replace_vars, new_coord_names = merge_datasets(
             self, other, overwrite_vars, compat=compat, join=join)
-
-        newly_coords = new_coord_names & (set(self) - set(self.coords))
-        no_longer_coords = set(self.coords) & (set(new_vars) - new_coord_names)
-        ambiguous_coords = (newly_coords | no_longer_coords) - overwrite_vars
-        if ambiguous_coords:
-            raise ValueError('cannot merge: the following variables are '
-                             'coordinates on one dataset but not the other: %s'
-                             % list(ambiguous_coords))
-
         obj = self if inplace else self.copy()
         obj._update_vars_and_coords(replace_vars, new_coord_names)
         return obj
diff --git a/xray/core/groupby.py b/xray/core/groupby.py
index e2a29b0b823..9c53a2791b9 100644
--- a/xray/core/groupby.py
+++ b/xray/core/groupby.py
@@ -9,7 +9,7 @@
 )
 from .pycompat import zip
 from .utils import peek_at, maybe_wrap_array, safe_cast_to_index
-from .variable import Variable, Coordinate
+from .variable import as_variable, Variable, Coordinate
 
 
 def unique_value_groups(ar):
@@ -110,7 +110,10 @@ def __init__(self, obj, group, squeeze=False, grouper=None):
             raise ValueError("`group` must have a 'dims' attribute")
         group_dim, = group.dims
 
-        expected_size = as_dataset(obj).dims[group_dim]
+        try:
+            expected_size = obj.dims[group_dim]
+        except TypeError:
+            expected_size = obj.shape[obj.get_axis_num(group_dim)]
         if group.size != expected_size:
             raise ValueError('the group variable\'s length does not '
                              'match the length of this variable along its '
@@ -312,19 +315,16 @@ def _iter_grouped_shortcut(self):
             yield var[{self.group_dim: indices}]
 
     def _concat_shortcut(self, applied, concat_dim, positions):
+        # nb. don't worry too much about maintaining this method -- it does
+        # speed things up, but it's not very interpretable and there are much
+        # faster alternatives (e.g., doing the grouped aggregation in a
+        # compiled language)
         stacked = Variable.concat(
             applied, concat_dim, positions, shortcut=True)
         stacked.attrs.update(self.obj.attrs)
-
-        name = self.obj.name
-        ds = self.obj._dataset.drop(name)
-        ds[concat_dim.name] = concat_dim
-        # remove extraneous dimensions
-        for dim in ds.dims:
-            if dim not in stacked.dims:
-                del ds[dim]
-        ds[name] = stacked
-        return ds[name]
+        result = self.obj._replace_maybe_drop_dims(stacked)
+        result._coords[concat_dim.name] = as_variable(concat_dim, copy=True)
+        return result
 
     def _restore_dim_order(self, stacked):
         def lookup_order(dimension):
diff --git a/xray/core/merge.py b/xray/core/merge.py
new file mode 100644
index 00000000000..c1830127fbc
--- /dev/null
+++ b/xray/core/merge.py
@@ -0,0 +1,170 @@
+from .alignment import align, partial_align, align_variables
+from .utils import ChainMap
+from .variable import as_variable
+from .pycompat import (basestring, iteritems, OrderedDict)
+
+
+def _as_dataset_variable(name, var):
+    """Prepare a variable for adding it to a Dataset
+    """
+    try:
+        var = as_variable(var, key=name)
+    except TypeError:
+        raise TypeError('variables must be given by arrays or a tuple of '
+                        'the form (dims, data[, attrs, encoding])')
+    if name in var.dims:
+        # convert it into an Index
+        if var.ndim != 1:
+            raise ValueError('the variable %r has the same name as one of its '
+                             'dimensions %r, but it is not 1-dimensional and '
+                             'thus it is not a valid index' % (name, var.dims))
+        var = var.to_coord()
+    return var
+
+
+def expand_variables(raw_variables, old_variables=None, compat='identical'):
+    """Expand a dictionary of variables.
+
+    Returns a dictionary of Variable objects suitable for inserting into a
+    Dataset._variables dictionary.
+
+    This includes converting tuples (dims, data) into Variable objects,
+    converting coordinate variables into Coordinate objects and expanding
+    DataArray objects into Variables plus coordinates.
+
+    Raises ValueError if any conflicting values are found, between any of the
+    new or old variables.
+    """
+    if old_variables is None:
+        old_variables = {}
+    new_variables = OrderedDict()
+    new_coord_names = set()
+    variables = ChainMap(new_variables, old_variables)
+
+    def maybe_promote_or_replace(name, var):
+        existing_var = variables[name]
+        if name not in existing_var.dims:
+            if name in var.dims:
+                variables[name] = var
+            else:
+                common_dims = OrderedDict(zip(existing_var.dims,
+                                              existing_var.shape))
+                common_dims.update(zip(var.dims, var.shape))
+                variables[name] = existing_var.expand_dims(common_dims)
+                new_coord_names.update(var.dims)
+
+    def add_variable(name, var):
+        var = _as_dataset_variable(name, var)
+        if name not in variables:
+            variables[name] = var
+            new_coord_names.update(variables[name].dims)
+        else:
+            if not getattr(variables[name], compat)(var):
+                raise ValueError('conflicting value for variable %s:\n'
+                                 'first value: %r\nsecond value: %r'
+                                 % (name, variables[name], var))
+            if compat == 'broadcast_equals':
+                maybe_promote_or_replace(name, var)
+
+    for name, var in iteritems(raw_variables):
+        if hasattr(var, 'coords'):
+            # it's a DataArray
+            new_coord_names.update(var.coords)
+            for dim, coord in iteritems(var.coords):
+                if dim != name:
+                    add_variable(dim, coord.variable)
+            var = var.variable
+        add_variable(name, var)
+
+    return new_variables, new_coord_names
+
+
+def _merge_expand(variables, other, overwrite_vars, compat):
+    possible_conflicts = dict((k, v) for k, v in variables.items()
+                              if k not in overwrite_vars)
+    new_vars, new_coord_names = expand_variables(other, possible_conflicts, compat)
+    replace_vars = variables.copy()
+    replace_vars.update(new_vars)
+    return replace_vars, new_vars, new_coord_names
+
+
+def _merge_dataset_with_dataset(self, other, overwrite_vars, compat, join):
+    aligned_self, other = align(self, other, join=join, copy=False)
+
+    replace_vars, new_vars, new_coord_names = _merge_expand(
+        aligned_self._variables, other._variables, overwrite_vars, compat)
+    new_coord_names.update(other._coord_names)
+
+    return replace_vars, new_vars, new_coord_names
+
+
+def _merge_dataset_with_dict(self, other, overwrite_vars, compat, join):
+    other = align_variables(other, join='outer', copy=False)
+
+    alignable = [k for k, v in other.items() if hasattr(v, 'indexes')]
+    aligned = partial_align(self, *[other[a] for a in alignable],
+                            join=join, copy=False, exclude=overwrite_vars)
+
+    aligned_self = aligned[0]
+
+    other = OrderedDict(other)
+    other.update(zip(alignable, aligned[1:]))
+
+    return _merge_expand(aligned_self._variables, other, overwrite_vars, compat)
+
+
+def merge_datasets(dataset, other, overwrite_vars=set(),
+                   compat='broadcast_equals', join='outer'):
+    """
+    Guts of Dataset.merge
+    """
+    from .dataset import Dataset
+
+    if compat not in ['broadcast_equals', 'equals', 'identical']:
+        raise ValueError("compat=%r invalid: must be 'broadcast_equals', "
+                         "'equals' or 'identical'" % compat)
+
+    if isinstance(overwrite_vars, basestring):
+        overwrite_vars = [overwrite_vars]
+    overwrite_vars = set(overwrite_vars)
+
+    if isinstance(other, Dataset):
+        merge_func = _merge_dataset_with_dataset
+    else:
+        merge_func = _merge_dataset_with_dict
+
+    replace_vars, new_vars, new_coord_names = merge_func(
+        dataset, other, overwrite_vars, compat=compat, join=join)
+
+    newly_coords = new_coord_names & set(dataset.data_vars)
+    no_longer_coords = set(dataset.coords) & (set(new_vars) - new_coord_names)
+    ambiguous_coords = (newly_coords | no_longer_coords) - overwrite_vars
+    if ambiguous_coords:
+        raise ValueError('cannot merge: the following variables are '
+                         'coordinates on one dataset but not the other: %s'
+                         % list(ambiguous_coords))
+
+    return replace_vars, new_coord_names
+
+
+def _reindex_variables_against(variables, indexes, copy=False):
+    """Reindex all DataArrays in the provided dict, leaving other values alone.
+    """
+    alignable = [k for k, v in variables.items() if hasattr(v, 'indexes')]
+    aligned = [variables[a].reindex(copy=copy, indexes=indexes)
+               for a in alignable]
+    new_variables = OrderedDict(variables)
+    new_variables.update(zip(alignable, aligned))
+    return new_variables
+
+
+def merge_dataarray_coords(indexes, variables, other):
+    """
+    Return the new dictionary of coordinate variables given by merging in
+    ``other`` to these variables.
+    """
+    other = align_variables(other, join='outer', copy=False)
+    other = _reindex_variables_against(other, indexes, copy=False)
+    replace_vars, _, __ = _merge_expand(
+        variables, other, other, compat='broadcast_equals')
+    return replace_vars
diff --git a/xray/core/variable.py b/xray/core/variable.py
index 2b742e3eb60..ba54ac591f5 100644
--- a/xray/core/variable.py
+++ b/xray/core/variable.py
@@ -11,7 +11,8 @@
 from . import ops
 from . import utils
 from .pycompat import basestring, OrderedDict, zip, dask_array_type
-from .indexing import (PandasIndexAdapter, orthogonally_indexable)
+from .indexing import (PandasIndexAdapter, orthogonally_indexable,
+                       LazyIntegerRange)
 
 import xray  # only for Dataset and DataArray
 
@@ -21,7 +22,7 @@
     pass
 
 
-def as_variable(obj, key=None, strict=True):
+def as_variable(obj, key=None, strict=True, copy=False):
     """Convert an object into an Variable
 
     - If the object is already an `Variable`, return it.
@@ -56,9 +57,20 @@ def as_variable(obj, key=None, strict=True):
             obj = Variable(key, obj)
         else:
             raise TypeError('cannot infer Variable dimensions')
+    else:
+        if copy:
+            obj = obj.copy(deep=False)
     return obj
 
 
+def default_index_coordinate(dim, size):
+    """
+    This is equivalent to np.arange(size), but waits to create the array until
+    it's actually accessed.
+    """
+    return Coordinate(dim, LazyIntegerRange(size))
+
+
 def _maybe_wrap_data(data):
     """
     Put pandas.Index and numpy.ndarray arguments in adapter objects to ensure
@@ -72,7 +84,7 @@ def _maybe_wrap_data(data):
     return data
 
 
-def _as_compatible_data(data, fastpath=False):
+def as_compatible_data(data, fastpath=False):
     """Prepare and wrap data to put in a Variable.
 
     - If data does not have the necessary attributes, convert it to ndarray.
@@ -197,7 +209,7 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False):
             Well behaviored code to serialize a Variable should ignore
             unrecognized encoding items.
         """
-        self._data = _as_compatible_data(data, fastpath=fastpath)
+        self._data = as_compatible_data(data, fastpath=fastpath)
         self._dims = self._parse_dimensions(dims)
         self._attrs = None
         self._encoding = None
@@ -231,7 +243,7 @@ def data(self):
 
     @data.setter
     def data(self, data):
-        data = _as_compatible_data(data)
+        data = as_compatible_data(data)
         if data.shape != self.shape:
             raise ValueError(
                 "replacement data must match the Variable's shape")
diff --git a/xray/test/__init__.py b/xray/test/__init__.py
index ad244b34fc2..40f71d09a62 100644
--- a/xray/test/__init__.py
+++ b/xray/test/__init__.py
@@ -195,7 +195,8 @@ def assertDataArrayEqual(self, ar1, ar2):
 
     def assertDataArrayIdentical(self, ar1, ar2):
         self.assertEqual(ar1.name, ar2.name)
-        self.assertDatasetIdentical(ar1.to_dataset(), ar2.to_dataset())
+        self.assertDatasetIdentical(ar1._to_temp_dataset(),
+                                    ar2._to_temp_dataset())
 
     def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08):
         self.assertVariableAllClose(ar1, ar2, rtol=rtol, atol=atol)
diff --git a/xray/test/test_backends.py b/xray/test/test_backends.py
index 364094c7d3f..033737409f3 100644
--- a/xray/test/test_backends.py
+++ b/xray/test/test_backends.py
@@ -121,10 +121,10 @@ def assert_loads(vars=None):
             if vars is None:
                 vars = expected
             with self.roundtrip(expected) as actual:
-                for v in actual.values():
+                for v in actual.variables.values():
                     self.assertFalse(v._in_memory)
                 yield actual
-                for k, v in actual.items():
+                for k, v in actual.variables.items():
                     if k in vars:
                         self.assertTrue(v._in_memory)
                 self.assertDatasetAllClose(expected, actual)
diff --git a/xray/test/test_combine.py b/xray/test/test_combine.py
index d7dc15c18c6..cbe14cc84ed 100644
--- a/xray/test/test_combine.py
+++ b/xray/test/test_combine.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from xray import Dataset, DataArray, auto_combine, concat
+from xray import Dataset, DataArray, auto_combine, concat, Variable
 from xray.core.pycompat import iteritems, OrderedDict
 
 from . import TestCase, InaccessibleArray, requires_dask
@@ -207,6 +207,13 @@ def test_concat_do_not_promote(self):
         with self.assertRaises(ValueError):
             concat(objs, 't', coords='minimal')
 
+    def test_concat_dim_is_variable(self):
+        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
+        coord = Variable('y', [3, 4])
+        expected = Dataset({'x': ('y', [0, 1]), 'y': [3, 4]})
+        actual = concat(objs, coord)
+        self.assertDatasetIdentical(actual, expected)
+
     @requires_dask  # only for toolz
     def test_auto_combine(self):
         objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
diff --git a/xray/test/test_dask.py b/xray/test/test_dask.py
index 20cfef2e3e6..99ddab258bf 100644
--- a/xray/test/test_dask.py
+++ b/xray/test/test_dask.py
@@ -199,7 +199,7 @@ def test_rechunk(self):
 
     def test_new_chunk(self):
         chunked = self.eager_array.chunk()
-        self.assertTrue(chunked.data.name.startswith('xray-foo-'))
+        self.assertTrue(chunked.data.name.startswith('xray-<this-array>'))
 
     def test_lazy_dataset(self):
         lazy_ds = Dataset({'foo': (('x', 'y'), self.data)})
diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py
index e95df6028e9..6f85c350663 100644
--- a/xray/test/test_dataarray.py
+++ b/xray/test/test_dataarray.py
@@ -72,7 +72,7 @@ def test_name(self):
 
         actual = DataArray(Coordinate('x', [3]))
         actual.name = 'y'
-        expected = DataArray(Coordinate('y', [3]))
+        expected = DataArray([3], {'x': [3]}, name='y')
         self.assertDataArrayIdentical(actual, expected)
 
     def test_dims(self):
@@ -517,13 +517,20 @@ def test_coords(self):
         actual = repr(da.coords)
         self.assertEquals(expected, actual)
 
+        with self.assertRaisesRegexp(ValueError, 'cannot delete'):
+            del da['x']
+
+        with self.assertRaisesRegexp(ValueError, 'cannot delete'):
+            del da.coords['x']
+
     def test_coord_coords(self):
         orig = DataArray([10, 20],
                          {'x': [1, 2], 'x2': ('x', ['a', 'b']), 'z': 4},
                          dims='x')
 
         actual = orig.coords['x']
-        expected = DataArray([1, 2], {'z': 4, 'x2': ('x', ['a', 'b'])},
+        expected = DataArray([1, 2], {'z': 4, 'x2': ('x', ['a', 'b']),
+                                      'x': [1, 2]},
                              dims='x', name='x')
         self.assertDataArrayIdentical(expected, actual)
 
@@ -532,7 +539,8 @@ def test_coord_coords(self):
             expected.reset_coords('x2', drop=True), actual)
 
         actual.coords['x3'] = ('x', ['a', 'b'])
-        expected = DataArray([1, 2], {'z': 4, 'x3': ('x', ['a', 'b'])},
+        expected = DataArray([1, 2], {'z': 4, 'x3': ('x', ['a', 'b']),
+                                      'x': [1, 2]},
                              dims='x', name='x')
         self.assertDataArrayIdentical(expected, actual)
 
@@ -576,7 +584,7 @@ def test_reset_coords(self):
 
         with self.assertRaisesRegexp(ValueError, 'cannot reset coord'):
             data.reset_coords(inplace=True)
-        with self.assertRaises(KeyError):
+        with self.assertRaisesRegexp(ValueError, 'cannot be found'):
             data.reset_coords('foo', drop=True)
         with self.assertRaisesRegexp(ValueError, 'cannot be found'):
             data.reset_coords('not_found')
@@ -705,7 +713,6 @@ def test_inplace_math_basics(self):
         self.assertIs(b.variable, v)
         self.assertArrayEqual(b.values, x)
         self.assertIs(source_ndarray(b.values), x)
-        self.assertDatasetIdentical(b._dataset, self.ds)
 
     def test_inplace_math_automatic_alignment(self):
         a = DataArray(range(5), [('x', range(5))])
@@ -726,8 +733,8 @@ def test_math_name(self):
         self.assertIs((a + a.rename(None)).name, None)
         self.assertIs((a + a.rename('bar')).name, None)
         self.assertEqual((a + a).name, 'foo')
-        self.assertIs((+a['x']).name, None)
-        self.assertIs((a['x'] + 0).name, None)
+        self.assertEqual((+a['x']).name, 'x')
+        self.assertEqual((a['x'] + 0).name, 'x')
         self.assertIs((a + a['x']).name, None)
 
     def test_math_with_coords(self):
@@ -785,12 +792,14 @@ def test_math_with_coords(self):
     def test_index_math(self):
         orig = DataArray(range(3), dims='x', name='x')
         actual = orig + 1
-        expected = DataArray(1 + np.arange(3), coords=[('x', range(3))])
+        expected = DataArray(1 + np.arange(3), coords=[('x', range(3))],
+                             name='x')
         self.assertDataArrayIdentical(expected, actual)
 
         # regression tests for #254
         actual = orig[0] < orig
-        expected = DataArray([False, True, True], coords=[('x', range(3))])
+        expected = DataArray([False, True, True], coords=[('x', range(3))],
+                             name='x')
         self.assertDataArrayIdentical(expected, actual)
 
         actual = orig > orig[0]
@@ -855,11 +864,11 @@ def test_drop_coordinates(self):
         with self.assertRaises(ValueError):
             arr.drop('not found')
 
-        with self.assertRaisesRegexp(ValueError, 'cannot drop'):
+        with self.assertRaisesRegexp(ValueError, 'cannot be found'):
             arr.drop(None)
 
         renamed = arr.rename('foo')
-        with self.assertRaisesRegexp(ValueError, 'cannot drop'):
+        with self.assertRaisesRegexp(ValueError, 'cannot be found'):
             renamed.drop('foo')
 
     def test_drop_index_labels(self):
@@ -1100,7 +1109,7 @@ def test_groupby_math(self):
             actual = array.coords['x'] + grouped
             self.assertDataArrayIdentical(expected, actual)
 
-            ds = array.coords['x'].to_dataset()
+            ds = array.coords['x'].to_dataset('X')
             expected = array + ds
             actual = grouped + ds
             self.assertDatasetIdentical(expected, actual)
@@ -1212,6 +1221,15 @@ def test_resample_first(self):
         expected = DataArray([np.nan, 4, 8], [('time', times[::4])])
         self.assertDataArrayIdentical(expected, actual)
 
+        # regression test for http://stackoverflow.com/questions/33158558/
+        array = Dataset({'time': times})['time']
+        actual = array.resample('1D', dim='time', how='last')
+        expected_times = pd.to_datetime(['2000-01-01T18', '2000-01-02T18',
+                                         '2000-01-03T06'])
+        expected = DataArray(expected_times, [('time', times[::4])],
+                             name='time')
+        self.assertDataArrayIdentical(expected, actual)
+
     def test_resample_skipna(self):
         times = pd.date_range('2000-01-01', freq='6H', periods=10)
         array = DataArray(np.ones(10), [('time', times)])
@@ -1305,9 +1323,9 @@ def test_to_pandas(self):
     def test_to_dataframe(self):
         # regression test for #260
         arr = DataArray(np.random.randn(3, 4),
-                        [('B', [1, 2, 3]), ('A', list('cdef'))])
+                        [('B', [1, 2, 3]), ('A', list('cdef'))], name='foo')
         expected = arr.to_series()
-        actual = arr.to_dataframe()[None]
+        actual = arr.to_dataframe()['foo']
         self.assertArrayEqual(expected.values, actual.values)
         self.assertArrayEqual(expected.name, actual.name)
         self.assertArrayEqual(expected.index.values, actual.index.values)
@@ -1316,12 +1334,29 @@ def test_to_dataframe(self):
         arr.coords['C'] = ('B', [-1, -2, -3])
         expected = arr.to_series().to_frame()
         expected['C'] = [-1] * 4 + [-2] * 4 + [-3] * 4
-        expected.columns = [None, 'C']
+        expected = expected[['C', 'foo']]
         actual = arr.to_dataframe()
         self.assertArrayEqual(expected.values, actual.values)
         self.assertArrayEqual(expected.columns.values, actual.columns.values)
         self.assertArrayEqual(expected.index.values, actual.index.values)
 
+        arr.name = None  # unnamed
+        with self.assertRaisesRegexp(ValueError, 'unnamed'):
+            arr.to_dataframe()
+
+    def test_to_pandas_name_matches_coordinate(self):
+        # coordinate with same name as array
+        arr = DataArray([1, 2, 3], dims='x', name='x')
+        series = arr.to_series()
+        self.assertArrayEqual([1, 2, 3], series.values)
+        self.assertArrayEqual([0, 1, 2], series.index.values)
+        self.assertEqual('x', series.name)
+        self.assertEqual('x', series.index.name)
+
+        frame = arr.to_dataframe()
+        expected = series.to_frame()
+        self.assertTrue(expected.equals(frame))
+
     def test_to_and_from_series(self):
         expected = self.dv.to_dataframe()['foo']
         actual = self.dv.to_series()
@@ -1401,10 +1436,8 @@ def test_to_and_from_cdms2(self):
 
     def test_to_dataset_whole(self):
         unnamed = DataArray([1, 2], dims='x')
-        actual = unnamed.to_dataset()
-        expected = Dataset({None: ('x', [1, 2])})
-        self.assertDatasetIdentical(expected, actual)
-        self.assertIsNot(unnamed._dataset, actual)
+        with self.assertRaisesRegexp(ValueError, 'unable to convert unnamed'):
+            unnamed.to_dataset()
 
         actual = unnamed.to_dataset(name='foo')
         expected = Dataset({'foo': ('x', [1, 2])})
@@ -1431,8 +1464,8 @@ def test_to_dataset_split(self):
         with self.assertRaises(TypeError):
             array.to_dataset('x', name='foo')
 
-        roundtriped = actual.to_array(dim='x')
-        self.assertDataArrayIdentical(array, roundtriped)
+        roundtripped = actual.to_array(dim='x')
+        self.assertDataArrayIdentical(array, roundtripped)
 
         array = DataArray([1, 2, 3], dims='x')
         expected = Dataset(OrderedDict([('0', 1), ('1', 2), ('2', 3)]))
@@ -1443,7 +1476,8 @@ def test__title_for_slice(self):
         array = DataArray(np.ones((4, 3, 2)), dims=['a', 'b', 'c'])
         self.assertEqual('', array._title_for_slice())
         self.assertEqual('c = 0', array.isel(c=0)._title_for_slice())
-        self.assertEqual('b = 1, c = 0', array.isel(b=1, c=0)._title_for_slice())
+        title = array.isel(b=1, c=0)._title_for_slice()
+        self.assertIn(title, ('b = 1, c = 0', 'c = 0, b = 1'))
 
         a2 = DataArray(np.ones((4, 1)), dims=['a', 'b'])
         self.assertEqual('b = [0]', a2._title_for_slice())
@@ -1467,6 +1501,15 @@ def test_dataarray_diff_n1(self):
                              ['x', 'y'])
         self.assertDataArrayEqual(expected, actual)
 
+    def test_coordinate_diff(self):
+        # regression test for GH634
+        arr = DataArray(range(0, 20, 2), dims=['lon'], coords=[range(10)])
+        lon = arr.coords['lon']
+        expected = DataArray([1] * 9, dims=['lon'], coords=[range(1, 10)],
+                             name='lon')
+        actual = lon.diff('lon')
+        self.assertDataArrayEqual(expected, actual)
+
     def test_shift(self):
         arr = DataArray([1, 2, 3], dims='x')
         actual = arr.shift(x=1)
diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py
index 68989aed1c5..761486464cf 100644
--- a/xray/test/test_dataset.py
+++ b/xray/test/test_dataset.py
@@ -124,7 +124,7 @@ def test_constructor(self):
         with self.assertRaisesRegexp(ValueError,
                 "variable 'x' has the same name"):
             Dataset({'a': x1, 'x': z})
-        with self.assertRaisesRegexp(TypeError, 'must be an array or'):
+        with self.assertRaisesRegexp(TypeError, 'must be given by arrays or'):
             Dataset({'x': (1, 2, 3, 4, 5, 6, 7)})
         with self.assertRaisesRegexp(ValueError, 'already exists as a scalar'):
             Dataset({'x': 0, 'y': ('x', [1, 2, 3])})
@@ -1034,6 +1034,9 @@ def test_rename(self):
         with self.assertRaisesRegexp(ValueError, "cannot rename 'not_a_var'"):
             data.rename({'not_a_var': 'nada'})
 
+        with self.assertRaisesRegexp(ValueError, "'var1' already exists"):
+            data.rename({'var2': 'var1'})
+
         # verify that we can rename a variable without accessing the data
         var1 = data['var1']
         data['var1'] = (var1.dims, InaccessibleArray(var1.values))
@@ -1205,8 +1208,10 @@ def test_getitem(self):
         self.assertDatasetEqual(expected, actual)
 
         actual = data['numbers']
-        expected = DataArray(data['numbers'].variable, [data['dim3']],
-                             name='numbers')
+        expected = DataArray(data['numbers'].variable,
+                             {'dim3': data['dim3'],
+                              'numbers': data['numbers']},
+                             dims='dim3', name='numbers')
         self.assertDataArrayIdentical(expected, actual)
 
         actual = data[dict(dim1=0)]
@@ -1243,6 +1248,14 @@ def test_virtual_variables(self):
         ds = Dataset({'t': ('x', pd.date_range('2000-01-01', periods=3))})
         self.assertTrue((ds['t.year'] == 2000).all())
 
+    def test_virtual_variable_same_name(self):
+        # regression test for GH367
+        times = pd.date_range('2000-01-01', freq='H', periods=5)
+        data = Dataset({'time': times})
+        actual = data['time.time']
+        expected = DataArray(times.time, {'time': times}, name='time')
+        self.assertDataArrayIdentical(actual, expected)
+
     def test_time_season(self):
         ds = Dataset({'t': pd.date_range('2000-01-01', periods=12, freq='M')})
         expected = ['DJF'] * 2 + ['MAM'] * 3 + ['JJA'] * 3 + ['SON'] * 3 + ['DJF']
diff --git a/xray/test/test_plot.py b/xray/test/test_plot.py
index 6f85f910cf4..f3e50865e22 100644
--- a/xray/test/test_plot.py
+++ b/xray/test/test_plot.py
@@ -593,7 +593,7 @@ def test_default_title(self):
         a.coords['d'] = u'foo'
         self.plotfunc(a.isel(c=1))
         title = plt.gca().get_title()
-        self.assertEqual('c = 1, d = foo', title)
+        self.assertIn(title, ('c = 1, d = foo', 'd = foo, c = 1'))
 
     def test_colorbar_label(self):
         self.darray.name = 'testvar'
diff --git a/xray/test/test_variable.py b/xray/test/test_variable.py
index 44a7d60e451..79534fe7748 100644
--- a/xray/test/test_variable.py
+++ b/xray/test/test_variable.py
@@ -9,7 +9,7 @@
 
 from xray import Variable, Dataset, DataArray
 from xray.core import indexing
-from xray.core.variable import (Coordinate, as_variable, _as_compatible_data)
+from xray.core.variable import (Coordinate, as_variable, as_compatible_data)
 from xray.core.indexing import PandasIndexAdapter, LazilyIndexedArray
 from xray.core.pycompat import PY3, OrderedDict
 
@@ -919,11 +919,11 @@ def test_unchanged_types(self):
                          pd.date_range('2000-01-01', periods=3).values]:
                 x = t(data)
                 self.assertIs(source_ndarray(x),
-                              source_ndarray(_as_compatible_data(x)))
+                              source_ndarray(as_compatible_data(x)))
 
     def test_converted_types(self):
         for input_array in [[[0, 1, 2]], pd.DataFrame([[0, 1, 2]])]:
-            actual = _as_compatible_data(input_array)
+            actual = as_compatible_data(input_array)
             self.assertArrayEqual(np.asarray(input_array), actual)
             self.assertEqual(np.ndarray, type(actual))
             self.assertEqual(np.asarray(input_array).dtype, actual.dtype)
@@ -931,39 +931,39 @@ def test_converted_types(self):
     def test_masked_array(self):
         original = np.ma.MaskedArray(np.arange(5))
         expected = np.arange(5)
-        actual = _as_compatible_data(original)
+        actual = as_compatible_data(original)
         self.assertArrayEqual(expected, actual)
         self.assertEqual(np.dtype(int), actual.dtype)
 
         original = np.ma.MaskedArray(np.arange(5), mask=4 * [False] + [True])
         expected = np.arange(5.0)
         expected[-1] = np.nan
-        actual = _as_compatible_data(original)
+        actual = as_compatible_data(original)
         self.assertArrayEqual(expected, actual)
         self.assertEqual(np.dtype(float), actual.dtype)
 
     def test_datetime(self):
         expected = np.datetime64('2000-01-01T00Z')
-        actual = _as_compatible_data(expected)
+        actual = as_compatible_data(expected)
         self.assertEqual(expected, actual)
         self.assertEqual(np.ndarray, type(actual))
         self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
 
         expected = np.array([np.datetime64('2000-01-01T00Z')])
-        actual = _as_compatible_data(expected)
+        actual = as_compatible_data(expected)
         self.assertEqual(np.asarray(expected), actual)
         self.assertEqual(np.ndarray, type(actual))
         self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
 
         expected = np.array([np.datetime64('2000-01-01T00Z', 'ns')])
-        actual = _as_compatible_data(expected)
+        actual = as_compatible_data(expected)
         self.assertEqual(np.asarray(expected), actual)
         self.assertEqual(np.ndarray, type(actual))
         self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
         self.assertIs(expected, source_ndarray(np.asarray(actual)))
 
         expected = np.datetime64('2000-01-01T00Z', 'ns')
-        actual = _as_compatible_data(datetime(2000, 1, 1))
+        actual = as_compatible_data(datetime(2000, 1, 1))
         self.assertEqual(np.asarray(expected), actual)
         self.assertEqual(np.ndarray, type(actual))
         self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)