From 044fc695f2bed56e174a9ec67d039331ccc1b856 Mon Sep 17 00:00:00 2001
From: khuston <khuston@gmail.com>
Date: Tue, 15 Mar 2016 14:18:15 -0400
Subject: [PATCH 1/4] Fix DATA type parsing and add explicit bond types

**NOTE:** `atoms.bonds` is broken for atom groups without explicit bond typing
in this commit.

In order to write a LAMMPS.DATAWriter, I first need explicit typing of bonds,
angles, dihedrals, and impropers beyond the tupling of atoms types done now.
To hack bond types into the topology, I branched issue-363 and tried the
following:

1. Add optional `types` keyword to `topologyattrs.Bonds`. This is stored
in the class instance a `self._bondtypes`. A topology parser can pass `types`
to the Bonds topologyattr.
2. When `topologyattrs.Bonds` is called upon to generate a TopologyGroup,
these values are passed on via a new keyword in `TopologyGroup.__init__()`.
They are stored in the TopologyGroup instance as `self._bondtypes`.
3. I modified `TopologyGroup.__getitem__` to yield a Bond with attribute
`_bondtype`. If `_bondtype is not None` then it overrides the default behavior
of `bond.type`.
4. I modified `TopologyGroup()` calls wherever I could find them to pass on
`_bondtype` in the instantiation.

I found that the DATAParser in branch #363 is slightly broken, because
the atom types are saved as integers which breaks `select_atoms` (I think it
expects strings, and typing the types as strings on parsing fixes this issue).
It seems there is currently no test for atom selection from LAMMPS DATA files,
so this might have gone undetected.

An error is raised for `ag.bonds` for an atom group that doesn't have bonds,
but this seems to also be the case for branch issue-363 right now, so I didn't
try to fix this.

Two places are especially ugly. One is where I tried to hack `_bondtypes`
into the existing code to remove duplicate bonds. This is also where
the code seems to be broken for non-explicit bond types. In this case,
I set `_bondtypes` to be a numpy array full of `None` with `dtype=object`.
It seems that `unique_rows` fails for this case, although it works fine
when `_bondtypes.dtype == "|S1"`.
---
 package/MDAnalysis/core/topologyattrs.py    | 24 +++++++++--
 package/MDAnalysis/core/topologyobjects.py  | 48 ++++++++++++++-------
 package/MDAnalysis/topology/LAMMPSParser.py | 15 ++++---
 3 files changed, 62 insertions(+), 25 deletions(-)

diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py
index 85503e86329..d0c79eef907 100644
--- a/package/MDAnalysis/core/topologyattrs.py
+++ b/package/MDAnalysis/core/topologyattrs.py
@@ -870,7 +870,7 @@ class Bonds(AtomAttr):
     singular = 'bonds'
     transplants = defaultdict(list)
 
-    def __init__(self, values):
+    def __init__(self, values, types=None):
         """
         Arguments
         ---------
@@ -881,10 +881,24 @@ def __init__(self, values):
         """
         self.values = values
         self._cache = dict()
+        self.order_bonds()
+        self._bondtypes = {}
+        if types is None:
+            types = [None for value in values]
+        for value, type in zip(self.values, types):
+            self._bondtypes[value] = type
 
     def __len__(self):
         return len(self._bondDict)
 
+    def order_bonds(self):
+        ordered_values = []
+        for b in self.values:
+            if b[0] > b[-1]:
+                b = b[::-1]
+            ordered_values.append(b)
+        self.values = ordered_values
+
     @property
     @cached('bd')
     def _bondDict(self):
@@ -896,7 +910,7 @@ def _bondDict(self):
             # to be less than the last
             # eg (0, 1) not (1, 0)
             # and (4, 10, 8) not (8, 10, 4)
-            if b[0] < b[-1]:
+            if b[0] > b[-1]:
                 b = b[::-1]
             for a in b:
                 bd[a].append(b)
@@ -909,8 +923,12 @@ def get_atoms(self, ag):
         except TypeError:
             # maybe we got passed an Atom
             unique_bonds = self._bondDict[ag._ix]
+        if self._bondtypes is not None:
+            _bondtypes = {value: self._bondtypes[value] for value in unique_bonds}
+        else:
+            _bondtypes = None
         bond_idx = np.array(sorted(unique_bonds))
-        return TopologyGroup(bond_idx, ag._u, self.singular[:-1])
+        return TopologyGroup(bond_idx, ag._u, self.singular[:-1], np.array(_bondtypes.values()))
 
     def bonded_atoms(self):
         """An AtomGroup of all atoms bonded to this Atom"""
diff --git a/package/MDAnalysis/core/topologyobjects.py b/package/MDAnalysis/core/topologyobjects.py
index 677cc29a69f..fed801d6f60 100644
--- a/package/MDAnalysis/core/topologyobjects.py
+++ b/package/MDAnalysis/core/topologyobjects.py
@@ -48,7 +48,7 @@ class TopologyObject(object):
     """
     __slots__ = ("_ix", "_u", "btype")
 
-    def __init__(self, ix, universe):
+    def __init__(self, ix, universe, type=None):
         """Create a topology object
 
         Arguments
@@ -58,6 +58,7 @@ def __init__(self, ix, universe):
         """
         self._ix = ix
         self._u = universe
+        self._bondtype = type
 
     @property
     def atoms(self):
@@ -88,7 +89,10 @@ def type(self):
             a.type == b.type or a.type == b.type[::-1]
 
         """
-        return tuple(self.atoms.types)
+        if self._bondtype is not None:
+            return self._bondtype
+        else:
+            return tuple(self.atoms.types)
 
     def __hash__(self):
         return hash((self._u, tuple(self.indices)))
@@ -424,8 +428,9 @@ def __getitem__(self, key):
                 selection = self.dict[key[::-1]]
 
             bix = np.vstack([s.indices for s in selection])
+            types = np.vstack([s.type for s in selection])
 
-            return TopologyGroup(bix, self._u, self.toptype)
+            return TopologyGroup(bix, self._u, self.toptype, types)
         else:
             raise KeyError(key)
 
@@ -490,23 +495,29 @@ class TopologyGroup(object):
     """
     _allowed_types = {'bond', 'angle', 'dihedral', 'improper'}
 
-    def __init__(self, bondidx, universe, btype=None):
-        # remove duplicate bonds
-        self._bix = unique_rows(bondidx)
-        self._u = universe
-
+    def __init__(self, bondidx, universe, btype=None, type=None):
         if btype is None:
             # guess what I am
             # difference between dihedral and improper
             # not really important
-            self.btype = {2:'bond', 3:'angle', 4:'dihedral'}[
-                len(bondidx[0])]
+            self.btype = {2:'bond', 3:'angle', 4:'dihedral'}[len(bondidx[0])]
         elif btype in self._allowed_types:
             self.btype = btype
         else:
             raise ValueError("Unsupported btype, use one of {}"
                              "".format(self._allowed_types))
 
+        # remove duplicate bonds
+        if type is not None:
+            split_index = {'bond':2, 'angle':3, 'dihedral':4, 'improper':4}[self.btype]
+            self._bix, self._bondtypes, _ = np.hsplit(unique_rows(np.hstack([bondidx,type.reshape(len(type), 1)])),
+                                                                             [split_index, 10])
+            self._bix = self._bix.astype(np.int32)
+        else:
+            self._bix = unique_rows(bondidx)
+            self._bondtypes = np.array([None for x in self._bix]).reshape(len(self._bix), 1)
+        self._u = universe
+
         # Create vertical AtomGroups
         self._ags = [universe.atoms[bondidx[:,i]]
                      for i in xrange(bondidx.shape[1])]
@@ -623,11 +634,13 @@ def __add__(self, other):
                 # Reshape indices to be 2d array
                 return TopologyGroup(other.indices[None, :],
                                      other.universe,
-                                     other.btype)
+                                     other.btype,
+                                     other._bondtypes[None, :])
             else:
                 return TopologyGroup(other.indices,
                                      other.universe,
-                                     other.btype)
+                                     other.btype,
+                                     other._bondtypes)
 
         # add TO to me
         if isinstance(other, TopologyObject):
@@ -637,7 +650,8 @@ def __add__(self, other):
             return TopologyGroup(
                 np.concatenate([self.indices, other.indices[None, :]]),
                 self.universe,
-                self.btype)
+                self.btype,
+                np.concatenate([self._bondtypes, other._bondtypes[None, :]]))
 
         # add TG to me
         if self.btype != other.btype:
@@ -646,7 +660,8 @@ def __add__(self, other):
         return TopologyGroup(
             np.concatenate([self.indices, other.indices]),
             self.universe,
-            self.btype)
+            self.btype,
+            np.concatenate([self._bondtypes, other._bondtypes]))
 
     def __getitem__(self, item):
         """Returns a particular bond as single object or a subset of
@@ -661,9 +676,10 @@ def __getitem__(self, item):
                         'angle':Angle,
                         'dihedral':Dihedral,
                         'improper':ImproperDihedral}[self.btype]
-            return outclass(self._bix[item], self._u)
+            return outclass(self._bix[item], self._u, type=self._bondtypes[item][0])
         # Slice my index array with the item
-        return self.__class__(self._bix[item], self._u, btype=self.btype)
+        return self.__class__(self._bix[item], self._u, btype=self.btype,\
+                              type=self._bondtypes[item][0])
 
     def __contains__(self, item):
         """Tests if this TopologyGroup contains a bond"""
diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py
index 5fdad8beb8d..2360ffa9c23 100644
--- a/package/MDAnalysis/topology/LAMMPSParser.py
+++ b/package/MDAnalysis/topology/LAMMPSParser.py
@@ -204,11 +204,11 @@ def parse(self):
                 (Impropers, 'Impropers', 4)
         ]:
             try:
-                sect = self._parse_bond_section(sects[L], nentries)
+                type, sect = self._parse_bond_section(sects[L], nentries)
             except KeyError:
                 pass
             else:
-                top.add_TopologyAttr(attr(sect))
+                top.add_TopologyAttr(attr(sect, type))
 
         return top
 
@@ -273,12 +273,14 @@ def _parse_bond_section(self, datalines, nentries):
         nentries - number of integers per line
         """
         section = []
+        type = []
         for line in datalines:
             line = line.split()
             # map to 0 based int
             section.append(tuple(map(lambda x: int(x) - 1, 
                                      line[2:2 + nentries])))
-        return tuple(section)
+            type.append(line[1])
+        return tuple(type), tuple(section)
 
     def _parse_atoms(self, datalines, massdict=None):
         """Creates a Topology object
@@ -315,14 +317,15 @@ def _parse_atoms(self, datalines, massdict=None):
         n = len(datalines[0].split())
         has_charge = True if n in [7, 10] else False
 
-        types = np.zeros(n_atoms, dtype=np.int32)
+        types = np.zeros(n_atoms, dtype='|S1')
         resids = np.zeros(n_atoms, dtype=np.int32)
         if has_charge:
             charges = np.zeros(n_atoms, dtype=np.float32)
 
         for line in datalines:
             line = line.split()
-            idx, resid, atype = map(int, line[:3])
+            idx, resid = map(int, line[:2])
+            atype = line[2]
             idx -= 1
             resids[idx] = resid
             types[idx] = atype
@@ -360,7 +363,7 @@ def _parse_masses(self, datalines):
         masses = {}
         for line in datalines:
             line = line.split()
-            masses[int(line[0])] = float(line[1])
+            masses[line[0]] = float(line[1])
 
         return masses
 

From 9bb9c49cf2f93cb6cc06ca733317874e6e0ee16f Mon Sep 17 00:00:00 2001
From: khuston <khuston@gmail.com>
Date: Tue, 15 Mar 2016 16:57:13 -0400
Subject: [PATCH 2/4] replace unique_row with np.unique and np records

The function `unique_row` was causing problems because bond type could
be None, and the containing numpy array had dtype==object. I replaced
`unique_rows` with `np.unique` by first converting the bond indices and
bond types into a numpy record, and then breaking this record back into
the bond indices and bond types.

I also simplified some of what I'd written in `topologyattrs.Bonds`.
---
 package/MDAnalysis/core/topologyattrs.py   | 30 +++++++---------------
 package/MDAnalysis/core/topologyobjects.py | 20 +++++++--------
 package/MDAnalysis/lib/util.py             | 29 ---------------------
 3 files changed, 18 insertions(+), 61 deletions(-)

diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py
index d0c79eef907..afa9a0d527d 100644
--- a/package/MDAnalysis/core/topologyattrs.py
+++ b/package/MDAnalysis/core/topologyattrs.py
@@ -20,6 +20,7 @@
 Common TopologyAttrs used by most topology parsers.
 
 """
+from six.moves import zip
 from collections import defaultdict
 import itertools
 import numpy as np
@@ -880,32 +881,21 @@ def __init__(self, values, types=None):
         Eg:  [(0, 1), (1, 2), (2, 3)]
         """
         self.values = values
-        self._cache = dict()
-        self.order_bonds()
-        self._bondtypes = {}
         if types is None:
             types = [None for value in values]
-        for value, type in zip(self.values, types):
-            self._bondtypes[value] = type
+        self.types = types
+        self._cache = dict()
 
     def __len__(self):
         return len(self._bondDict)
 
-    def order_bonds(self):
-        ordered_values = []
-        for b in self.values:
-            if b[0] > b[-1]:
-                b = b[::-1]
-            ordered_values.append(b)
-        self.values = ordered_values
-
     @property
     @cached('bd')
     def _bondDict(self):
         """Lazily built mapping of atoms:bonds"""
         bd = defaultdict(list)
 
-        for b in self.values:
+        for b, t in zip(self.values, self.types):
             # We always want the first index
             # to be less than the last
             # eg (0, 1) not (1, 0)
@@ -913,7 +903,7 @@ def _bondDict(self):
             if b[0] > b[-1]:
                 b = b[::-1]
             for a in b:
-                bd[a].append(b)
+                bd[a].append((b, t))
         return bd
 
     def get_atoms(self, ag):
@@ -923,12 +913,10 @@ def get_atoms(self, ag):
         except TypeError:
             # maybe we got passed an Atom
             unique_bonds = self._bondDict[ag._ix]
-        if self._bondtypes is not None:
-            _bondtypes = {value: self._bondtypes[value] for value in unique_bonds}
-        else:
-            _bondtypes = None
-        bond_idx = np.array(sorted(unique_bonds))
-        return TopologyGroup(bond_idx, ag._u, self.singular[:-1], np.array(_bondtypes.values()))
+        bond_idx, types = np.hsplit(np.array(sorted(unique_bonds)), 2)
+        bond_idx = np.array(bond_idx.ravel().tolist(), dtype=np.int32)
+        types = types.ravel()
+        return TopologyGroup(bond_idx, ag._u, self.singular[:-1], types)
 
     def bonded_atoms(self):
         """An AtomGroup of all atoms bonded to this Atom"""
diff --git a/package/MDAnalysis/core/topologyobjects.py b/package/MDAnalysis/core/topologyobjects.py
index fed801d6f60..7573605df93 100644
--- a/package/MDAnalysis/core/topologyobjects.py
+++ b/package/MDAnalysis/core/topologyobjects.py
@@ -27,7 +27,7 @@
 
 from ..lib.mdamath import norm, dihedral
 from ..lib.mdamath import angle as slowang
-from ..lib.util import cached, unique_rows
+from ..lib.util import cached
 from ..lib import distances
 
 
@@ -508,14 +508,12 @@ def __init__(self, bondidx, universe, btype=None, type=None):
                              "".format(self._allowed_types))
 
         # remove duplicate bonds
-        if type is not None:
-            split_index = {'bond':2, 'angle':3, 'dihedral':4, 'improper':4}[self.btype]
-            self._bix, self._bondtypes, _ = np.hsplit(unique_rows(np.hstack([bondidx,type.reshape(len(type), 1)])),
-                                                                             [split_index, 10])
-            self._bix = self._bix.astype(np.int32)
-        else:
-            self._bix = unique_rows(bondidx)
-            self._bondtypes = np.array([None for x in self._bix]).reshape(len(self._bix), 1)
+        if type is None:
+            type = np.array([None for x in bondidx]).reshape(len(bondidx), 1)
+        split_index = {'bond':2, 'angle':3, 'dihedral':4, 'improper':4}[self.btype]
+        unique_records = np.unique((np.rec.fromarrays([bondidx[:, i] for i in xrange(split_index)] + [type])))
+        self._bix = np.vstack([unique_records[unique_records.dtype.names[i]] for i in xrange(split_index)]).T
+        self._bondtypes = unique_records[unique_records.dtype.names[bondidx.shape[1]]]
         self._u = universe
 
         # Create vertical AtomGroups
@@ -676,10 +674,10 @@ def __getitem__(self, item):
                         'angle':Angle,
                         'dihedral':Dihedral,
                         'improper':ImproperDihedral}[self.btype]
-            return outclass(self._bix[item], self._u, type=self._bondtypes[item][0])
+            return outclass(self._bix[item], self._u, type=self._bondtypes[item])
         # Slice my index array with the item
         return self.__class__(self._bix[item], self._u, btype=self.btype,\
-                              type=self._bondtypes[item][0])
+                              type=self._bondtypes[item])
 
     def __contains__(self, item):
         """Tests if this TopologyGroup contains a bond"""
diff --git a/package/MDAnalysis/lib/util.py b/package/MDAnalysis/lib/util.py
index 9f498710eaa..cda876c0723 100644
--- a/package/MDAnalysis/lib/util.py
+++ b/package/MDAnalysis/lib/util.py
@@ -1164,32 +1164,3 @@ def wrapper(self, *args, **kwargs):
         return wrapper
 
     return cached_lookup
-
-
-def unique_rows(arr):
-    """Return the unique rows from an array
-
-    Arguments
-    ---------
-    arr - np.array of shape (n1, m)
-
-    Returns
-    -------
-    unique_rows (n2, m)
-
-    Examples
-    --------
-    Remove dupicate rows from an array:
-    >>> a = np.array([[0, 1], [1, 2], [1, 2], [0, 1], [2, 3]])
-    >>> b = unique_rows(a)
-    >>> b
-    array([[0, 1], [1, 2], [2, 3]])
-    """
-    # From here, but adapted to handle any size rows
-    # https://mail.scipy.org/pipermail/scipy-user/2011-December/031200.html
-    m = arr.shape[1]
-
-    u = np.unique(arr.view(
-        dtype=np.dtype([(str(i), arr.dtype) for i in xrange(m)])
-    ))
-    return u.view(arr.dtype).reshape(-1, m)

From 906c46c59359b14b021e972a443b5a5266db723f Mon Sep 17 00:00:00 2001
From: khuston <khuston@gmail.com>
Date: Tue, 15 Mar 2016 17:50:41 -0400
Subject: [PATCH 3/4] Restore unique_rows to lib.util

---
 package/MDAnalysis/lib/util.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/package/MDAnalysis/lib/util.py b/package/MDAnalysis/lib/util.py
index cda876c0723..9f498710eaa 100644
--- a/package/MDAnalysis/lib/util.py
+++ b/package/MDAnalysis/lib/util.py
@@ -1164,3 +1164,32 @@ def wrapper(self, *args, **kwargs):
         return wrapper
 
     return cached_lookup
+
+
+def unique_rows(arr):
+    """Return the unique rows from an array
+
+    Arguments
+    ---------
+    arr - np.array of shape (n1, m)
+
+    Returns
+    -------
+    unique_rows (n2, m)
+
+    Examples
+    --------
+    Remove dupicate rows from an array:
+    >>> a = np.array([[0, 1], [1, 2], [1, 2], [0, 1], [2, 3]])
+    >>> b = unique_rows(a)
+    >>> b
+    array([[0, 1], [1, 2], [2, 3]])
+    """
+    # From here, but adapted to handle any size rows
+    # https://mail.scipy.org/pipermail/scipy-user/2011-December/031200.html
+    m = arr.shape[1]
+
+    u = np.unique(arr.view(
+        dtype=np.dtype([(str(i), arr.dtype) for i in xrange(m)])
+    ))
+    return u.view(arr.dtype).reshape(-1, m)

From 5b564584bd19a6c542140c07cd2e0396f03b481e Mon Sep 17 00:00:00 2001
From: khuston <khuston@gmail.com>
Date: Wed, 16 Mar 2016 01:28:12 -0400
Subject: [PATCH 4/4] Fix handling of empty TopologyGroups

If parser told universe to create empty TopologyGroup, the resulting
group was an attribute of atoms but gave IndexError on trying to access
it. Seems to work as expected now... creating TopologyGroup with 0
TopologyObjects
---
 package/MDAnalysis/core/topologyobjects.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/package/MDAnalysis/core/topologyobjects.py b/package/MDAnalysis/core/topologyobjects.py
index 7573605df93..105824acead 100644
--- a/package/MDAnalysis/core/topologyobjects.py
+++ b/package/MDAnalysis/core/topologyobjects.py
@@ -511,14 +511,19 @@ def __init__(self, bondidx, universe, btype=None, type=None):
         if type is None:
             type = np.array([None for x in bondidx]).reshape(len(bondidx), 1)
         split_index = {'bond':2, 'angle':3, 'dihedral':4, 'improper':4}[self.btype]
-        unique_records = np.unique((np.rec.fromarrays([bondidx[:, i] for i in xrange(split_index)] + [type])))
-        self._bix = np.vstack([unique_records[unique_records.dtype.names[i]] for i in xrange(split_index)]).T
-        self._bondtypes = unique_records[unique_records.dtype.names[bondidx.shape[1]]]
+        if len(bondidx) > 0:
+            unique_records = np.unique((np.rec.fromarrays([bondidx[:, i] for i in xrange(split_index)] + [type])))
+            self._bix = np.vstack([unique_records[unique_records.dtype.names[i]] for i in xrange(split_index)]).T
+            self._bondtypes = unique_records[unique_records.dtype.names[bondidx.shape[1]]]
+            # Create vertical AtomGroups
+            self._ags = [universe.atoms[bondidx[:,i]]
+                         for i in xrange(bondidx.shape[1])]
+        else:
+            self._bix = np.array([])
+            self._bondtypes = np.array([])
+            self._ags = []
         self._u = universe
 
-        # Create vertical AtomGroups
-        self._ags = [universe.atoms[bondidx[:,i]]
-                     for i in xrange(bondidx.shape[1])]
 
         self._cache = dict()  # used for topdict saving