From 044fc695f2bed56e174a9ec67d039331ccc1b856 Mon Sep 17 00:00:00 2001 From: khuston Date: Tue, 15 Mar 2016 14:18:15 -0400 Subject: [PATCH 1/4] Fix DATA type parsing and add explicit bond types **NOTE:** `atoms.bonds` is broken for atom groups without explicit bond typing in this commit. In order to write a LAMMPS.DATAWriter, I first need explicit typing of bonds, angles, dihedrals, and impropers beyond the tupling of atoms types done now. To hack bond types into the topology, I branched issue-363 and tried the following: 1. Add optional `types` keyword to `topologyattrs.Bonds`. This is stored in the class instance a `self._bondtypes`. A topology parser can pass `types` to the Bonds topologyattr. 2. When `topologyattrs.Bonds` is called upon to generate a TopologyGroup, these values are passed on via a new keyword in `TopologyGroup.__init__()`. They are stored in the TopologyGroup instance as `self._bondtypes`. 3. I modified `TopologyGroup.__getitem__` to yield a Bond with attribute `_bondtype`. If `_bondtype is not None` then it overrides the default behavior of `bond.type`. 4. I modified `TopologyGroup()` calls wherever I could find them to pass on `_bondtype` in the instantiation. I found that the DATAParser in branch #363 is slightly broken, because the atom types are saved as integers which breaks `select_atoms` (I think it expects strings, and typing the types as strings on parsing fixes this issue). It seems there is currently no test for atom selection from LAMMPS DATA files, so this might have gone undetected. An error is raised for `ag.bonds` for an atom group that doesn't have bonds, but this seems to also be the case for branch issue-363 right now, so I didn't try to fix this. Two places are especially ugly. One is where I tried to hack `_bondtypes` into the existing code to remove duplicate bonds. This is also where the code seems to be broken for non-explicit bond types. In this case, I set `_bondtypes` to be a numpy array full of `None` with `dtype=object`. It seems that `unique_rows` fails for this case, although it works fine when `_bondtypes.dtype == "|S1"`. --- package/MDAnalysis/core/topologyattrs.py | 24 +++++++++-- package/MDAnalysis/core/topologyobjects.py | 48 ++++++++++++++------- package/MDAnalysis/topology/LAMMPSParser.py | 15 ++++--- 3 files changed, 62 insertions(+), 25 deletions(-) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 85503e86329..d0c79eef907 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -870,7 +870,7 @@ class Bonds(AtomAttr): singular = 'bonds' transplants = defaultdict(list) - def __init__(self, values): + def __init__(self, values, types=None): """ Arguments --------- @@ -881,10 +881,24 @@ def __init__(self, values): """ self.values = values self._cache = dict() + self.order_bonds() + self._bondtypes = {} + if types is None: + types = [None for value in values] + for value, type in zip(self.values, types): + self._bondtypes[value] = type def __len__(self): return len(self._bondDict) + def order_bonds(self): + ordered_values = [] + for b in self.values: + if b[0] > b[-1]: + b = b[::-1] + ordered_values.append(b) + self.values = ordered_values + @property @cached('bd') def _bondDict(self): @@ -896,7 +910,7 @@ def _bondDict(self): # to be less than the last # eg (0, 1) not (1, 0) # and (4, 10, 8) not (8, 10, 4) - if b[0] < b[-1]: + if b[0] > b[-1]: b = b[::-1] for a in b: bd[a].append(b) @@ -909,8 +923,12 @@ def get_atoms(self, ag): except TypeError: # maybe we got passed an Atom unique_bonds = self._bondDict[ag._ix] + if self._bondtypes is not None: + _bondtypes = {value: self._bondtypes[value] for value in unique_bonds} + else: + _bondtypes = None bond_idx = np.array(sorted(unique_bonds)) - return TopologyGroup(bond_idx, ag._u, self.singular[:-1]) + return TopologyGroup(bond_idx, ag._u, self.singular[:-1], np.array(_bondtypes.values())) def bonded_atoms(self): """An AtomGroup of all atoms bonded to this Atom""" diff --git a/package/MDAnalysis/core/topologyobjects.py b/package/MDAnalysis/core/topologyobjects.py index 677cc29a69f..fed801d6f60 100644 --- a/package/MDAnalysis/core/topologyobjects.py +++ b/package/MDAnalysis/core/topologyobjects.py @@ -48,7 +48,7 @@ class TopologyObject(object): """ __slots__ = ("_ix", "_u", "btype") - def __init__(self, ix, universe): + def __init__(self, ix, universe, type=None): """Create a topology object Arguments @@ -58,6 +58,7 @@ def __init__(self, ix, universe): """ self._ix = ix self._u = universe + self._bondtype = type @property def atoms(self): @@ -88,7 +89,10 @@ def type(self): a.type == b.type or a.type == b.type[::-1] """ - return tuple(self.atoms.types) + if self._bondtype is not None: + return self._bondtype + else: + return tuple(self.atoms.types) def __hash__(self): return hash((self._u, tuple(self.indices))) @@ -424,8 +428,9 @@ def __getitem__(self, key): selection = self.dict[key[::-1]] bix = np.vstack([s.indices for s in selection]) + types = np.vstack([s.type for s in selection]) - return TopologyGroup(bix, self._u, self.toptype) + return TopologyGroup(bix, self._u, self.toptype, types) else: raise KeyError(key) @@ -490,23 +495,29 @@ class TopologyGroup(object): """ _allowed_types = {'bond', 'angle', 'dihedral', 'improper'} - def __init__(self, bondidx, universe, btype=None): - # remove duplicate bonds - self._bix = unique_rows(bondidx) - self._u = universe - + def __init__(self, bondidx, universe, btype=None, type=None): if btype is None: # guess what I am # difference between dihedral and improper # not really important - self.btype = {2:'bond', 3:'angle', 4:'dihedral'}[ - len(bondidx[0])] + self.btype = {2:'bond', 3:'angle', 4:'dihedral'}[len(bondidx[0])] elif btype in self._allowed_types: self.btype = btype else: raise ValueError("Unsupported btype, use one of {}" "".format(self._allowed_types)) + # remove duplicate bonds + if type is not None: + split_index = {'bond':2, 'angle':3, 'dihedral':4, 'improper':4}[self.btype] + self._bix, self._bondtypes, _ = np.hsplit(unique_rows(np.hstack([bondidx,type.reshape(len(type), 1)])), + [split_index, 10]) + self._bix = self._bix.astype(np.int32) + else: + self._bix = unique_rows(bondidx) + self._bondtypes = np.array([None for x in self._bix]).reshape(len(self._bix), 1) + self._u = universe + # Create vertical AtomGroups self._ags = [universe.atoms[bondidx[:,i]] for i in xrange(bondidx.shape[1])] @@ -623,11 +634,13 @@ def __add__(self, other): # Reshape indices to be 2d array return TopologyGroup(other.indices[None, :], other.universe, - other.btype) + other.btype, + other._bondtypes[None, :]) else: return TopologyGroup(other.indices, other.universe, - other.btype) + other.btype, + other._bondtypes) # add TO to me if isinstance(other, TopologyObject): @@ -637,7 +650,8 @@ def __add__(self, other): return TopologyGroup( np.concatenate([self.indices, other.indices[None, :]]), self.universe, - self.btype) + self.btype, + np.concatenate([self._bondtypes, other._bondtypes[None, :]])) # add TG to me if self.btype != other.btype: @@ -646,7 +660,8 @@ def __add__(self, other): return TopologyGroup( np.concatenate([self.indices, other.indices]), self.universe, - self.btype) + self.btype, + np.concatenate([self._bondtypes, other._bondtypes])) def __getitem__(self, item): """Returns a particular bond as single object or a subset of @@ -661,9 +676,10 @@ def __getitem__(self, item): 'angle':Angle, 'dihedral':Dihedral, 'improper':ImproperDihedral}[self.btype] - return outclass(self._bix[item], self._u) + return outclass(self._bix[item], self._u, type=self._bondtypes[item][0]) # Slice my index array with the item - return self.__class__(self._bix[item], self._u, btype=self.btype) + return self.__class__(self._bix[item], self._u, btype=self.btype,\ + type=self._bondtypes[item][0]) def __contains__(self, item): """Tests if this TopologyGroup contains a bond""" diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py index 5fdad8beb8d..2360ffa9c23 100644 --- a/package/MDAnalysis/topology/LAMMPSParser.py +++ b/package/MDAnalysis/topology/LAMMPSParser.py @@ -204,11 +204,11 @@ def parse(self): (Impropers, 'Impropers', 4) ]: try: - sect = self._parse_bond_section(sects[L], nentries) + type, sect = self._parse_bond_section(sects[L], nentries) except KeyError: pass else: - top.add_TopologyAttr(attr(sect)) + top.add_TopologyAttr(attr(sect, type)) return top @@ -273,12 +273,14 @@ def _parse_bond_section(self, datalines, nentries): nentries - number of integers per line """ section = [] + type = [] for line in datalines: line = line.split() # map to 0 based int section.append(tuple(map(lambda x: int(x) - 1, line[2:2 + nentries]))) - return tuple(section) + type.append(line[1]) + return tuple(type), tuple(section) def _parse_atoms(self, datalines, massdict=None): """Creates a Topology object @@ -315,14 +317,15 @@ def _parse_atoms(self, datalines, massdict=None): n = len(datalines[0].split()) has_charge = True if n in [7, 10] else False - types = np.zeros(n_atoms, dtype=np.int32) + types = np.zeros(n_atoms, dtype='|S1') resids = np.zeros(n_atoms, dtype=np.int32) if has_charge: charges = np.zeros(n_atoms, dtype=np.float32) for line in datalines: line = line.split() - idx, resid, atype = map(int, line[:3]) + idx, resid = map(int, line[:2]) + atype = line[2] idx -= 1 resids[idx] = resid types[idx] = atype @@ -360,7 +363,7 @@ def _parse_masses(self, datalines): masses = {} for line in datalines: line = line.split() - masses[int(line[0])] = float(line[1]) + masses[line[0]] = float(line[1]) return masses From 9bb9c49cf2f93cb6cc06ca733317874e6e0ee16f Mon Sep 17 00:00:00 2001 From: khuston Date: Tue, 15 Mar 2016 16:57:13 -0400 Subject: [PATCH 2/4] replace unique_row with np.unique and np records The function `unique_row` was causing problems because bond type could be None, and the containing numpy array had dtype==object. I replaced `unique_rows` with `np.unique` by first converting the bond indices and bond types into a numpy record, and then breaking this record back into the bond indices and bond types. I also simplified some of what I'd written in `topologyattrs.Bonds`. --- package/MDAnalysis/core/topologyattrs.py | 30 +++++++--------------- package/MDAnalysis/core/topologyobjects.py | 20 +++++++-------- package/MDAnalysis/lib/util.py | 29 --------------------- 3 files changed, 18 insertions(+), 61 deletions(-) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index d0c79eef907..afa9a0d527d 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -20,6 +20,7 @@ Common TopologyAttrs used by most topology parsers. """ +from six.moves import zip from collections import defaultdict import itertools import numpy as np @@ -880,32 +881,21 @@ def __init__(self, values, types=None): Eg: [(0, 1), (1, 2), (2, 3)] """ self.values = values - self._cache = dict() - self.order_bonds() - self._bondtypes = {} if types is None: types = [None for value in values] - for value, type in zip(self.values, types): - self._bondtypes[value] = type + self.types = types + self._cache = dict() def __len__(self): return len(self._bondDict) - def order_bonds(self): - ordered_values = [] - for b in self.values: - if b[0] > b[-1]: - b = b[::-1] - ordered_values.append(b) - self.values = ordered_values - @property @cached('bd') def _bondDict(self): """Lazily built mapping of atoms:bonds""" bd = defaultdict(list) - for b in self.values: + for b, t in zip(self.values, self.types): # We always want the first index # to be less than the last # eg (0, 1) not (1, 0) @@ -913,7 +903,7 @@ def _bondDict(self): if b[0] > b[-1]: b = b[::-1] for a in b: - bd[a].append(b) + bd[a].append((b, t)) return bd def get_atoms(self, ag): @@ -923,12 +913,10 @@ def get_atoms(self, ag): except TypeError: # maybe we got passed an Atom unique_bonds = self._bondDict[ag._ix] - if self._bondtypes is not None: - _bondtypes = {value: self._bondtypes[value] for value in unique_bonds} - else: - _bondtypes = None - bond_idx = np.array(sorted(unique_bonds)) - return TopologyGroup(bond_idx, ag._u, self.singular[:-1], np.array(_bondtypes.values())) + bond_idx, types = np.hsplit(np.array(sorted(unique_bonds)), 2) + bond_idx = np.array(bond_idx.ravel().tolist(), dtype=np.int32) + types = types.ravel() + return TopologyGroup(bond_idx, ag._u, self.singular[:-1], types) def bonded_atoms(self): """An AtomGroup of all atoms bonded to this Atom""" diff --git a/package/MDAnalysis/core/topologyobjects.py b/package/MDAnalysis/core/topologyobjects.py index fed801d6f60..7573605df93 100644 --- a/package/MDAnalysis/core/topologyobjects.py +++ b/package/MDAnalysis/core/topologyobjects.py @@ -27,7 +27,7 @@ from ..lib.mdamath import norm, dihedral from ..lib.mdamath import angle as slowang -from ..lib.util import cached, unique_rows +from ..lib.util import cached from ..lib import distances @@ -508,14 +508,12 @@ def __init__(self, bondidx, universe, btype=None, type=None): "".format(self._allowed_types)) # remove duplicate bonds - if type is not None: - split_index = {'bond':2, 'angle':3, 'dihedral':4, 'improper':4}[self.btype] - self._bix, self._bondtypes, _ = np.hsplit(unique_rows(np.hstack([bondidx,type.reshape(len(type), 1)])), - [split_index, 10]) - self._bix = self._bix.astype(np.int32) - else: - self._bix = unique_rows(bondidx) - self._bondtypes = np.array([None for x in self._bix]).reshape(len(self._bix), 1) + if type is None: + type = np.array([None for x in bondidx]).reshape(len(bondidx), 1) + split_index = {'bond':2, 'angle':3, 'dihedral':4, 'improper':4}[self.btype] + unique_records = np.unique((np.rec.fromarrays([bondidx[:, i] for i in xrange(split_index)] + [type]))) + self._bix = np.vstack([unique_records[unique_records.dtype.names[i]] for i in xrange(split_index)]).T + self._bondtypes = unique_records[unique_records.dtype.names[bondidx.shape[1]]] self._u = universe # Create vertical AtomGroups @@ -676,10 +674,10 @@ def __getitem__(self, item): 'angle':Angle, 'dihedral':Dihedral, 'improper':ImproperDihedral}[self.btype] - return outclass(self._bix[item], self._u, type=self._bondtypes[item][0]) + return outclass(self._bix[item], self._u, type=self._bondtypes[item]) # Slice my index array with the item return self.__class__(self._bix[item], self._u, btype=self.btype,\ - type=self._bondtypes[item][0]) + type=self._bondtypes[item]) def __contains__(self, item): """Tests if this TopologyGroup contains a bond""" diff --git a/package/MDAnalysis/lib/util.py b/package/MDAnalysis/lib/util.py index 9f498710eaa..cda876c0723 100644 --- a/package/MDAnalysis/lib/util.py +++ b/package/MDAnalysis/lib/util.py @@ -1164,32 +1164,3 @@ def wrapper(self, *args, **kwargs): return wrapper return cached_lookup - - -def unique_rows(arr): - """Return the unique rows from an array - - Arguments - --------- - arr - np.array of shape (n1, m) - - Returns - ------- - unique_rows (n2, m) - - Examples - -------- - Remove dupicate rows from an array: - >>> a = np.array([[0, 1], [1, 2], [1, 2], [0, 1], [2, 3]]) - >>> b = unique_rows(a) - >>> b - array([[0, 1], [1, 2], [2, 3]]) - """ - # From here, but adapted to handle any size rows - # https://mail.scipy.org/pipermail/scipy-user/2011-December/031200.html - m = arr.shape[1] - - u = np.unique(arr.view( - dtype=np.dtype([(str(i), arr.dtype) for i in xrange(m)]) - )) - return u.view(arr.dtype).reshape(-1, m) From 906c46c59359b14b021e972a443b5a5266db723f Mon Sep 17 00:00:00 2001 From: khuston Date: Tue, 15 Mar 2016 17:50:41 -0400 Subject: [PATCH 3/4] Restore unique_rows to lib.util --- package/MDAnalysis/lib/util.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/package/MDAnalysis/lib/util.py b/package/MDAnalysis/lib/util.py index cda876c0723..9f498710eaa 100644 --- a/package/MDAnalysis/lib/util.py +++ b/package/MDAnalysis/lib/util.py @@ -1164,3 +1164,32 @@ def wrapper(self, *args, **kwargs): return wrapper return cached_lookup + + +def unique_rows(arr): + """Return the unique rows from an array + + Arguments + --------- + arr - np.array of shape (n1, m) + + Returns + ------- + unique_rows (n2, m) + + Examples + -------- + Remove dupicate rows from an array: + >>> a = np.array([[0, 1], [1, 2], [1, 2], [0, 1], [2, 3]]) + >>> b = unique_rows(a) + >>> b + array([[0, 1], [1, 2], [2, 3]]) + """ + # From here, but adapted to handle any size rows + # https://mail.scipy.org/pipermail/scipy-user/2011-December/031200.html + m = arr.shape[1] + + u = np.unique(arr.view( + dtype=np.dtype([(str(i), arr.dtype) for i in xrange(m)]) + )) + return u.view(arr.dtype).reshape(-1, m) From 5b564584bd19a6c542140c07cd2e0396f03b481e Mon Sep 17 00:00:00 2001 From: khuston Date: Wed, 16 Mar 2016 01:28:12 -0400 Subject: [PATCH 4/4] Fix handling of empty TopologyGroups If parser told universe to create empty TopologyGroup, the resulting group was an attribute of atoms but gave IndexError on trying to access it. Seems to work as expected now... creating TopologyGroup with 0 TopologyObjects --- package/MDAnalysis/core/topologyobjects.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/core/topologyobjects.py b/package/MDAnalysis/core/topologyobjects.py index 7573605df93..105824acead 100644 --- a/package/MDAnalysis/core/topologyobjects.py +++ b/package/MDAnalysis/core/topologyobjects.py @@ -511,14 +511,19 @@ def __init__(self, bondidx, universe, btype=None, type=None): if type is None: type = np.array([None for x in bondidx]).reshape(len(bondidx), 1) split_index = {'bond':2, 'angle':3, 'dihedral':4, 'improper':4}[self.btype] - unique_records = np.unique((np.rec.fromarrays([bondidx[:, i] for i in xrange(split_index)] + [type]))) - self._bix = np.vstack([unique_records[unique_records.dtype.names[i]] for i in xrange(split_index)]).T - self._bondtypes = unique_records[unique_records.dtype.names[bondidx.shape[1]]] + if len(bondidx) > 0: + unique_records = np.unique((np.rec.fromarrays([bondidx[:, i] for i in xrange(split_index)] + [type]))) + self._bix = np.vstack([unique_records[unique_records.dtype.names[i]] for i in xrange(split_index)]).T + self._bondtypes = unique_records[unique_records.dtype.names[bondidx.shape[1]]] + # Create vertical AtomGroups + self._ags = [universe.atoms[bondidx[:,i]] + for i in xrange(bondidx.shape[1])] + else: + self._bix = np.array([]) + self._bondtypes = np.array([]) + self._ags = [] self._u = universe - # Create vertical AtomGroups - self._ags = [universe.atoms[bondidx[:,i]] - for i in xrange(bondidx.shape[1])] self._cache = dict() # used for topdict saving