From ffcd3f27f7cd705f7f165a80fe88f00000187a4f Mon Sep 17 00:00:00 2001 From: Eugen Hruska Date: Wed, 2 Mar 2016 11:29:23 -0600 Subject: [PATCH 01/22] new analysis feature: diffusion map --- package/MDAnalysis/analysis/diffusionmap.py | 273 ++++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 package/MDAnalysis/analysis/diffusionmap.py diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py new file mode 100644 index 00000000000..5ee3803dfc1 --- /dev/null +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -0,0 +1,273 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- http://www.MDAnalysis.org +# Copyright (c) 2006-2015 Naveen Michaud-Agrawal, Elizabeth J. Denning, Oliver Beckstein +# and contributors (see AUTHORS for the full list) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +Diffusion map --- :mod:`MDAnalysis.analysis.diffusionmap` +===================================================================== + +:Author: Eugen Hruska +:Year: 2016 +:Copyright: GNU Public License v3 + +BETTER +The module contains functions to fit a target structure to a reference +structure. They use the fast QCP algorithm to calculate the root mean +square distance (RMSD) between two coordinate sets [Theobald2005]_ and +the rotation matrix *R* that minimizes the RMSD [Liu2010]_. (Please +cite these references when using this module.). + +Typically, one selects a group of atoms (such as the C-alphas), +calculates the RMSD and transformation matrix, and applys the +transformation to the current frame of a trajectory to obtain the +rotated structure. The :func:`alignto` and :func:`rms_fit_trj` +functions can be used to do this for individual frames and +trajectories respectively. + +The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. + + +.. _Diffusion-Map-tutorial: + +Diffusion Map tutorial +-------------------- + +The example uses files provided as part of the MDAnalysis test suite +(in the variables :data:`~MDAnalysis.tests.datafiles.PSF` and +:data:`~MDAnalysis.tests.datafiles.DCD`). First load all modules and test data :: + + >>> import MDAnalysis + >>> import numpy as np + >>> import MDAnalysis.lib.qcprot as qcp + >>> import MDAnalysis.analysis.diffusionmap as diffusionmap + >>> from MDAnalysis.tests.datafiles import PSF,DCD + + +In the simplest case, we can simply calculate the diffusion map from one trajectory :func:`diffusionmap`:: + + >>> u = MDAnalysis.Universe(PSF,DCD) + >>> eg,ev=diffusionmap.diffusionmap(u) + +Since o(N^2) long trajectories take too long, increase stride + +weight + +plot + +intro + +what eg, ev do +eg should after several steps fall off to 0 +ev[i,:] is the ith slowest eigenvector, this can be used to dimension reduction + +Other stuff in paper + + + >>> import matplotlib.pyplot as plt + >>> plt.plot(eg[:10]) + >>> plt.title('diffusion map eigenvalues') + >>> plt.xlabel("eigenvalue") + >>> plt.ylabel("index of eigenvalue") + >>> plt.show() + + +Common usage +------------ + +To **fit a single structure** with :func:`alignto`:: + + >>> ref = Universe(PSF, PDB_small) + >>> mobile = Universe(PSF, DCD) # we use the first frame + >>> alignto(mobile, ref, select="protein and name CA", mass_weighted=True) + +This will change *all* coordinates in *mobile* so that the protein +C-alpha atoms are optimally superimposed (translation and rotation). + + +Functions +--------- + +.. autofunction:: diffusionmap + + +""" + +import os.path +from six.moves import range, zip, zip_longest +import numpy as np +import warnings +import logging + + +import MDAnalysis.lib.qcprot as qcp +from MDAnalysis.exceptions import SelectionError, SelectionWarning +from MDAnalysis.lib.log import ProgressMeter +import MDAnalysis.analysis.rms as rms + + +logger = logging.getLogger('MDAnalysis.analysis.diffusionmap') + + +def diffusionmap(u, distance='rmsd', type_epsilon='average', stride=1):#, select="all", mass_weighted=False,subselection=None, tol_mass=0.1, strict=False): + """Spatially align *mobile* to *reference* by doing a RMSD fit on *select* atoms. + + The superposition is done in the following way: + + 1. A rotation matrix is computed that minimizes the RMSD between + the coordinates of `mobile.select_atoms(sel1)` and + `reference.select_atoms(sel2)`; before the rotation, *mobile* is + translated so that its center of geometry (or center of mass) + coincides with the one of *reference*. (See below for explanation of + how *sel1* and *sel2* are derived from *select*.) + + 2. All atoms in :class:`~MDAnalysis.core.AtomGroup.Universe` that + contains *mobile* are shifted and rotated. (See below for how + to change this behavior through the *subselection* keyword.) + + The *mobile* and *reference* atom groups can be constructed so that they + already match atom by atom. In this case, *select* should be set to "all" + (or ``None``) so that no further selections are applied to *mobile* and + *reference*, therefore preserving the exact atom ordering (see + :ref:`ordered-selections-label`). + + .. Warning:: The atom order for *mobile* and *reference* is *only* + preserved when *select* is either "all" or ``None``. In any other case, + a new selection will be made that will sort the resulting AtomGroup by + index and therefore destroy the correspondence between the two groups. **It + is safest not to mix ordered AtomGroups with selection strings.** + + :Arguments: + *mobile* + structure to be aligned, a :class:`~MDAnalysis.core.AtomGroup.AtomGroup` + or a whole :class:`~MDAnalysis.core.AtomGroup.Universe` + *reference* + reference structure, a :class:`~MDAnalysis.core.AtomGroup.AtomGroup` + or a whole :class:`~MDAnalysis.core.AtomGroup.Universe` + *select* + 1. any valid selection string for + :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` that produces identical + selections in *mobile* and *reference*; or + 2. dictionary ``{'mobile':sel1, 'reference':sel2}``. + (the :func:`fasta2select` function returns such a + dictionary based on a ClustalW_ or STAMP_ sequence alignment); or + 3. tuple ``(sel1, sel2)`` + + When using 2. or 3. with *sel1* and *sel2* then these selections can also each be + a list of selection strings (to generate a AtomGroup with defined atom order as + described under :ref:`ordered-selections-label`). + *mass_weighted* : boolean + ``True`` uses the masses :meth:`reference.masses` as weights for the + RMSD fit. + *tol_mass* + Reject match if the atomic masses for matched atoms differ by more than + *tol_mass* [0.1] + *strict* + ``True`` + Will raise :exc:`SelectioError` if a single atom does not + match between the two selections. + ``False`` [default] + Will try to prepare a matching selection by dropping + residues with non-matching atoms. See :func:`get_matching_atoms` + for details. + *subselection* + Apply the transformation only to this selection. + + ``None`` [default] + Apply to `mobile.universe.atoms` (i.e. all atoms in the + context of the selection from *mobile* such as the rest of a + protein, ligands and the surrounding water) + *selection-string* + Apply to `mobile.select_atoms(selection-string)` + :class:`~MDAnalysis.core.AtomGroup.AtomGroup` + Apply to the arbitrary group of atoms + + :Returns: eigenvalues, eigenvectors + + """ + + frames=u.trajectory + ref_atoms = u.select_atoms('all') + nframes=len(frames) + natoms = ref_atoms.n_atoms + + #define data + + #mobile_atoms = u.atoms + + rmsd_matrix=np.zeros((nframes,nframes), dtype=np.float64) + epsilon=np.zeros((nframes,), dtype=np.float64) + kernel2=np.zeros((nframes,nframes), dtype=np.float64) + + rmsd = np.zeros((nframes,)) + rot= np.zeros(9, dtype=np.float64) + weight=None + type_epsilon='average' + if weight==None: + weights=np.full((nframes,),1) + + + for i in range(nframes): + logger.info("calculating rmsd from structure "+str(i)+" to all") + i_ref=np.copy(u.trajectory[i].positions-ref_atoms.center_of_mass()) + #i_ref, ref_atoms.center_of_mass()#=i_ref-ref_atoms.center_of_mass() + #ref_atoms.center_of_mass() + for j in range(nframes): + j_ref=np.copy(u.trajectory[j].positions-ref_atoms.center_of_mass()) + rmsd_matrix[i,j] = qcp.CalcRMSDRotationalMatrix(i_ref.T.astype(np.float64), j_ref.T.astype(np.float64), natoms, rot, weight) + #print i,j,rmsd_matrix[i,j] + + #print rmsd_matrix[:10,:10] + #print rmsd_matrix.max() + #all_traj=u.trajectory[0] + #current_ref=u.trajectory[-1] + #rmsd_matrix[i,:] = rms.rmsd(u2.atoms.coordinates(),u.atoms.coordinates()) + + #calculate epsilons (average) + #rmsd_matrix[0,np.argsort(rmsd_matrix[0,:])[10]] + for i in range(nframes): + #np.argsort(rmsd_matrix[i,:])#[10]] + epsilon[i]=rmsd_matrix[i,np.argsort(rmsd_matrix[i,:])[10]] + + if type_epsilon=='average': + epsilon=np.full((nframes,),epsilon.mean()) + + logger.info("epsilon: "+str(epsilon)) + + #calculate exponetial to get + for i in range(nframes): + kernel2[i,:]=np.exp(-rmsd_matrix[i,:]**2/(epsilon[i]*epsilon[:])) + + #normalisation + p_vector = np.zeros((nframes,)) + d_vector = np.zeros((nframes,)) + for i in range(nframes): + p_vector[i]=np.dot(kernel2[i,:],weights) + + kernel2 /= np.sqrt(p_vector[:,np.newaxis].dot(p_vector[np.newaxis])) + + for i in range(nframes): + d_vector[i]=np.dot(kernel2[i,:],weights) + + for i in range(nframes): + kernel2[i,:]= kernel2[i,:]*weights + + kernel2 /= np.sqrt(d_vector[:,np.newaxis].dot(d_vector[np.newaxis])) + + eg, ev=np.linalg.eig(kernel2) + + if np.abs(eg[0]-1)>1.0e-14: + raise ValueError("Lowest eigenvalue should be 1") + + return eg, ev From 5fdd7b6e5352173b54885b74b991b78425f078a2 Mon Sep 17 00:00:00 2001 From: Eugen Hruska Date: Wed, 2 Mar 2016 12:46:24 -0600 Subject: [PATCH 02/22] description of diffusion map --- package/MDAnalysis/analysis/diffusionmap.py | 186 +++++++------------- 1 file changed, 64 insertions(+), 122 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 5ee3803dfc1..01fc8226574 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -22,19 +22,24 @@ :Year: 2016 :Copyright: GNU Public License v3 -BETTER -The module contains functions to fit a target structure to a reference -structure. They use the fast QCP algorithm to calculate the root mean -square distance (RMSD) between two coordinate sets [Theobald2005]_ and -the rotation matrix *R* that minimizes the RMSD [Liu2010]_. (Please -cite these references when using this module.). - -Typically, one selects a group of atoms (such as the C-alphas), -calculates the RMSD and transformation matrix, and applys the -transformation to the current frame of a trajectory to obtain the -rotated structure. The :func:`alignto` and :func:`rms_fit_trj` -functions can be used to do this for individual frames and -trajectories respectively. +The module contains the non-linear dimension reduction method diffusion map. +The diffusion map allows to get a quick estimate of the slowest collective +coordinates for a trajectory. This non-linear dimension reduction method +assumes that the trajectory is long enough to represents a probability +distribution of as protein close to the equilibrium. Further the diffusion map +assumes that the diffusion coefficients are constant. The eigenvectors with the +largest eigenvalues are the slowest collective coordinates. The complexity of +the diffusion map is O(N^2), where N is the number of frames in the trajectory. +Instead of a single trajectory a sample of protein structures can be used. +The sample should be equiblibrated, at least locally. Different weights can be used. +The order of the sampled structures in the trajectory is irrelevant. + +More details how the correst slowest collective coordinates can be calculated.in: +[1] Nadler, B, Lafon, S, Coifman, R. R, & Kevrekidis, I. G. (2013) Diffusion maps, +spectral clustering and reaction coordinates of dynamical systems. Applied and +Computational Harmonic Analysis 21, 113–127. +[2] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) Determi- nation +of reaction coordinates via locally scaled diffusion map. Journal of Chemical Physics. The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. @@ -46,7 +51,11 @@ The example uses files provided as part of the MDAnalysis test suite (in the variables :data:`~MDAnalysis.tests.datafiles.PSF` and -:data:`~MDAnalysis.tests.datafiles.DCD`). First load all modules and test data :: +:data:`~MDAnalysis.tests.datafiles.DCD`). Notice that these test data +aren't representing a sample from the equilibrium. This violates a basic +assumption of the diffusion map and the results shouldn't be interpreted for this reason. +This tutorial shows how to use the diffusionmap function. +First load all modules and test data :: >>> import MDAnalysis >>> import numpy as np @@ -54,32 +63,19 @@ >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF,DCD - In the simplest case, we can simply calculate the diffusion map from one trajectory :func:`diffusionmap`:: >>> u = MDAnalysis.Universe(PSF,DCD) >>> eg,ev=diffusionmap.diffusionmap(u) -Since o(N^2) long trajectories take too long, increase stride - -weight - -plot - -intro - -what eg, ev do -eg should after several steps fall off to 0 -ev[i,:] is the ith slowest eigenvector, this can be used to dimension reduction - -Other stuff in paper - +To see how the two slowest collective coordinates how the Other stuff in paper >>> import matplotlib.pyplot as plt - >>> plt.plot(eg[:10]) - >>> plt.title('diffusion map eigenvalues') - >>> plt.xlabel("eigenvalue") - >>> plt.ylabel("index of eigenvalue") + >>> import matplotlib.pyplot as plt + >>> plt.scatter(ev[:,1],ev[:,2]) + >>> plt.title('diffusion map') + >>> plt.xlabel("slowest collective coordinate") + >>> plt.ylabel("second slowest collective coordinate") >>> plt.show() @@ -120,92 +116,42 @@ logger = logging.getLogger('MDAnalysis.analysis.diffusionmap') -def diffusionmap(u, distance='rmsd', type_epsilon='average', stride=1):#, select="all", mass_weighted=False,subselection=None, tol_mass=0.1, strict=False): - """Spatially align *mobile* to *reference* by doing a RMSD fit on *select* atoms. - - The superposition is done in the following way: +def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): + """ Non-linear dimension reduction method diffusion map + + The dimension reduction is done in the following way: - 1. A rotation matrix is computed that minimizes the RMSD between - the coordinates of `mobile.select_atoms(sel1)` and - `reference.select_atoms(sel2)`; before the rotation, *mobile* is - translated so that its center of geometry (or center of mass) - coincides with the one of *reference*. (See below for explanation of - how *sel1* and *sel2* are derived from *select*.) - - 2. All atoms in :class:`~MDAnalysis.core.AtomGroup.Universe` that - contains *mobile* are shifted and rotated. (See below for how - to change this behavior through the *subselection* keyword.) - - The *mobile* and *reference* atom groups can be constructed so that they - already match atom by atom. In this case, *select* should be set to "all" - (or ``None``) so that no further selections are applied to *mobile* and - *reference*, therefore preserving the exact atom ordering (see - :ref:`ordered-selections-label`). - - .. Warning:: The atom order for *mobile* and *reference* is *only* - preserved when *select* is either "all" or ``None``. In any other case, - a new selection will be made that will sort the resulting AtomGroup by - index and therefore destroy the correspondence between the two groups. **It - is safest not to mix ordered AtomGroups with selection strings.** + 1. A RMSD between each every pair of frames is calculated. + 2. The normalized kernel is obtain as in [1] and [2]. + 3. The eigenvalues and eigenvectors of the normalized kernel are the output. :Arguments: - *mobile* - structure to be aligned, a :class:`~MDAnalysis.core.AtomGroup.AtomGroup` - or a whole :class:`~MDAnalysis.core.AtomGroup.Universe` - *reference* - reference structure, a :class:`~MDAnalysis.core.AtomGroup.AtomGroup` - or a whole :class:`~MDAnalysis.core.AtomGroup.Universe` + *u* + trajectory :class:`~MDAnalysis.core.AtomGroup.Universe` + The trajectory can be a long trajectory *select* 1. any valid selection string for - :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` that produces identical - selections in *mobile* and *reference*; or - 2. dictionary ``{'mobile':sel1, 'reference':sel2}``. - (the :func:`fasta2select` function returns such a - dictionary based on a ClustalW_ or STAMP_ sequence alignment); or - 3. tuple ``(sel1, sel2)`` - - When using 2. or 3. with *sel1* and *sel2* then these selections can also each be - a list of selection strings (to generate a AtomGroup with defined atom order as - described under :ref:`ordered-selections-label`). - *mass_weighted* : boolean - ``True`` uses the masses :meth:`reference.masses` as weights for the - RMSD fit. - *tol_mass* - Reject match if the atomic masses for matched atoms differ by more than - *tol_mass* [0.1] - *strict* - ``True`` - Will raise :exc:`SelectioError` if a single atom does not - match between the two selections. - ``False`` [default] - Will try to prepare a matching selection by dropping - residues with non-matching atoms. See :func:`get_matching_atoms` - for details. - *subselection* - Apply the transformation only to this selection. - - ``None`` [default] - Apply to `mobile.universe.atoms` (i.e. all atoms in the - context of the selection from *mobile* such as the rest of a - protein, ligands and the surrounding water) - *selection-string* - Apply to `mobile.select_atoms(selection-string)` - :class:`~MDAnalysis.core.AtomGroup.AtomGroup` - Apply to the arbitrary group of atoms + :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` + This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. + *epsilon* : float + Specifies the epsilon used for the diffusion map. More information in [1] and [2] + With 'average' the average of the RMSD to the k-nearest-neighbor will be used. + *k* : specifies the k for the k-nearest-neighbor is average epsilon is used. + *weight* : None or numpy array of the same length as the trajectory + With 'None' the weight of each frame of the trajectory will be the same. + If order of the weights has to be the same as the order of framesin the trajectory. + :Returns: eigenvalues, eigenvectors + the second and higher eigenvectors ev[i+1,:] represent the i-th slowest collective coordinates. """ frames=u.trajectory - ref_atoms = u.select_atoms('all') + ref_atoms = u.select_atoms(select) nframes=len(frames) natoms = ref_atoms.n_atoms - #define data - - #mobile_atoms = u.atoms - rmsd_matrix=np.zeros((nframes,nframes), dtype=np.float64) epsilon=np.zeros((nframes,), dtype=np.float64) kernel2=np.zeros((nframes,nframes), dtype=np.float64) @@ -216,40 +162,35 @@ def diffusionmap(u, distance='rmsd', type_epsilon='average', stride=1):#, select type_epsilon='average' if weight==None: weights=np.full((nframes,),1) - + else: + if weight.shape[0]!=nframes: + raise ValueError("The weight should have the same length as the trajectroy") + else: + weights=weight for i in range(nframes): logger.info("calculating rmsd from structure "+str(i)+" to all") i_ref=np.copy(u.trajectory[i].positions-ref_atoms.center_of_mass()) - #i_ref, ref_atoms.center_of_mass()#=i_ref-ref_atoms.center_of_mass() - #ref_atoms.center_of_mass() for j in range(nframes): j_ref=np.copy(u.trajectory[j].positions-ref_atoms.center_of_mass()) rmsd_matrix[i,j] = qcp.CalcRMSDRotationalMatrix(i_ref.T.astype(np.float64), j_ref.T.astype(np.float64), natoms, rot, weight) - #print i,j,rmsd_matrix[i,j] - - #print rmsd_matrix[:10,:10] - #print rmsd_matrix.max() - #all_traj=u.trajectory[0] - #current_ref=u.trajectory[-1] - #rmsd_matrix[i,:] = rms.rmsd(u2.atoms.coordinates(),u.atoms.coordinates()) - #calculate epsilons (average) - #rmsd_matrix[0,np.argsort(rmsd_matrix[0,:])[10]] + #calculate epsilons for i in range(nframes): #np.argsort(rmsd_matrix[i,:])#[10]] - epsilon[i]=rmsd_matrix[i,np.argsort(rmsd_matrix[i,:])[10]] + epsilon[i]=rmsd_matrix[i,np.argsort(rmsd_matrix[i,:])[k]] - if type_epsilon=='average': + if epsilon=='average': epsilon=np.full((nframes,),epsilon.mean()) + else: + epsilon=np.full((nframes,),epsilon) logger.info("epsilon: "+str(epsilon)) - #calculate exponetial to get + #calculate normalized kernel for i in range(nframes): kernel2[i,:]=np.exp(-rmsd_matrix[i,:]**2/(epsilon[i]*epsilon[:])) - #normalisation p_vector = np.zeros((nframes,)) d_vector = np.zeros((nframes,)) for i in range(nframes): @@ -265,9 +206,10 @@ def diffusionmap(u, distance='rmsd', type_epsilon='average', stride=1):#, select kernel2 /= np.sqrt(d_vector[:,np.newaxis].dot(d_vector[np.newaxis])) + #eigenvalues and eigenvector are the collective coordinates eg, ev=np.linalg.eig(kernel2) if np.abs(eg[0]-1)>1.0e-14: - raise ValueError("Lowest eigenvalue should be 1") + raise ValueError("Lowest eigenvalue should be 1 up to numeric precision") return eg, ev From ce7dd68e97c1e65002b57f54359348449e5f3a41 Mon Sep 17 00:00:00 2001 From: Eugen Hruska Date: Wed, 2 Mar 2016 13:08:01 -0600 Subject: [PATCH 03/22] diffusionmap minor fix --- package/MDAnalysis/analysis/diffusionmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 01fc8226574..e4f3e300b1d 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -160,7 +160,7 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): rot= np.zeros(9, dtype=np.float64) weight=None type_epsilon='average' - if weight==None: + if weight is None: weights=np.full((nframes,),1) else: if weight.shape[0]!=nframes: From 6f8e8cc02341e55502e1116931c303265e1f13b4 Mon Sep 17 00:00:00 2001 From: Eugen Hruska Date: Wed, 2 Mar 2016 15:30:16 -0600 Subject: [PATCH 04/22] test for diffusionmap --- .../analysis/test_diffusionmap.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 testsuite/MDAnalysisTests/analysis/test_diffusionmap.py diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py new file mode 100644 index 00000000000..9344b011d87 --- /dev/null +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -0,0 +1,57 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- http://www.MDAnalysis.org +# Copyright (c) 2006-2015 Naveen Michaud-Agrawal, Elizabeth J. Denning, Oliver +# Beckstein and contributors (see AUTHORS for the full list) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +from __future__ import print_function + +import MDAnalysis + +import MDAnalysis.analysis.align as align +import MDAnalysis.analysis.rms as rms +import MDAnalysis.lib.qcprot as qcp +import MDAnalysis.analysis.diffusionmap as diffusionmap +from MDAnalysis import SelectionError + +from numpy.testing import (TestCase, dec, + assert_almost_equal, assert_raises, assert_equal) +import numpy as np +from nose.plugins.attrib import attr + +import tempdir +from os import path + +from MDAnalysisTests.datafiles import PSF, DCD, FASTA +from MDAnalysisTests import executable_not_found, parser_not_found + + +class TestDiffusionmap(object): + def __init__(self): + u = MDAnalysis.Universe(PSF,DCD) + eg,ev=diffusionmap.diffusionmap(u) + self.eg=eg + self.ev=ev + + + def test_eg(self): + assert_equal(self.eg.shape, (98,)) + assert_almost_equal(self.eg[0], 1.0) + assert_almost_equal(self.eg[-1],0.03174182) + + + def test_ev(self): + assert_equal(self.ev.shape, (98,98)) + assert_almost_equal(self.ev[0,0], .095836037343022831) + + + From b6f8632754831e3c6d6600b4e04fce6f986d40c4 Mon Sep 17 00:00:00 2001 From: Eugen Hruska Date: Wed, 2 Mar 2016 15:55:50 -0600 Subject: [PATCH 05/22] diffusionmap faster test --- .../analysis/test_diffusionmap.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index 9344b011d87..b49e9760e4c 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -17,9 +17,6 @@ import MDAnalysis -import MDAnalysis.analysis.align as align -import MDAnalysis.analysis.rms as rms -import MDAnalysis.lib.qcprot as qcp import MDAnalysis.analysis.diffusionmap as diffusionmap from MDAnalysis import SelectionError @@ -31,27 +28,36 @@ import tempdir from os import path -from MDAnalysisTests.datafiles import PSF, DCD, FASTA +from MDAnalysisTests.datafiles import PDB,XTC from MDAnalysisTests import executable_not_found, parser_not_found class TestDiffusionmap(object): def __init__(self): - u = MDAnalysis.Universe(PSF,DCD) - eg,ev=diffusionmap.diffusionmap(u) + #slow 6s test + #u = MDAnalysis.Universe(PSF,DCD) + #eg,ev=diffusionmap.diffusionmap(u) + #assert_equal(self.eg.shape, (98,)) + #assert_almost_equal(self.eg[0], 1.0) + #assert_almost_equal(self.eg[-1],0.03174182) + #assert_equal(self.ev.shape, (98,98)) + #assert_almost_equal(self.ev[0,0], .095836037343022831) + #faster + u = MDAnalysis.Universe(PDB, XTC) + eg,ev=diffusionmap.diffusionmap(u, select='backbone', k=5) self.eg=eg self.ev=ev def test_eg(self): - assert_equal(self.eg.shape, (98,)) + assert_equal(self.eg.shape, (10,)) assert_almost_equal(self.eg[0], 1.0) - assert_almost_equal(self.eg[-1],0.03174182) + assert_almost_equal(self.eg[-1],0.03661048812801191) def test_ev(self): - assert_equal(self.ev.shape, (98,98)) - assert_almost_equal(self.ev[0,0], .095836037343022831) + assert_equal(self.ev.shape, (10,10)) + assert_almost_equal(self.ev[0,0], -0.30796900898350615) From e77350012ea7301e69eacfcb468488fff3a80c39 Mon Sep 17 00:00:00 2001 From: Eugen Hruska Date: Thu, 10 Mar 2016 18:44:18 -0600 Subject: [PATCH 06/22] included notes --- package/MDAnalysis/analysis/diffusionmap.py | 179 +++++++++--------- .../analysis/test_diffusionmap.py | 30 +-- 2 files changed, 102 insertions(+), 107 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index e4f3e300b1d..e9f26f8730b 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -34,15 +34,9 @@ The sample should be equiblibrated, at least locally. Different weights can be used. The order of the sampled structures in the trajectory is irrelevant. -More details how the correst slowest collective coordinates can be calculated.in: -[1] Nadler, B, Lafon, S, Coifman, R. R, & Kevrekidis, I. G. (2013) Diffusion maps, -spectral clustering and reaction coordinates of dynamical systems. Applied and -Computational Harmonic Analysis 21, 113–127. -[2] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) Determi- nation -of reaction coordinates via locally scaled diffusion map. Journal of Chemical Physics. - The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. +More details about diffusion maps are in [Lafon1]_ and [Clementi1]_. .. _Diffusion-Map-tutorial: @@ -59,18 +53,17 @@ >>> import MDAnalysis >>> import numpy as np - >>> import MDAnalysis.lib.qcprot as qcp >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF,DCD -In the simplest case, we can simply calculate the diffusion map from one trajectory :func:`diffusionmap`:: +In the simplest case, we can simply calculate the diffusion map from +one trajectory :func:`diffusionmap`:: >>> u = MDAnalysis.Universe(PSF,DCD) >>> eg,ev=diffusionmap.diffusionmap(u) To see how the two slowest collective coordinates how the Other stuff in paper - >>> import matplotlib.pyplot as plt >>> import matplotlib.pyplot as plt >>> plt.scatter(ev[:,1],ev[:,2]) >>> plt.title('diffusion map') @@ -79,137 +72,149 @@ >>> plt.show() -Common usage ------------- - -To **fit a single structure** with :func:`alignto`:: - - >>> ref = Universe(PSF, PDB_small) - >>> mobile = Universe(PSF, DCD) # we use the first frame - >>> alignto(mobile, ref, select="protein and name CA", mass_weighted=True) - -This will change *all* coordinates in *mobile* so that the protein -C-alpha atoms are optimally superimposed (translation and rotation). - - Functions --------- .. autofunction:: diffusionmap +References +--------- -""" - -import os.path -from six.moves import range, zip, zip_longest -import numpy as np -import warnings -import logging +If you use this QCP rotation calculation method in a publication, please +reference: +..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. +Appl. Comput. Harmon. Anal. 21, 5–30. +..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) +Determination of reaction coordinates via locally scaled +diffusion map. Journal of Chemical Physics. -import MDAnalysis.lib.qcprot as qcp -from MDAnalysis.exceptions import SelectionError, SelectionWarning -from MDAnalysis.lib.log import ProgressMeter -import MDAnalysis.analysis.rms as rms +""" -logger = logging.getLogger('MDAnalysis.analysis.diffusionmap') +import logging +import numpy as np +import MDAnalysis.lib.qcprot as qcp +from six.moves import range def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): - """ Non-linear dimension reduction method diffusion map - - The dimension reduction is done in the following way: + """Non-linear dimension reduction method diffusion map - 1. A RMSD between each every pair of frames is calculated. - 2. The normalized kernel is obtain as in [1] and [2]. - 3. The eigenvalues and eigenvectors of the normalized kernel are the output. + diffusionmap(u, select, epsilon, k, weight) - :Arguments: + Dimension reduction with diffusion map of the structures in the universe. + + Parameters + ------------- *u* trajectory :class:`~MDAnalysis.core.AtomGroup.Universe` The trajectory can be a long trajectory - *select* + select: str, optional 1. any valid selection string for :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. - *epsilon* : float + epsilon : float, optional Specifies the epsilon used for the diffusion map. More information in [1] and [2] With 'average' the average of the RMSD to the k-nearest-neighbor will be used. - *k* : specifies the k for the k-nearest-neighbor is average epsilon is used. - *weight* : None or numpy array of the same length as the trajectory + k : int, optional + specifies the k for the k-nearest-neighbor is average epsilon is used. + weight: numpy array, optional + The numpy array has to have the same length as the trajectory. With 'None' the weight of each frame of the trajectory will be the same. If order of the weights has to be the same as the order of framesin the trajectory. - :Returns: eigenvalues, eigenvectors - the second and higher eigenvectors ev[i+1,:] represent the i-th slowest collective coordinates. + Returns + ---------------- + eigenvalues: numpy array + Eigenvalues of the diffusion map + eigenvectors: numpy array + Eigenvectors of the diffusion map + The second and higher eigenvectors ev[i+1,:] represent the i-th slowest collective coordinates. - """ + Notes + --------------- + + The dimension reduction works in the following way: - frames=u.trajectory + 1. A RMSD between each every pair of frames is calculated. + 2. The normalized kernel is obtain as in [1] and [2]. + 3. The eigenvalues and eigenvectors of the normalized kernel are the output. + + """ + logger = logging.getLogger('MDAnalysis.analysis.diffusionmap') + frames = u.trajectory ref_atoms = u.select_atoms(select) - nframes=len(frames) + nframes = len(frames) natoms = ref_atoms.n_atoms - rmsd_matrix=np.zeros((nframes,nframes), dtype=np.float64) - epsilon=np.zeros((nframes,), dtype=np.float64) - kernel2=np.zeros((nframes,nframes), dtype=np.float64) + rmsd_matrix = np.zeros((nframes, nframes)) + kernel2 = np.zeros((nframes, nframes)) + + if epsilon == 'average': + epsilon = np.zeros((nframes, ), ) + type_epsilon = 'average' + else: + value_epsilon = epsilon + epsilon = np.full((nframes, ), value_epsilon) + type_epsilon = 'constant' + - rmsd = np.zeros((nframes,)) - rot= np.zeros(9, dtype=np.float64) - weight=None - type_epsilon='average' + rot = np.zeros(9) if weight is None: - weights=np.full((nframes,),1) + weights = np.full((nframes, ), 1) else: - if weight.shape[0]!=nframes: - raise ValueError("The weight should have the same length as the trajectroy") - else: - weights=weight + if weight.shape[0] != nframes: + raise ValueError("The weight should have the same length as the trajectroy") + else: + weights = weight for i in range(nframes): - logger.info("calculating rmsd from structure "+str(i)+" to all") - i_ref=np.copy(u.trajectory[i].positions-ref_atoms.center_of_mass()) - for j in range(nframes): - j_ref=np.copy(u.trajectory[j].positions-ref_atoms.center_of_mass()) - rmsd_matrix[i,j] = qcp.CalcRMSDRotationalMatrix(i_ref.T.astype(np.float64), j_ref.T.astype(np.float64), natoms, rot, weight) + logger.info("calculating rmsd from structure {0} to all".format(i)) + i_ref = np.copy(u.trajectory[i].positions-ref_atoms.center_of_mass()) + for j in range(i, nframes): + j_ref = np.copy(u.trajectory[j].positions-ref_atoms.center_of_mass()) + rmsd_matrix[i, j] = qcp.CalcRMSDRotationalMatrix(i_ref.T.astype(np.float64), \ + j_ref.T.astype(np.float64), natoms, rot, weight) + + #fill in symmetric values + rmsd_matrix = rmsd_matrix + rmsd_matrix.T - np.diag(rmsd_matrix.diagonal()) #calculate epsilons - for i in range(nframes): - #np.argsort(rmsd_matrix[i,:])#[10]] - epsilon[i]=rmsd_matrix[i,np.argsort(rmsd_matrix[i,:])[k]] + if type_epsilon == 'average': + for i in range(nframes): + #np.argsort(rmsd_matrix[i,:])#[10]] + epsilon[i] = rmsd_matrix[i, np.argsort(rmsd_matrix[i, :])[k]] + epsilon = np.full((nframes, ), epsilon.mean()) - if epsilon=='average': - epsilon=np.full((nframes,),epsilon.mean()) - else: - epsilon=np.full((nframes,),epsilon) - logger.info("epsilon: "+str(epsilon)) + logger.info('epsilon: {0}'.format(epsilon)) #calculate normalized kernel for i in range(nframes): - kernel2[i,:]=np.exp(-rmsd_matrix[i,:]**2/(epsilon[i]*epsilon[:])) + kernel2[i, :] = np.exp(-rmsd_matrix[i, :]**2/(epsilon[i]*epsilon[:])) - p_vector = np.zeros((nframes,)) - d_vector = np.zeros((nframes,)) + p_vector = np.zeros((nframes, )) + d_vector = np.zeros((nframes, )) for i in range(nframes): - p_vector[i]=np.dot(kernel2[i,:],weights) + p_vector[i] = np.dot(kernel2[i, :], weights) - kernel2 /= np.sqrt(p_vector[:,np.newaxis].dot(p_vector[np.newaxis])) + kernel2 /= np.sqrt(p_vector[:, np.newaxis].dot(p_vector[np.newaxis])) for i in range(nframes): - d_vector[i]=np.dot(kernel2[i,:],weights) + d_vector[i] = np.dot(kernel2[i, :], weights) for i in range(nframes): - kernel2[i,:]= kernel2[i,:]*weights + kernel2[i, :] = kernel2[i, :]*weights - kernel2 /= np.sqrt(d_vector[:,np.newaxis].dot(d_vector[np.newaxis])) + kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) #eigenvalues and eigenvector are the collective coordinates - eg, ev=np.linalg.eig(kernel2) + eg, ev = np.linalg.eig(kernel2) - if np.abs(eg[0]-1)>1.0e-14: - raise ValueError("Lowest eigenvalue should be 1 up to numeric precision") + eg_arg = np.argsort(eg) + eg = eg[eg_arg[::-1]] + ev = ev[eg_arg[::-1],:] return eg, ev diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index b49e9760e4c..8242d5890e6 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -14,22 +14,12 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from __future__ import print_function - import MDAnalysis - import MDAnalysis.analysis.diffusionmap as diffusionmap -from MDAnalysis import SelectionError - -from numpy.testing import (TestCase, dec, - assert_almost_equal, assert_raises, assert_equal) -import numpy as np -from nose.plugins.attrib import attr +from numpy.testing import (assert_almost_equal, assert_equal) -import tempdir -from os import path -from MDAnalysisTests.datafiles import PDB,XTC -from MDAnalysisTests import executable_not_found, parser_not_found +from MDAnalysisTests.datafiles import PDB, XTC class TestDiffusionmap(object): @@ -44,20 +34,20 @@ def __init__(self): #assert_almost_equal(self.ev[0,0], .095836037343022831) #faster u = MDAnalysis.Universe(PDB, XTC) - eg,ev=diffusionmap.diffusionmap(u, select='backbone', k=5) - self.eg=eg - self.ev=ev + eg, ev = diffusionmap.diffusionmap(u, select = 'backbone', k = 5) + self.eg = eg + self.ev = ev def test_eg(self): - assert_equal(self.eg.shape, (10,)) - assert_almost_equal(self.eg[0], 1.0) - assert_almost_equal(self.eg[-1],0.03661048812801191) + assert_equal(self.eg.shape, (10, )) + assert_almost_equal(self.eg[0], 1.0, decimal=5) + assert_almost_equal(self.eg[-1], 0.0142, decimal = 3) def test_ev(self): - assert_equal(self.ev.shape, (10,10)) - assert_almost_equal(self.ev[0,0], -0.30796900898350615) + assert_equal(self.ev.shape, (10, 10)) + assert_almost_equal(self.ev[0, 0], -0.3019, decimal = 2) From f2ae4a406aca350b6940eb5525d2e3386bb79193 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Mon, 23 May 2016 21:08:24 -0700 Subject: [PATCH 07/22] initial work for refactor --- package/MDAnalysis/analysis/align.py | 1 + package/MDAnalysis/analysis/diffusionmap.py | 197 ++++++++++++++++---- 2 files changed, 166 insertions(+), 32 deletions(-) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 4b1c2c755a0..4c9b2f5908c 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -259,6 +259,7 @@ def rotation_matrix(a, b, weights=None): # so that R acts **to the left** and can be broadcasted; we're saving # one transpose. [orbeckst]) rmsd = qcp.CalcRMSDRotationalMatrix(a.T, b.T, N, rot, weights) + logger.info("qcp: %d", rmsd) return np.matrix(rot.reshape(3, 3)), rmsd diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index e9f26f8730b..aa0a6e34e12 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -23,15 +23,15 @@ :Copyright: GNU Public License v3 The module contains the non-linear dimension reduction method diffusion map. -The diffusion map allows to get a quick estimate of the slowest collective -coordinates for a trajectory. This non-linear dimension reduction method -assumes that the trajectory is long enough to represents a probability -distribution of as protein close to the equilibrium. Further the diffusion map -assumes that the diffusion coefficients are constant. The eigenvectors with the -largest eigenvalues are the slowest collective coordinates. The complexity of -the diffusion map is O(N^2), where N is the number of frames in the trajectory. -Instead of a single trajectory a sample of protein structures can be used. -The sample should be equiblibrated, at least locally. Different weights can be used. +The diffusion map allows to get a quick estimate of the slowest collective +coordinates for a trajectory. This non-linear dimension reduction method +assumes that the trajectory is long enough to represent a probability +distribution of as protein close to the equilibrium. Furthermore, the diffusion map +assumes that the diffusion coefficients are constant. The eigenvectors with the +largest eigenvalues are the slowest collective coordinates. The complexity of +the diffusion map is O(N^3), where N is the number of frames in the trajectory. +Instead of a single trajectory a sample of protein structures can be used. +The sample should be equiblibrated, at least locally. Different weights can be used. The order of the sampled structures in the trajectory is irrelevant. The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. @@ -45,9 +45,9 @@ The example uses files provided as part of the MDAnalysis test suite (in the variables :data:`~MDAnalysis.tests.datafiles.PSF` and -:data:`~MDAnalysis.tests.datafiles.DCD`). Notice that these test data -aren't representing a sample from the equilibrium. This violates a basic -assumption of the diffusion map and the results shouldn't be interpreted for this reason. +:data:`~MDAnalysis.tests.datafiles.DCD`). Notice that these test data +aren't representing a sample from the equilibrium. This violates a basic +assumption of the diffusion map and the results shouldn't be interpreted for this reason. This tutorial shows how to use the diffusionmap function. First load all modules and test data :: @@ -56,11 +56,12 @@ >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF,DCD -In the simplest case, we can simply calculate the diffusion map from +In the simplest case, we can simply calculate the diffusion map from one trajectory :func:`diffusionmap`:: >>> u = MDAnalysis.Universe(PSF,DCD) - >>> eg,ev=diffusionmap.diffusionmap(u) + >>> dmap = diffusionmap.DiffusionMap(u) + >>> eg,ev= diffusionmap.DiffusionMap(u).run() To see how the two slowest collective coordinates how the Other stuff in paper @@ -71,21 +72,21 @@ >>> plt.ylabel("second slowest collective coordinate") >>> plt.show() +Classes +------- -Functions ---------- - -.. autofunction:: diffusionmap +.. autoclass:: Contacts + :members: References --------- If you use this QCP rotation calculation method in a publication, please reference: -..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. +..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. Appl. Comput. Harmon. Anal. 21, 5–30. -..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) -Determination of reaction coordinates via locally scaled +..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) +Determination of reaction coordinates via locally scaled diffusion map. Journal of Chemical Physics. @@ -94,9 +95,12 @@ import logging import numpy as np +from deco import * import MDAnalysis.lib.qcprot as qcp from six.moves import range +from .base import AnalysisBase +logger = logging.getLogger("MDAnalysis.analysis.diffusionmap") def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): """Non-linear dimension reduction method diffusion map @@ -107,16 +111,15 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): Parameters ------------- - *u* - trajectory :class:`~MDAnalysis.core.AtomGroup.Universe` - The trajectory can be a long trajectory - select: str, optional + u : trajectory :class:`~MDAnalysis.core.AtomGroup.Universe` + Remember that eigenvalue decomposition scales at O(n^3) + select: str, optional 1. any valid selection string for - :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` + :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. - epsilon : float, optional - Specifies the epsilon used for the diffusion map. More information in [1] and [2] - With 'average' the average of the RMSD to the k-nearest-neighbor will be used. + epsilon : float, optional + Specifies the epsilon used for the diffusion map. More information in [1] and [2] + With 'average' the average of the RMSD to the k-nearest-neighbor will be used. k : int, optional specifies the k for the k-nearest-neighbor is average epsilon is used. weight: numpy array, optional @@ -135,7 +138,7 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): Notes --------------- - + The dimension reduction works in the following way: 1. A RMSD between each every pair of frames is calculated. @@ -143,7 +146,6 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): 3. The eigenvalues and eigenvectors of the normalized kernel are the output. """ - logger = logging.getLogger('MDAnalysis.analysis.diffusionmap') frames = u.trajectory ref_atoms = u.select_atoms(select) nframes = len(frames) @@ -181,7 +183,7 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): #fill in symmetric values rmsd_matrix = rmsd_matrix + rmsd_matrix.T - np.diag(rmsd_matrix.diagonal()) - #calculate epsilons + #calculate epsilons if type_epsilon == 'average': for i in range(nframes): #np.argsort(rmsd_matrix[i,:])#[10]] @@ -218,3 +220,134 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): ev = ev[eg_arg[::-1],:] return eg, ev + +class DiffusionMap(AnalysisBase): + def __init__(self, u, select='all', epsilon='average', k=10, weights=None, + metric=None, start=None, stop=None, step=None): + """ + Parameters + ------------- + u : trajectory `~MDAnalysis.core.AtomGroup.Universe` + The MD Trajectory for dimension reduction, remember that computational + scales at O(n^3). Cost can be reduced by increasing step interval or + specifying start and stop + select: str, optional + 1. any valid selection string for + :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` + This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. + epsilon : float, optional + Specifies the epsilon used for the diffusion map. More information in [1] and [2] + With 'average' the average of the RMSD to the k-nearest-neighbor will be used. + k : int, optional + specifies the k for the k-nearest-neighbor is average epsilon is used. + weights: list, optional + The list has to have the same length as the trajectory. + With 'None' the weight of each frame of the trajectory will be the same. + + metric : function, optional + Maps two numpy arrays to a float, is positive definite and symmetric. + start : int, optional + First frame of trajectory to analyse, Default: 0 + stop : int, optional + Last frame of trajectory to analyse, Default: -1 + step : int, optional + Step between frames to analyse, Default: 1 + """ + self.u = u + self.atoms = u.select_atoms(select) + self.natoms = self.atoms.n_atoms + self.k = k + frames = u.trajectory + self.epsilon = epsilon + if metric is not None: + self.metric = metric + else: + self.metric = qcp.CalcRMSDRotationalMatrix + + self._setup_frames(frames, start, stop, step) + + if weights is None: + self.weights = np.ones((self.nframes,)) + else: + if weights.shape[0] != self.nframes: + raise ValueError("The weight should have the same length as the trajectroy") + else: + # weights are constructed as relative to the mean + self.weights = np.asarray(weights, dtype=np.float64) / np.mean(weights) + + def _prepare(self): + self.rmsd_matrix = np.zeros((self.nframes,self.nframes)) + + if self.epsilon == 'average': + self.epsilon = np.zeros((self.nframes, ), ) + self.type_epsilon = 'average' + else: + value_epsilon = epsilon + self.epsilon = np.full((nframes, ), value_epsilon) + self.type_epsilon = 'constant' + + self.rot = np.zeros(9) + + #mappable function + def calc_diffusion(self, traj_index): + """Calculates diffusion distance from metric function + rmsd_matrix will be 0's in the lower triangle. + """ + logger.info("calculating rmsd from structure {0} to all".format(traj_index)) + i_ref = np.copy(self.u.trajectory[traj_index].positions-self.atoms.center_of_mass()) + + for j in range(traj_index, self.nframes): + j_ref = np.copy(self.u.trajectory[j].positions-self.atoms.center_of_mass()) + logger.info('_ts.frame {0}'.format(self._ts.frame)) + self.rmsd_matrix[traj_index, j] = self.metric(i_ref.T.astype(np.float64),\ + j_ref.T.astype(np.float64), self.natoms, self.rot, weights=None) + + def _single_frame(self): + logger.info("_ts.frame {0}, numframes{1}".format(self._ts.frame, self.nframes)) + self.calc_diffusion(self._ts.frame) + + def _conclude(self): + + logger.info('rmsd_matrix: {0}'.format(self.rmsd_matrix)) + + self.rmsd_matrix = self.rmsd_matrix + self.rmsd_matrix.T - \ + np.diag(self.rmsd_matrix.diagonal()) + if self.type_epsilon == 'average': + for i in range(self.nframes): + #np.argsort(rmsd_matrix[i,:])#[10]] + self.epsilon[i] = self.rmsd_matrix[i, \ + np.argsort(self.rmsd_matrix[i, :])[self.k]] + + self.epsilon = np.full((self.nframes, ), self.epsilon.mean()) + + logger.info('epsilon: {0}'.format(self.epsilon)) + + self.kernel2 = np.zeros((self.nframes, self.nframes)) + + #possibly mappable + for i in range(self.nframes): + self.kernel2[i, :] = np.exp(-self.rmsd_matrix[i, :]**2/(self.epsilon[i]*self.epsilon[:])) + + p_vector = np.zeros((self.nframes, )) + d_vector = np.zeros((self.nframes, )) + + for i in range(self.nframes): + p_vector[i] = np.dot(self.kernel2[i, :], self.weights) + + self.kernel2 /= np.sqrt(p_vector[:, np.newaxis].dot(p_vector[np.newaxis])) + + for i in range(self.nframes): + d_vector[i] = np.dot(self.kernel2[i, :], self.weights) + + for i in range(self.nframes): + self.kernel2[i, :] = self.kernel2[i, :]*self.weights + + self.kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) + + logger.info('kernel: {0}'.format(self.kernel2)) + #eigenvalues and eigenvector are the collective coordinates + eg, ev = np.linalg.eig(self.kernel2) + + eg_arg = np.argsort(eg) + self.eg = eg[eg_arg[::-1]] + self.ev = ev[eg_arg[::-1],:] From a14242206e3fa9bc5ad27d78924ee2d38eb43536 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Sun, 29 May 2016 13:49:17 -0700 Subject: [PATCH 08/22] refactor of @euhruska diffusion map --- package/MDAnalysis/analysis/diffusionmap.py | 186 ++++---------------- 1 file changed, 33 insertions(+), 153 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index aa0a6e34e12..e8eaa194a25 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -18,12 +18,12 @@ Diffusion map --- :mod:`MDAnalysis.analysis.diffusionmap` ===================================================================== -:Author: Eugen Hruska +:Authors: Eugen Hruska, John Detlefs :Year: 2016 :Copyright: GNU Public License v3 The module contains the non-linear dimension reduction method diffusion map. -The diffusion map allows to get a quick estimate of the slowest collective +The diffusion map provides an estimate of the slowest collective coordinates for a trajectory. This non-linear dimension reduction method assumes that the trajectory is long enough to represent a probability distribution of as protein close to the equilibrium. Furthermore, the diffusion map @@ -45,8 +45,8 @@ The example uses files provided as part of the MDAnalysis test suite (in the variables :data:`~MDAnalysis.tests.datafiles.PSF` and -:data:`~MDAnalysis.tests.datafiles.DCD`). Notice that these test data -aren't representing a sample from the equilibrium. This violates a basic +:data:`~MDAnalysis.tests.datafiles.DCD`). Notice that this trajectory +does not represent a sample from the equilibrium. This violates a basic assumption of the diffusion map and the results shouldn't be interpreted for this reason. This tutorial shows how to use the diffusionmap function. First load all modules and test data :: @@ -56,32 +56,26 @@ >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF,DCD -In the simplest case, we can simply calculate the diffusion map from -one trajectory :func:`diffusionmap`:: +Given an universe or atom group, we can calculate the diffusion map from +that trajectory using :class:`DiffusionMap`:: and get the corresponding eigenvalues +and eigenvectors >>> u = MDAnalysis.Universe(PSF,DCD) >>> dmap = diffusionmap.DiffusionMap(u) - >>> eg,ev= diffusionmap.DiffusionMap(u).run() - -To see how the two slowest collective coordinates how the Other stuff in paper - - >>> import matplotlib.pyplot as plt - >>> plt.scatter(ev[:,1],ev[:,2]) - >>> plt.title('diffusion map') - >>> plt.xlabel("slowest collective coordinate") - >>> plt.ylabel("second slowest collective coordinate") - >>> plt.show() + >>> dmap.run() + >>> eigenvalues = dmap.eigenvalues + >>> eigenvectors = dmap.eigenvectors Classes ------- -.. autoclass:: Contacts +.. autoclass:: DiffusionMap :members: References --------- -If you use this QCP rotation calculation method in a publication, please +If you use this Dimension Reduction method in a publication, please reference: ..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. Appl. Comput. Harmon. Anal. 21, 5–30. @@ -89,139 +83,38 @@ Determination of reaction coordinates via locally scaled diffusion map. Journal of Chemical Physics. +.. If you choose the default metric, this module uses the fast QCP algorithm +[Theobald2005]_ to calculate the root mean square distance (RMSD) between +two coordinate sets (as implemented in :func:`MDAnalysis.lib.qcprot.CalcRMSDRotationalMatrix`). +When using this module in published work please cite [Theobald2005]_. """ import logging import numpy as np -from deco import * import MDAnalysis.lib.qcprot as qcp from six.moves import range from .base import AnalysisBase logger = logging.getLogger("MDAnalysis.analysis.diffusionmap") -def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): - """Non-linear dimension reduction method diffusion map - diffusionmap(u, select, epsilon, k, weight) +class DiffusionMap(AnalysisBase): + """Non-linear dimension reduction method Dimension reduction with diffusion map of the structures in the universe. - Parameters - ------------- - u : trajectory :class:`~MDAnalysis.core.AtomGroup.Universe` - Remember that eigenvalue decomposition scales at O(n^3) - select: str, optional - 1. any valid selection string for - :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` - This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. - epsilon : float, optional - Specifies the epsilon used for the diffusion map. More information in [1] and [2] - With 'average' the average of the RMSD to the k-nearest-neighbor will be used. - k : int, optional - specifies the k for the k-nearest-neighbor is average epsilon is used. - weight: numpy array, optional - The numpy array has to have the same length as the trajectory. - With 'None' the weight of each frame of the trajectory will be the same. - If order of the weights has to be the same as the order of framesin the trajectory. - - - Returns - ---------------- + Attributes + ---------- eigenvalues: numpy array Eigenvalues of the diffusion map eigenvectors: numpy array Eigenvectors of the diffusion map The second and higher eigenvectors ev[i+1,:] represent the i-th slowest collective coordinates. - Notes - --------------- - - The dimension reduction works in the following way: - - 1. A RMSD between each every pair of frames is calculated. - 2. The normalized kernel is obtain as in [1] and [2]. - 3. The eigenvalues and eigenvectors of the normalized kernel are the output. - """ - frames = u.trajectory - ref_atoms = u.select_atoms(select) - nframes = len(frames) - natoms = ref_atoms.n_atoms - - rmsd_matrix = np.zeros((nframes, nframes)) - kernel2 = np.zeros((nframes, nframes)) - - if epsilon == 'average': - epsilon = np.zeros((nframes, ), ) - type_epsilon = 'average' - else: - value_epsilon = epsilon - epsilon = np.full((nframes, ), value_epsilon) - type_epsilon = 'constant' - - - rot = np.zeros(9) - if weight is None: - weights = np.full((nframes, ), 1) - else: - if weight.shape[0] != nframes: - raise ValueError("The weight should have the same length as the trajectroy") - else: - weights = weight - - for i in range(nframes): - logger.info("calculating rmsd from structure {0} to all".format(i)) - i_ref = np.copy(u.trajectory[i].positions-ref_atoms.center_of_mass()) - for j in range(i, nframes): - j_ref = np.copy(u.trajectory[j].positions-ref_atoms.center_of_mass()) - rmsd_matrix[i, j] = qcp.CalcRMSDRotationalMatrix(i_ref.T.astype(np.float64), \ - j_ref.T.astype(np.float64), natoms, rot, weight) - - #fill in symmetric values - rmsd_matrix = rmsd_matrix + rmsd_matrix.T - np.diag(rmsd_matrix.diagonal()) - - #calculate epsilons - if type_epsilon == 'average': - for i in range(nframes): - #np.argsort(rmsd_matrix[i,:])#[10]] - epsilon[i] = rmsd_matrix[i, np.argsort(rmsd_matrix[i, :])[k]] - epsilon = np.full((nframes, ), epsilon.mean()) - - - logger.info('epsilon: {0}'.format(epsilon)) - - #calculate normalized kernel - for i in range(nframes): - kernel2[i, :] = np.exp(-rmsd_matrix[i, :]**2/(epsilon[i]*epsilon[:])) - - p_vector = np.zeros((nframes, )) - d_vector = np.zeros((nframes, )) - for i in range(nframes): - p_vector[i] = np.dot(kernel2[i, :], weights) - - kernel2 /= np.sqrt(p_vector[:, np.newaxis].dot(p_vector[np.newaxis])) - - for i in range(nframes): - d_vector[i] = np.dot(kernel2[i, :], weights) - - for i in range(nframes): - kernel2[i, :] = kernel2[i, :]*weights - kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) - - #eigenvalues and eigenvector are the collective coordinates - eg, ev = np.linalg.eig(kernel2) - - eg_arg = np.argsort(eg) - eg = eg[eg_arg[::-1]] - ev = ev[eg_arg[::-1],:] - - return eg, ev - -class DiffusionMap(AnalysisBase): def __init__(self, u, select='all', epsilon='average', k=10, weights=None, metric=None, start=None, stop=None, step=None): """ @@ -229,8 +122,8 @@ def __init__(self, u, select='all', epsilon='average', k=10, weights=None, ------------- u : trajectory `~MDAnalysis.core.AtomGroup.Universe` The MD Trajectory for dimension reduction, remember that computational - scales at O(n^3). Cost can be reduced by increasing step interval or - specifying start and stop + cost scales at O(n^3). Cost can be reduced by increasing step interval or + specifying a start and stop select: str, optional 1. any valid selection string for :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` @@ -243,9 +136,9 @@ def __init__(self, u, select='all', epsilon='average', k=10, weights=None, weights: list, optional The list has to have the same length as the trajectory. With 'None' the weight of each frame of the trajectory will be the same. - metric : function, optional - Maps two numpy arrays to a float, is positive definite and symmetric. + Maps two numpy arrays to a float, is positive definite and symmetric, + Default: ``None`` sets metric to qcp.CalcRMSDRotationalMatrix start : int, optional First frame of trajectory to analyse, Default: 0 stop : int, optional @@ -288,30 +181,21 @@ def _prepare(self): self.rot = np.zeros(9) - #mappable function - def calc_diffusion(self, traj_index): - """Calculates diffusion distance from metric function - rmsd_matrix will be 0's in the lower triangle. - """ - logger.info("calculating rmsd from structure {0} to all".format(traj_index)) + def _single_frame(self): + logger.info("_ts.frame {0}, numframes{1}".format(self._ts.frame, self.nframes)) + traj_index = self._ts.frame i_ref = np.copy(self.u.trajectory[traj_index].positions-self.atoms.center_of_mass()) - for j in range(traj_index, self.nframes): + #diagonal entries need not be calculated due to metric(x,x) == 0 in theory + for j in range(self.nframes-1, self._ts.frame-1, -1): j_ref = np.copy(self.u.trajectory[j].positions-self.atoms.center_of_mass()) - logger.info('_ts.frame {0}'.format(self._ts.frame)) self.rmsd_matrix[traj_index, j] = self.metric(i_ref.T.astype(np.float64),\ - j_ref.T.astype(np.float64), self.natoms, self.rot, weights=None) - - def _single_frame(self): - logger.info("_ts.frame {0}, numframes{1}".format(self._ts.frame, self.nframes)) - self.calc_diffusion(self._ts.frame) + j_ref.T.astype(np.float64), self.natoms, self.rot, self.weights) def _conclude(self): - - logger.info('rmsd_matrix: {0}'.format(self.rmsd_matrix)) - self.rmsd_matrix = self.rmsd_matrix + self.rmsd_matrix.T - \ np.diag(self.rmsd_matrix.diagonal()) + if self.type_epsilon == 'average': for i in range(self.nframes): #np.argsort(rmsd_matrix[i,:])#[10]] @@ -320,8 +204,6 @@ def _conclude(self): self.epsilon = np.full((self.nframes, ), self.epsilon.mean()) - logger.info('epsilon: {0}'.format(self.epsilon)) - self.kernel2 = np.zeros((self.nframes, self.nframes)) #possibly mappable @@ -343,11 +225,9 @@ def _conclude(self): self.kernel2[i, :] = self.kernel2[i, :]*self.weights self.kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) - - logger.info('kernel: {0}'.format(self.kernel2)) #eigenvalues and eigenvector are the collective coordinates eg, ev = np.linalg.eig(self.kernel2) eg_arg = np.argsort(eg) - self.eg = eg[eg_arg[::-1]] - self.ev = ev[eg_arg[::-1],:] + self.eigenvalues = eg[eg_arg[::-1]] + self.eigenvectors = ev[eg_arg[::-1],:] From 38f2352712141b3277ee34522732a98d8677d720 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Sun, 29 May 2016 17:15:11 -0700 Subject: [PATCH 09/22] added tests, fixed logger issue style fixes, protection fixes, added docs fix of some broken areas fixed selections and updated tests accordingly switched order of imports [skip ci] --- package/CHANGELOG | 4 +- package/MDAnalysis/analysis/align.py | 1 - package/MDAnalysis/analysis/diffusionmap.py | 169 +++++++++++------- .../analysis/test_diffusionmap.py | 60 ++++--- 4 files changed, 143 insertions(+), 91 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index a01407ba373..1a8b48075d8 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -30,7 +30,9 @@ Changes * Added protected variable _frame_index to to keep track of frame iteration number in AnalysisBase * Added new AlignTraj class for alignment that follows the new analysis API known - as Bauhaus style. + as Bauhaus style. (Issue #845) + * Added new diffusionmap module for dimension reduction that follows the + Bauhaus API. (Issue #857) Deprecations (Issue #599) * Use of rms_fit_trj deprecated in favor of AlignTraj class (Issue #845) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 4c9b2f5908c..4b1c2c755a0 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -259,7 +259,6 @@ def rotation_matrix(a, b, weights=None): # so that R acts **to the left** and can be broadcasted; we're saving # one transpose. [orbeckst]) rmsd = qcp.CalcRMSDRotationalMatrix(a.T, b.T, N, rot, weights) - logger.info("qcp: %d", rmsd) return np.matrix(rot.reshape(3, 3)), rmsd diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index e8eaa194a25..1a7c6e7e51c 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -26,15 +26,16 @@ The diffusion map provides an estimate of the slowest collective coordinates for a trajectory. This non-linear dimension reduction method assumes that the trajectory is long enough to represent a probability -distribution of as protein close to the equilibrium. Furthermore, the diffusion map -assumes that the diffusion coefficients are constant. The eigenvectors with the -largest eigenvalues are the slowest collective coordinates. The complexity of +distribution of as protein close to the equilibrium. Furthermore, the diffusion +map assumes that the diffusion coefficients are constant. The eigenvectors with +the largest eigenvalues are the slowest collective coordinates. The complexity of the diffusion map is O(N^3), where N is the number of frames in the trajectory. Instead of a single trajectory a sample of protein structures can be used. -The sample should be equiblibrated, at least locally. Different weights can be used. -The order of the sampled structures in the trajectory is irrelevant. +The sample should be equiblibrated, at least locally. Different weights can be +used. The order of the sampled structures in the trajectory is irrelevant. -The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. +The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension +reduction. More details about diffusion maps are in [Lafon1]_ and [Clementi1]_. @@ -90,11 +91,12 @@ """ - +from six.moves import range import logging import numpy as np + import MDAnalysis.lib.qcprot as qcp -from six.moves import range + from .base import AnalysisBase logger = logging.getLogger("MDAnalysis.analysis.diffusionmap") @@ -107,35 +109,45 @@ class DiffusionMap(AnalysisBase): Attributes ---------- - eigenvalues: numpy array - Eigenvalues of the diffusion map - eigenvectors: numpy array - Eigenvectors of the diffusion map - The second and higher eigenvectors ev[i+1,:] represent the i-th slowest collective coordinates. + atoms : AtomGroup + Selected atoms in trajectory subject to dimension reduction + diffusion_matrix : array + Array of all possible ij metric distances between frames in trajectory + eigenvalues: array + Eigenvalues of the diffusion map + eigenvectors: array + Eigenvectors of the diffusion map + The second and higher eigenvectors ev[i+1,:] represent the i-th slowest + collective coordinates. """ def __init__(self, u, select='all', epsilon='average', k=10, weights=None, - metric=None, start=None, stop=None, step=None): + metric=None, start=None, stop=None, step=None): """ Parameters ------------- u : trajectory `~MDAnalysis.core.AtomGroup.Universe` The MD Trajectory for dimension reduction, remember that computational - cost scales at O(n^3). Cost can be reduced by increasing step interval or - specifying a start and stop + cost scales at O(N^3) where N is the number of frames. + Cost can be reduced by increasing step interval or specifying a + start and stop select: str, optional 1. any valid selection string for :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` - This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. + This selection of atoms is used to calculate the RMSD between + different frames. Water should be excluded. epsilon : float, optional - Specifies the epsilon used for the diffusion map. More information in [1] and [2] - With 'average' the average of the RMSD to the k-nearest-neighbor will be used. + Specifies the epsilon used for the diffusion map. More information + in [1] and [2]. With 'average' the average of the RMSD to the k-th + greatest value is used. (#TODO, check if actually agrees with theory) k : int, optional - specifies the k for the k-nearest-neighbor is average epsilon is used. + specifies the k for the epsilon choice if average epsilon is + used. weights: list, optional The list has to have the same length as the trajectory. - With 'None' the weight of each frame of the trajectory will be the same. + With 'None' the weight of each frame of the trajectory will be the + same. metric : function, optional Maps two numpy arrays to a float, is positive definite and symmetric, Default: ``None`` sets metric to qcp.CalcRMSDRotationalMatrix @@ -146,88 +158,109 @@ def __init__(self, u, select='all', epsilon='average', k=10, weights=None, step : int, optional Step between frames to analyse, Default: 1 """ - self.u = u + self._u = u + self.select = select self.atoms = u.select_atoms(select) - self.natoms = self.atoms.n_atoms - self.k = k - frames = u.trajectory - self.epsilon = epsilon + self._natoms = self.atoms.n_atoms + + traj = u.trajectory + self._epsilon = epsilon if metric is not None: - self.metric = metric + self._metric = metric else: - self.metric = qcp.CalcRMSDRotationalMatrix - - self._setup_frames(frames, start, stop, step) + self._metric = qcp.CalcRMSDRotationalMatrix + # remember that this must be called before referencing self.nframes + self._setup_frames(traj, start, stop, step) + # modulus to prevent array index out of bounds exception + self._k = k % self.nframes if weights is None: - self.weights = np.ones((self.nframes,)) + # weights do not apply to metric + self._weights = np.ones((self.nframes,)) else: if weights.shape[0] != self.nframes: - raise ValueError("The weight should have the same length as the trajectroy") + raise ValueError("The weight should have the same length as the" + 'trajectory') else: # weights are constructed as relative to the mean - self.weights = np.asarray(weights, dtype=np.float64) / np.mean(weights) + self._weights = (np.asarray(weights, dtype=np.float64) / + np.mean(weights)) def _prepare(self): - self.rmsd_matrix = np.zeros((self.nframes,self.nframes)) - - if self.epsilon == 'average': - self.epsilon = np.zeros((self.nframes, ), ) - self.type_epsilon = 'average' + self.diffusion_matrix = np.zeros((self.nframes, self.nframes)) + if self._epsilon == 'average': + self._epsilon = np.zeros((self.nframes, ), ) + self._type_epsilon = 'average' else: - value_epsilon = epsilon - self.epsilon = np.full((nframes, ), value_epsilon) - self.type_epsilon = 'constant' + value_epsilon = self._epsilon + self._epsilon = np.full((self.nframes, ), value_epsilon) + self._type_epsilon = 'constant' - self.rot = np.zeros(9) + self._rot = np.zeros(9) def _single_frame(self): - logger.info("_ts.frame {0}, numframes{1}".format(self._ts.frame, self.nframes)) traj_index = self._ts.frame - i_ref = np.copy(self.u.trajectory[traj_index].positions-self.atoms.center_of_mass()) + i_ref = self.atoms.positions - self.atoms.center_of_mass() - #diagonal entries need not be calculated due to metric(x,x) == 0 in theory + # diagonal entries need not be calculated due to metric(x,x) == 0 in + # theory, _ts not updated properly. Possible savings by setting a cutoff + # for significant decimal places to sparsify matrix for j in range(self.nframes-1, self._ts.frame-1, -1): - j_ref = np.copy(self.u.trajectory[j].positions-self.atoms.center_of_mass()) - self.rmsd_matrix[traj_index, j] = self.metric(i_ref.T.astype(np.float64),\ - j_ref.T.astype(np.float64), self.natoms, self.rot, self.weights) + self._ts = self._u.trajectory[j] + j_ref = self.atoms.positions - self.atoms.center_of_mass() + ij_result = self._metric(i_ref.T.astype(np.float64), + j_ref.T.astype(np.float64), self._natoms, + self._rot, weights=None) + if(ij_result < 1E-05): + self.diffusion_matrix[traj_index, j] = 0 + else: + self.diffusion_matrix[traj_index, j] = ij_result def _conclude(self): - self.rmsd_matrix = self.rmsd_matrix + self.rmsd_matrix.T - \ - np.diag(self.rmsd_matrix.diagonal()) + self.diffusion_matrix = (self.diffusion_matrix + + self.diffusion_matrix.T - + np.diag(self.diffusion_matrix.diagonal())) - if self.type_epsilon == 'average': + logger.info('printing diffusion matrix: {0}'.format(self.diffusion_matrix)) + if self._type_epsilon == 'average': for i in range(self.nframes): - #np.argsort(rmsd_matrix[i,:])#[10]] - self.epsilon[i] = self.rmsd_matrix[i, \ - np.argsort(self.rmsd_matrix[i, :])[self.k]] + # np.argsort(diffusion_matrix[i,:])#[10]] + # picks k largest rmsd value in column for choice of epsilon + self._epsilon[i] = self.diffusion_matrix[i, np.argsort( + self.diffusion_matrix[i, :])[self._k]] + + self._epsilon = np.full((self.nframes, ), self._epsilon.mean()) - self.epsilon = np.full((self.nframes, ), self.epsilon.mean()) + logger.info('printing epsilon: {0}'.format(self._epsilon)) - self.kernel2 = np.zeros((self.nframes, self.nframes)) + self._kernel2 = np.zeros((self.nframes, self.nframes)) - #possibly mappable + # possibly mappable for i in range(self.nframes): - self.kernel2[i, :] = np.exp(-self.rmsd_matrix[i, :]**2/(self.epsilon[i]*self.epsilon[:])) + self._kernel2[i, :] = (np.exp((-self.diffusion_matrix[i, :] ** 2) / + (self._epsilon[i]*self._epsilon[:]))) + logger.info('printing kernel: {0}'.format(self._kernel2)) p_vector = np.zeros((self.nframes, )) d_vector = np.zeros((self.nframes, )) for i in range(self.nframes): - p_vector[i] = np.dot(self.kernel2[i, :], self.weights) + p_vector[i] = np.dot(self._kernel2[i, :], self._weights) - self.kernel2 /= np.sqrt(p_vector[:, np.newaxis].dot(p_vector[np.newaxis])) + self._kernel2 /= np.sqrt(p_vector[:, np.newaxis].dot(p_vector[np.newaxis])) for i in range(self.nframes): - d_vector[i] = np.dot(self.kernel2[i, :], self.weights) + d_vector[i] = np.dot(self._kernel2[i, :], self._weights) + logger.info('printing d_vector matrix: {0}'.format(d_vector)) for i in range(self.nframes): - self.kernel2[i, :] = self.kernel2[i, :]*self.weights + self._kernel2[i, :] = self._kernel2[i, :] * self._weights - self.kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) - #eigenvalues and eigenvector are the collective coordinates - eg, ev = np.linalg.eig(self.kernel2) + self._kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) + logger.info('printing final kernel: {0}'.format(self._kernel2)) + # eigenvalues and eigenvector are the collective coordinates + eigenvals, eigenvectors = np.linalg.eig(self._kernel2) - eg_arg = np.argsort(eg) - self.eigenvalues = eg[eg_arg[::-1]] - self.eigenvectors = ev[eg_arg[::-1],:] + eg_arg = np.argsort(eigenvals) + self.eigenvalues = eigenvals[eg_arg[::-1]] + self.eigenvectors = eigenvectors[eg_arg[::-1], :] diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index 8242d5890e6..6bf39be32c4 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -14,9 +14,10 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from __future__ import print_function +import numpy as np import MDAnalysis import MDAnalysis.analysis.diffusionmap as diffusionmap -from numpy.testing import (assert_almost_equal, assert_equal) +from numpy.testing import (assert_almost_equal, assert_equal, raises) from MDAnalysisTests.datafiles import PDB, XTC @@ -24,30 +25,47 @@ class TestDiffusionmap(object): def __init__(self): - #slow 6s test - #u = MDAnalysis.Universe(PSF,DCD) - #eg,ev=diffusionmap.diffusionmap(u) - #assert_equal(self.eg.shape, (98,)) - #assert_almost_equal(self.eg[0], 1.0) - #assert_almost_equal(self.eg[-1],0.03174182) - #assert_equal(self.ev.shape, (98,98)) - #assert_almost_equal(self.ev[0,0], .095836037343022831) - #faster - u = MDAnalysis.Universe(PDB, XTC) - eg, ev = diffusionmap.diffusionmap(u, select = 'backbone', k = 5) - self.eg = eg - self.ev = ev - + # slow 6s test + # u = MDAnalysis.Universe(PSF,DCD) + # eg,ev=diffusionmap.diffusionmap(u) + # assert_equal(self.eg.shape, (98,)) + # assert_almost_equal(self.eg[0], 1.0) + # assert_almost_equal(self.eg[-1],0.03174182) + # assert_equal(self.ev.shape, (98,98)) + # assert_almost_equal(self.ev[0,0], .095836037343022831) + # faster + self.u = MDAnalysis.Universe(PDB, XTC) + self.dmap = diffusionmap.DiffusionMap(self.u, select='backbone', k=5) + self.dmap.run() + self.eigvals = self.dmap.eigenvalues + self.eigvects = self.dmap.eigenvectors + self.weights = np.ones((self.dmap.nframes, )) def test_eg(self): - assert_equal(self.eg.shape, (10, )) - assert_almost_equal(self.eg[0], 1.0, decimal=5) - assert_almost_equal(self.eg[-1], 0.0142, decimal = 3) - + # number of frames is trajectory is now 10 vs. 98 + assert_equal(self.eigvals.shape, (self.dmap.nframes, )) + assert_almost_equal(self.eigvals[0], 1.0, decimal=5) + assert_almost_equal(self.eigvals[-1], 0.006621, decimal=3) def test_ev(self): - assert_equal(self.ev.shape, (10, 10)) - assert_almost_equal(self.ev[0, 0], -0.3019, decimal = 2) + assert_equal(self.eigvects.shape, (self.dmap.nframes, self.dmap.nframes)) + assert_almost_equal(self.eigvects[0, 0], 0.3172, decimal=2) + def test_weights(self): + dmap2 = diffusionmap.DiffusionMap(self.u, select='backbone', + weights=self.weights, k=5) + dmap2.run() + assert_almost_equal(self.eigvals, dmap2.eigenvalues, decimal=5) + assert_almost_equal(self.eigvects, dmap2.eigenvectors, decimal=6) + @raises(ValueError) + def test_wrong_weights(self): + dmap2 = diffusionmap.DiffusionMap(self.u, select='backbone', + weights=np.ones((2,)), + k=5) + def test_constant_epsilon(self): + dmap3 = diffusionmap.DiffusionMap(self.u, select='backbone', k=5, + epsilon=16.12272861) + dmap3.run() + assert_almost_equal(self.eigvals, dmap3.eigenvalues, decimal=5) From 19c3675c08064a4f363a044f4d6d1792805c47ba Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Fri, 10 Jun 2016 13:52:53 -0700 Subject: [PATCH 10/22] Separated calculation of distance matrix from diffusion map calculation decided to calculate epsilon as a function outside of class for maximum usability. Change of direction in API, more work to be done implementing everything Changes to docs, functions, tried to make it very evident how we were following the lafon paper in terms of algorithm. added timescaling, fixed some parts --- package/MDAnalysis/analysis/diffusionmap.py | 326 +++++++++++------- .../analysis/diffusionmap.rst | 0 .../documentation_pages/analysis_modules.rst | 10 +- .../analysis/test_diffusionmap.py | 8 +- 4 files changed, 211 insertions(+), 133 deletions(-) create mode 100644 package/doc/sphinx/source/documentation_pages/analysis/diffusionmap.rst diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 1a7c6e7e51c..20acda3df7d 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -28,16 +28,20 @@ assumes that the trajectory is long enough to represent a probability distribution of as protein close to the equilibrium. Furthermore, the diffusion map assumes that the diffusion coefficients are constant. The eigenvectors with -the largest eigenvalues are the slowest collective coordinates. The complexity of -the diffusion map is O(N^3), where N is the number of frames in the trajectory. -Instead of a single trajectory a sample of protein structures can be used. -The sample should be equiblibrated, at least locally. Different weights can be -used. The order of the sampled structures in the trajectory is irrelevant. +the largest eigenvalues are the more dominant collective coordinates. Assigning +phyiscal meaning to the reaction coordinates is a fundamentally difficult +problem. The time complexity of the diffusion map is O(N^3), where N is the +number of frames in the trajectory, and the storage complexity is O(N^3). +Instead of a single trajectory a sample of protein structures +can be used. The sample should be equiblibrated, at least locally. Different +weights can be used to determine the anisotropy of the diffusion kernel. +The motivation for the creation of an anisotropic kernel is given on page 14 of +[Lafon1]_. The order of the sampled structures in the trajectory is irrelevant. The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. -More details about diffusion maps are in [Lafon1]_ and [Clementi1]_. +More details about diffusion maps are in [Lafon1]_ , [Ferguson1], and [Clementi1]_. .. _Diffusion-Map-tutorial: @@ -47,9 +51,11 @@ The example uses files provided as part of the MDAnalysis test suite (in the variables :data:`~MDAnalysis.tests.datafiles.PSF` and :data:`~MDAnalysis.tests.datafiles.DCD`). Notice that this trajectory -does not represent a sample from the equilibrium. This violates a basic -assumption of the diffusion map and the results shouldn't be interpreted for this reason. -This tutorial shows how to use the diffusionmap function. +does not represent a sample from equilibrium. This violates a basic +assumption of the anisotropic kernel. The anisotropic kernel is created to act +as a discrete approximation for the Fokker-Plank equation from statistical +mechanics, results from a non-equilibrium trajectory shouldn't be interpreted +for this reason. This tutorial shows how to use the diffusionmap function. First load all modules and test data :: >>> import MDAnalysis @@ -57,27 +63,40 @@ >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF,DCD -Given an universe or atom group, we can calculate the diffusion map from -that trajectory using :class:`DiffusionMap`:: and get the corresponding eigenvalues -and eigenvectors +Given a universe or atom group, we can calculate the diffusion map from +that trajectory using :class:`DiffusionMap`:: and get the corresponding +eigenvalues and eigenvectors. >>> u = MDAnalysis.Universe(PSF,DCD) - >>> dmap = diffusionmap.DiffusionMap(u) + >>> d_matrix = DistMatrix(u) + >>> dmap = diffusionmap.DiffusionMap(DistMatrix) >>> dmap.run() >>> eigenvalues = dmap.eigenvalues >>> eigenvectors = dmap.eigenvectors +From here we can perform an embedding onto the k dominant eigenvectors. + + >>> ts_one = u.trajectory[0] + >>> params_on_frame = dmap.embedding(ts_one) + +This is calculated from an ad hoc determination of the spectral gap, if a more +rigorous investigation of the data is expected, you should probably do this on +your own. + Classes ------- .. autoclass:: DiffusionMap - :members: +.. autoclass:: DistMatrix +.. autoclass:: Epsilon References --------- If you use this Dimension Reduction method in a publication, please reference: +..[Ferguson1] Ferguson, A. L.; Panagiotopoulos, A. Z.; Kevrekidis, I. G.; +Debenedetti, P. G. Chem. Phys. Lett. 2011, 509, 1−11 ..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. Appl. Comput. Harmon. Anal. 21, 5–30. ..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) @@ -93,64 +112,57 @@ """ from six.moves import range import logging + import numpy as np -import MDAnalysis.lib.qcprot as qcp +from MDAnalysis.analysis import rms from .base import AnalysisBase logger = logging.getLogger("MDAnalysis.analysis.diffusionmap") -class DiffusionMap(AnalysisBase): - """Non-linear dimension reduction method - - Dimension reduction with diffusion map of the structures in the universe. +class DistMatrix(AnalysisBase): + """ Calculate the pairwise distance between each frame in a trajectory using + a given metric Attributes ---------- atoms : AtomGroup Selected atoms in trajectory subject to dimension reduction - diffusion_matrix : array - Array of all possible ij metric distances between frames in trajectory - eigenvalues: array - Eigenvalues of the diffusion map - eigenvectors: array - Eigenvectors of the diffusion map - The second and higher eigenvectors ev[i+1,:] represent the i-th slowest - collective coordinates. - + dist_matrix : array + Array of all possible ij metric distances between frames in trajectory. + This matrix is symmetric with zeros on the diagonal. + + Methods + ------- + save(filename) + Save the `dist_matrix` to a given filename """ - - def __init__(self, u, select='all', epsilon='average', k=10, weights=None, - metric=None, start=None, stop=None, step=None): + def __init__(self, u, select='all', metric=rms.rmsd, cutoff=1E0-5, + weights=None, start=None, stop=None, step=None): """ Parameters - ------------- + ---------- u : trajectory `~MDAnalysis.core.AtomGroup.Universe` - The MD Trajectory for dimension reduction, remember that computational - cost scales at O(N^3) where N is the number of frames. + The MD Trajectory for dimension reduction, remember that + computational cost of eigenvalue decomposition + scales at O(N^3) where N is the number of frames. Cost can be reduced by increasing step interval or specifying a - start and stop + start and stop. select: str, optional - 1. any valid selection string for + Any valid selection string for :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. - epsilon : float, optional - Specifies the epsilon used for the diffusion map. More information - in [1] and [2]. With 'average' the average of the RMSD to the k-th - greatest value is used. (#TODO, check if actually agrees with theory) - k : int, optional - specifies the k for the epsilon choice if average epsilon is - used. - weights: list, optional - The list has to have the same length as the trajectory. - With 'None' the weight of each frame of the trajectory will be the - same. metric : function, optional - Maps two numpy arrays to a float, is positive definite and symmetric, - Default: ``None`` sets metric to qcp.CalcRMSDRotationalMatrix + Maps two numpy arrays to a float, is positive definite and + symmetric, Default: metric is set to rms.rmsd(). + cutoff : float, optional + Specify a given cutoff for metric values to be considered equal, + Default: 1EO-5 + weights : array, optional + Weights to be given to coordinates for metric calculation start : int, optional First frame of trajectory to analyse, Default: 0 stop : int, optional @@ -158,109 +170,173 @@ def __init__(self, u, select='all', epsilon='average', k=10, weights=None, step : int, optional Step between frames to analyse, Default: 1 """ - self._u = u - self.select = select - self.atoms = u.select_atoms(select) - self._natoms = self.atoms.n_atoms - traj = u.trajectory - self._epsilon = epsilon - if metric is not None: - self._metric = metric - else: - self._metric = qcp.CalcRMSDRotationalMatrix + self._u = u + traj = self._u.trajectory + self.atoms = self._u.select_atoms(select) + self._metric = metric + self._cutoff = cutoff + self._weights = weights # remember that this must be called before referencing self.nframes self._setup_frames(traj, start, stop, step) - # modulus to prevent array index out of bounds exception - self._k = k % self.nframes - if weights is None: - # weights do not apply to metric - self._weights = np.ones((self.nframes,)) - else: - if weights.shape[0] != self.nframes: - raise ValueError("The weight should have the same length as the" - 'trajectory') - else: - # weights are constructed as relative to the mean - self._weights = (np.asarray(weights, dtype=np.float64) / - np.mean(weights)) - def _prepare(self): - self.diffusion_matrix = np.zeros((self.nframes, self.nframes)) - if self._epsilon == 'average': - self._epsilon = np.zeros((self.nframes, ), ) - self._type_epsilon = 'average' - else: - value_epsilon = self._epsilon - self._epsilon = np.full((self.nframes, ), value_epsilon) - self._type_epsilon = 'constant' - - self._rot = np.zeros(9) + logger.info('numframes {0}'.format(self.nframes)) + self.dist_matrix = np.zeros((self.nframes, self.nframes)) def _single_frame(self): traj_index = self._ts.frame i_ref = self.atoms.positions - self.atoms.center_of_mass() - # diagonal entries need not be calculated due to metric(x,x) == 0 in - # theory, _ts not updated properly. Possible savings by setting a cutoff - # for significant decimal places to sparsify matrix - for j in range(self.nframes-1, self._ts.frame-1, -1): + # theory, _ts not updated properly. Possible savings by setting a + # cutoff for significant decimal places to sparsify matrix + for j in range(self.stop-1, self._ts.frame-1, -self.step): self._ts = self._u.trajectory[j] - j_ref = self.atoms.positions - self.atoms.center_of_mass() - ij_result = self._metric(i_ref.T.astype(np.float64), - j_ref.T.astype(np.float64), self._natoms, - self._rot, weights=None) - if(ij_result < 1E-05): - self.diffusion_matrix[traj_index, j] = 0 - else: - self.diffusion_matrix[traj_index, j] = ij_result + j_ref = self.atoms.positions-self.atoms.center_of_mass() + dist = self._metric(i_ref, j_ref, weights=self._weights) + # distance squared in preparation for kernel calculation + # don't think this will come up in other areas, but might be + # a fix we should make later + self.dist_matrix[traj_index, j] = dist**2 if dist > self._cutoff + else 0 + self.dist_matrix[j, traj_index] = self.dist_matrix[traj_index, j] - def _conclude(self): - self.diffusion_matrix = (self.diffusion_matrix + - self.diffusion_matrix.T - - np.diag(self.diffusion_matrix.diagonal())) + def save(self, filename): + np.savetxt(filename, self.dist_matrix) + logger.info("Wrote the distance-squared matrix to file %r", filename) - logger.info('printing diffusion matrix: {0}'.format(self.diffusion_matrix)) - if self._type_epsilon == 'average': - for i in range(self.nframes): - # np.argsort(diffusion_matrix[i,:])#[10]] - # picks k largest rmsd value in column for choice of epsilon - self._epsilon[i] = self.diffusion_matrix[i, np.argsort( - self.diffusion_matrix[i, :])[self._k]] - self._epsilon = np.full((self.nframes, ), self._epsilon.mean()) +class Epsilon(object): + """Manipulates a distance matrix by local scale parameters + + Attributes + ---------- + scaledMatrix : DistMatrix object + A matrix with each term divided by a local scale parameter - logger.info('printing epsilon: {0}'.format(self._epsilon)) + Methods + ------- + determineEpsilon() + Determine local scale parameters using a chosen algorithm + """ + def __init__(self, DistMatrix, **kwargs): + self._distMatrix = DistMatrix - self._kernel2 = np.zeros((self.nframes, self.nframes)) + def determineEpsilon(self): + pass - # possibly mappable - for i in range(self.nframes): - self._kernel2[i, :] = (np.exp((-self.diffusion_matrix[i, :] ** 2) / - (self._epsilon[i]*self._epsilon[:]))) - logger.info('printing kernel: {0}'.format(self._kernel2)) - p_vector = np.zeros((self.nframes, )) - d_vector = np.zeros((self.nframes, )) +class DiffusionMap(DistMatrix): + """Non-linear dimension reduction method + Dimension reduction with diffusion map of the structures in the universe. + + Attributes + ---------- + eigenvalues: array + Eigenvalues of the diffusion map + eigenvectors: array + Eigenvectors of the diffusion map + + Methods + ------- + spectral_gap() + Retrieve a guess for the set of eigenvectors reflecting the intrinsic + dimensionality of the molecular system. + + embedding(timestep) + Perform an embedding of a frame into the eigenvectors representing + the collective coordinates. + """ + + def __init__(self, DistMatrix, epsilon=EpsilonConstant, weights=None): + """ + Parameters + ------------- + DistMatrix : DistMatrix object + Distance matrix to be made into a diffusion kernel and perform + an eigenvector decomposition on. + epsilon : function, optional + Specifies the method used for the choice of scale parameter in the + diffusion map. More information in [1] and [2]. With 'average' + the average of the RMSD to the k-th greatest value is used. + weights: list, optional + The list has to have the same length as the trajectory. + With 'None' the weight of each frame of the trajectory will be the + same. + """ + self._epsilon = epsilon + # this should be a reference to the same object as + # epsilon.scaledMatrix + self.dist_matrix = DistMatrix.dist_matrix + if weights_ker is None: + # weights do not apply to metric + self._weights_ker = np.ones((self.nframes,)) + else: + if weights.shape[0] != self.nframes: + raise ValueError("The weight should have the same length as " + 'the trajectory') + else: + # weights are constructed as relative to the mean + self._weights_ker = (np.asarray(weights, dtype=np.float64) / + np.mean(weights)) + + def _conclude(self): + self._epsilon.determineEpsilon() + self._kernel = self._epsilon.scaledMatrix + # take negative exponent of scaled matrix to create Isotropic kernel + self._kernel = np.exp(-self._kernel) + + # define an anistropic diffusion term q + q_vector = np.zeros((self.nframes, )) + # weights should reflect the density of the points on the manifold for i in range(self.nframes): - p_vector[i] = np.dot(self._kernel2[i, :], self._weights) + q_vector[i] = np.dot(self._kernel2[i, :], self._weights_ker) - self._kernel2 /= np.sqrt(p_vector[:, np.newaxis].dot(p_vector[np.newaxis])) + # Form a new kernel from the anisotropic diffusion term q + self._kernel /= np.sqrt(q_vector[:, np.newaxis].dot(q_vector[np.newaxis])) + # Weighted Graph Laplacian normalization on this new graph + d_vector = np.zeros((self.nframes, )) for i in range(self.nframes): - d_vector[i] = np.dot(self._kernel2[i, :], self._weights) + d_vector[i] = np.dot(self._kernel2[i, :], self._weights_ker) - logger.info('printing d_vector matrix: {0}'.format(d_vector)) for i in range(self.nframes): - self._kernel2[i, :] = self._kernel2[i, :] * self._weights + self._kernel[i, :] = self._kernel2[i, :] * self._weights_ker + + # Define anisotropic transition by dividing kernel by this term + self._kernel /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) - self._kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) - logger.info('printing final kernel: {0}'.format(self._kernel2)) - # eigenvalues and eigenvector are the collective coordinates - eigenvals, eigenvectors = np.linalg.eig(self._kernel2) + # Apply timescaling, HELP, I think I'm pythoning wrong + for i in range(t): + if i > 1: + self._kernel.__matmul__(self._kernel) + + eigenvals, eigenvectors = np.linalg.eig(self._kernel) eg_arg = np.argsort(eigenvals) self.eigenvalues = eigenvals[eg_arg[::-1]] self.eigenvectors = eigenvectors[eg_arg[::-1], :] + + def spectral_gap(self): + # TODO + pass + + def embedding(self, timestep): + # TODO + pass + + +class EpsilonConstant(Epsilon): + """Premade function for the determination of epsilon based on providing + an arbitrary constant""" + + def __init__(self, DistMatrix, epsilon): + """ + Parameters + ---------- + epsilon : int + The value of epsilon to be used as a local scale parameter + """ + self._epsilon = epsilon + epsilon = np.full((nframes, ), epsilon) diff --git a/package/doc/sphinx/source/documentation_pages/analysis/diffusionmap.rst b/package/doc/sphinx/source/documentation_pages/analysis/diffusionmap.rst new file mode 100644 index 00000000000..e69de29bb2d diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index df3f66c98ad..29f29697eb1 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -19,7 +19,7 @@ has to import them from :mod:`MDAnalysis.analysis`, for instance :: import MDAnalysis.analysis.align -.. Note:: +.. Note:: Some of the modules require additional Python packages such as :mod:`scipy` from the SciPy_ package or :mod:`networkx` from @@ -100,7 +100,7 @@ Structure Volumetric analysis -=================== +=================== .. toctree:: :maxdepth: 1 @@ -109,4 +109,8 @@ Volumetric analysis analysis/lineardensity analysis/waterdynamics - +Dimension Reduction +=================== +.. toctree:: + :maxdepth: 1 + analysis/diffusionmap diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index 6bf39be32c4..fca3bf211c0 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -35,7 +35,7 @@ def __init__(self): # assert_almost_equal(self.ev[0,0], .095836037343022831) # faster self.u = MDAnalysis.Universe(PDB, XTC) - self.dmap = diffusionmap.DiffusionMap(self.u, select='backbone', k=5) + self.dmap = diffusionmap.DiffusionMap(self.u, select='backbone') self.dmap.run() self.eigvals = self.dmap.eigenvalues self.eigvects = self.dmap.eigenvectors @@ -61,11 +61,9 @@ def test_weights(self): @raises(ValueError) def test_wrong_weights(self): dmap2 = diffusionmap.DiffusionMap(self.u, select='backbone', - weights=np.ones((2,)), - k=5) + weights=np.ones((2,))) def test_constant_epsilon(self): - dmap3 = diffusionmap.DiffusionMap(self.u, select='backbone', k=5, - epsilon=16.12272861) + dmap3 = diffusionmap.DiffusionMap(self.u, select='backbone') dmap3.run() assert_almost_equal(self.eigvals, dmap3.eigenvalues, decimal=5) From 786ef01fc53514cd45c257b2dbf3e0d30ea42774 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Thu, 16 Jun 2016 15:39:03 -0700 Subject: [PATCH 11/22] Automodule added, Changed some object oriented stuff around DistMatrix is its own AnalysisBases subclass, DiffusionMap still is an AnalysisBase subclass but only for the actual diffusion map embedding. Previous _conclude function renamed to kernel_decompose. Changed things around to get embedding working Added tests, found bugs, squashed bugs. Eliminated square of distance, random mistake --- package/MDAnalysis/analysis/diffusionmap.py | 191 ++++++++++++------ .../analysis/diffusionmap.rst | 1 + .../analysis/test_diffusionmap.py | 56 +++-- 3 files changed, 165 insertions(+), 83 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 20acda3df7d..370cf0fc38c 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -27,11 +27,12 @@ coordinates for a trajectory. This non-linear dimension reduction method assumes that the trajectory is long enough to represent a probability distribution of as protein close to the equilibrium. Furthermore, the diffusion -map assumes that the diffusion coefficients are constant. The eigenvectors with +map assumes that the diffusion coefficients associated with the dynamical +motion of molecules in the system are constant. The eigenvectors with the largest eigenvalues are the more dominant collective coordinates. Assigning -phyiscal meaning to the reaction coordinates is a fundamentally difficult +phyiscal meaning of the 'collective coordinates' is a fundamentally difficult problem. The time complexity of the diffusion map is O(N^3), where N is the -number of frames in the trajectory, and the storage complexity is O(N^3). +number of frames in the trajectory, and the storage complexity is O(N^2). Instead of a single trajectory a sample of protein structures can be used. The sample should be equiblibrated, at least locally. Different weights can be used to determine the anisotropy of the diffusion kernel. @@ -41,7 +42,8 @@ The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. -More details about diffusion maps are in [Lafon1]_ , [Ferguson1], and [Clementi1]_. +More details about diffusion maps are in [Lafon1]_ , [Ferguson1]_, and +[Clementi1]_. .. _Diffusion-Map-tutorial: @@ -68,16 +70,29 @@ eigenvalues and eigenvectors. >>> u = MDAnalysis.Universe(PSF,DCD) - >>> d_matrix = DistMatrix(u) - >>> dmap = diffusionmap.DiffusionMap(DistMatrix) - >>> dmap.run() + >>> dist_matrix = DistanceMatrix(u) + +We leave determination of the appropriate scale parameter epsilon to the user, +[Clementi1]_ uses a complex method involving the k-nearest-neighbors of a +trajectory frame, whereas others simple use a trial-and-error approach with +a constant epsilon. For those users, a +`~MDAnalysis.analysis.diffusionmap.EpsilonConstant` class has been provided. +Any user choosing to write their own Epsilon class must write a class that +inherits from `~MDAnalysis.analysis.diffusionmap.Epsilon`, and call the +`determine_epsilon` function required by the API before running the diffusion +map. + + >>> epsilon_matrix = EpsilonConstant(DistanceMatrix, 500) + >>> epsilon_matrix.determine_epsilon() + >>> dmap = diffusionmap.DiffusionMap(dist_matrix, epsilon_matrix) + >>> dmap.decompose_kernel() >>> eigenvalues = dmap.eigenvalues >>> eigenvectors = dmap.eigenvectors From here we can perform an embedding onto the k dominant eigenvectors. - >>> ts_one = u.trajectory[0] - >>> params_on_frame = dmap.embedding(ts_one) + >>> + >>> params_on_frame = dmap.transform(ts_one) This is calculated from an ad hoc determination of the spectral gap, if a more rigorous investigation of the data is expected, you should probably do this on @@ -122,7 +137,7 @@ logger = logging.getLogger("MDAnalysis.analysis.diffusionmap") -class DistMatrix(AnalysisBase): +class DistanceMatrix(AnalysisBase): """ Calculate the pairwise distance between each frame in a trajectory using a given metric @@ -185,21 +200,23 @@ def _prepare(self): self.dist_matrix = np.zeros((self.nframes, self.nframes)) def _single_frame(self): - traj_index = self._ts.frame + i = self._ts.frame i_ref = self.atoms.positions - self.atoms.center_of_mass() + # diagonal entries need not be calculated due to metric(x,x) == 0 in # theory, _ts not updated properly. Possible savings by setting a # cutoff for significant decimal places to sparsify matrix - for j in range(self.stop-1, self._ts.frame-1, -self.step): - self._ts = self._u.trajectory[j] - j_ref = self.atoms.positions-self.atoms.center_of_mass() - dist = self._metric(i_ref, j_ref, weights=self._weights) - # distance squared in preparation for kernel calculation - # don't think this will come up in other areas, but might be - # a fix we should make later - self.dist_matrix[traj_index, j] = dist**2 if dist > self._cutoff - else 0 - self.dist_matrix[j, traj_index] = self.dist_matrix[traj_index, j] + for j, ts in enumerate(self._u.trajectory[i:self.stop:self.step]): + if i == j: + self.dist_matrix[i,i] = 0 + else: + self._ts = ts + j_ref = self.atoms.positions-self.atoms.center_of_mass() + dist = self._metric(i_ref, j_ref, weights=self._weights) + self.dist_matrix[i, j] = dist if dist > self._cutoff else 0 + self.dist_matrix[j, i] = self.dist_matrix[i, j] + + self._ts = self._u.trajectory[i] def save(self, filename): np.savetxt(filename, self.dist_matrix) @@ -211,7 +228,7 @@ class Epsilon(object): Attributes ---------- - scaledMatrix : DistMatrix object + scaledMatrix : DistanceMatrix object A matrix with each term divided by a local scale parameter Methods @@ -219,17 +236,37 @@ class Epsilon(object): determineEpsilon() Determine local scale parameters using a chosen algorithm """ - def __init__(self, DistMatrix, **kwargs): - self._distMatrix = DistMatrix + def __init__(self, DistanceMatrix, **kwargs): + self._DistanceMatrix = DistMatrix - def determineEpsilon(self): + def determine_epsilon(self): pass -class DiffusionMap(DistMatrix): +class EpsilonConstant(Epsilon): + """Premade function for the determination of epsilon based on providing + an arbitrary constant""" + + def __init__(self, DistanceMatrix, epsilon): + """ + Parameters + ---------- + epsilon : int + The value of epsilon to be used as a local scale parameter + """ + self.scaledMatrix = DistanceMatrix.dist_matrix + self._epsilon = epsilon + + def determine_epsilon(self): + self.scaledMatrix /= self._epsilon + return + + +class DiffusionMap(AnalysisBase): """Non-linear dimension reduction method - Dimension reduction with diffusion map of the structures in the universe. + Dimension reduction with diffusion mapping of selected structures in a + trajectory. Attributes ---------- @@ -237,43 +274,53 @@ class DiffusionMap(DistMatrix): Eigenvalues of the diffusion map eigenvectors: array Eigenvectors of the diffusion map + fit : array + After calling `transform(num_eigenvectors)` the diffusion map embedding + into the lower dimensional diffusion space will exist here. Methods ------- + decompose_kernel() + Constructs an anisotropic diffusion kernel and performs eigenvalue + decomposition on it. + spectral_gap() Retrieve a guess for the set of eigenvectors reflecting the intrinsic dimensionality of the molecular system. - embedding(timestep) + transform(num_eigenvectors) Perform an embedding of a frame into the eigenvectors representing the collective coordinates. """ - def __init__(self, DistMatrix, epsilon=EpsilonConstant, weights=None): + def __init__(self, DistanceMatrix, epsilon, weights=None, timescale=1): """ Parameters ------------- - DistMatrix : DistMatrix object + DistanceMatrix : DistanceMatrix object Distance matrix to be made into a diffusion kernel and perform an eigenvector decomposition on. - epsilon : function, optional + epsilon : Epsilon object Specifies the method used for the choice of scale parameter in the - diffusion map. More information in [1] and [2]. With 'average' - the average of the RMSD to the k-th greatest value is used. + diffusion map. More information in [1], [2] and [3]. weights: list, optional The list has to have the same length as the trajectory. With 'None' the weight of each frame of the trajectory will be the same. + timescale: int, optional + The number of steps in the random walk, large t reflects global + structure whereas small t indicates local structure. """ + self.DistanceMatrix = DistanceMatrix + self._nframes = DistanceMatrix.nframes + self._t = timescale self._epsilon = epsilon - # this should be a reference to the same object as - # epsilon.scaledMatrix - self.dist_matrix = DistMatrix.dist_matrix - if weights_ker is None: - # weights do not apply to metric - self._weights_ker = np.ones((self.nframes,)) + + if weights is None: + # weights do not apply to metric but density of data + self._weights_ker = np.ones((self._nframes,)) else: - if weights.shape[0] != self.nframes: + if weights.shape[0] != self._nframes: raise ValueError("The weight should have the same length as " 'the trajectory') else: @@ -281,34 +328,36 @@ def __init__(self, DistMatrix, epsilon=EpsilonConstant, weights=None): self._weights_ker = (np.asarray(weights, dtype=np.float64) / np.mean(weights)) - def _conclude(self): - self._epsilon.determineEpsilon() + def decompose_kernel(self): + # this should be a reference to the same object as + # self.DistanceMatrix.dist_matrix self._kernel = self._epsilon.scaledMatrix + # take negative exponent of scaled matrix to create Isotropic kernel self._kernel = np.exp(-self._kernel) # define an anistropic diffusion term q - q_vector = np.zeros((self.nframes, )) + q_vector = np.zeros((self._nframes, )) # weights should reflect the density of the points on the manifold - for i in range(self.nframes): - q_vector[i] = np.dot(self._kernel2[i, :], self._weights_ker) + for i in range(self._nframes): + q_vector[i] = np.dot(self._kernel[i, :], self._weights_ker) # Form a new kernel from the anisotropic diffusion term q self._kernel /= np.sqrt(q_vector[:, np.newaxis].dot(q_vector[np.newaxis])) # Weighted Graph Laplacian normalization on this new graph - d_vector = np.zeros((self.nframes, )) - for i in range(self.nframes): - d_vector[i] = np.dot(self._kernel2[i, :], self._weights_ker) + d_vector = np.zeros((self._nframes, )) + for i in range(self._nframes): + d_vector[i] = np.dot(self._kernel[i, :], self._weights_ker) - for i in range(self.nframes): - self._kernel[i, :] = self._kernel2[i, :] * self._weights_ker + for i in range(self._nframes): + self._kernel[i, :] = self._kernel[i, :] * self._weights_ker # Define anisotropic transition by dividing kernel by this term self._kernel /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) - # Apply timescaling, HELP, I think I'm pythoning wrong - for i in range(t): + # Apply timescaling + for i in range(self._t): if i > 1: self._kernel.__matmul__(self._kernel) @@ -322,21 +371,31 @@ def spectral_gap(self): # TODO pass - def embedding(self, timestep): - # TODO - pass + def _prepare(self): + self.fit = np.zeros((self.eigenvectors.shape[0], + self.num_eigenvectors)) + def _single_frame(self): + # The diffusion map embedding takes the ith sample in the + # data matrix and maps it to each of the ith coordinates + # in the set of k-dominant eigenvectors + for k in range(self.num_eigenvectors): + self.fit[self._ts.frame][k] = self.eigenvectors[k][self._ts.frame] -class EpsilonConstant(Epsilon): - """Premade function for the determination of epsilon based on providing - an arbitrary constant""" + def transform(self, num_eigenvectors): + """ Embeds a trajectory via the diffusion map + + Parameter + --------- + num_eigenvectors : int + The number of dominant eigenvectors to be used for diffusion mapping - def __init__(self, DistMatrix, epsilon): - """ - Parameters - ---------- - epsilon : int - The value of epsilon to be used as a local scale parameter """ - self._epsilon = epsilon - epsilon = np.full((nframes, ), epsilon) + self.num_eigenvectors = num_eigenvectors + self._setup_frames(self.DistanceMatrix._trajectory, + self.DistanceMatrix.start, self.DistanceMatrix.stop, + self.DistanceMatrix.step) + + + self.run() + return self.fit diff --git a/package/doc/sphinx/source/documentation_pages/analysis/diffusionmap.rst b/package/doc/sphinx/source/documentation_pages/analysis/diffusionmap.rst index e69de29bb2d..1eefec1e079 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis/diffusionmap.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis/diffusionmap.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.analysis.diffusionmap diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index fca3bf211c0..42c9ec7b741 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -35,35 +35,57 @@ def __init__(self): # assert_almost_equal(self.ev[0,0], .095836037343022831) # faster self.u = MDAnalysis.Universe(PDB, XTC) - self.dmap = diffusionmap.DiffusionMap(self.u, select='backbone') - self.dmap.run() + self.dist = diffusionmap.DistanceMatrix(self.u, select='backbone') + self.dist.run() + self.epsilon = diffusionmap.EpsilonConstant(self.dist, 500) + self.epsilon.determine_epsilon() + self.dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon) + self.dmap.decompose_kernel() self.eigvals = self.dmap.eigenvalues self.eigvects = self.dmap.eigenvectors - self.weights = np.ones((self.dmap.nframes, )) + self.weights = np.ones((self.dist.nframes, )) def test_eg(self): # number of frames is trajectory is now 10 vs. 98 - assert_equal(self.eigvals.shape, (self.dmap.nframes, )) + assert_equal(self.eigvals.shape, (self.dist.nframes, )) + # makes no sense to test values here, no physical meaning assert_almost_equal(self.eigvals[0], 1.0, decimal=5) - assert_almost_equal(self.eigvals[-1], 0.006621, decimal=3) def test_ev(self): - assert_equal(self.eigvects.shape, (self.dmap.nframes, self.dmap.nframes)) - assert_almost_equal(self.eigvects[0, 0], 0.3172, decimal=2) + #makes no sense to test values here, no physical meaning + assert_equal(self.eigvects.shape, (self.dist.nframes, self.dist.nframes)) def test_weights(self): - dmap2 = diffusionmap.DiffusionMap(self.u, select='backbone', - weights=self.weights, k=5) - dmap2.run() - assert_almost_equal(self.eigvals, dmap2.eigenvalues, decimal=5) - assert_almost_equal(self.eigvects, dmap2.eigenvectors, decimal=6) + dist = diffusionmap.DistanceMatrix(self.u, select='backbone') + dist.run() + dmap = diffusionmap.DiffusionMap(dist,self.epsilon, + weights=self.weights) + dmap.decompose_kernel() + assert_almost_equal(self.eigvals, dmap.eigenvalues, decimal=5) + assert_almost_equal(self.eigvects, dmap.eigenvectors, decimal=6) + @raises(ValueError) def test_wrong_weights(self): - dmap2 = diffusionmap.DiffusionMap(self.u, select='backbone', + dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon, weights=np.ones((2,))) + def test_timescaling(self): + dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon, + timescale=2) + dmap.decompose_kernel() + assert_equal(dmap.eigenvalues.shape, (self.dist.nframes, )) + # makes no sense to test values here, no physical meaning + assert_almost_equal(self.eigvals[0], 1.0, decimal=5) + + + def test_different_steps(self): + dist = diffusionmap.DistanceMatrix(self.u, select='backbone',step=3) + dist.run() + dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon) + dmap.decompose_kernel() - def test_constant_epsilon(self): - dmap3 = diffusionmap.DiffusionMap(self.u, select='backbone') - dmap3.run() - assert_almost_equal(self.eigvals, dmap3.eigenvalues, decimal=5) + def test_transform(self): + self.num_eigenvectors = 4 + self.dmap.transform(self.num_eigenvectors) + assert_equal(self.dmap.fit.shape, (self.eigvects.shape[0], + self.num_eigenvectors)) From 32a9655bd15f118f5c3cb74ecb791ed564a0ba06 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Fri, 17 Jun 2016 20:44:15 -0700 Subject: [PATCH 12/22] Now witness the firepower of this fully armed and operational diffusion map --- package/MDAnalysis/analysis/diffusionmap.py | 69 +++++++++++++-------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 370cf0fc38c..e033c00b9ab 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -26,14 +26,14 @@ The diffusion map provides an estimate of the slowest collective coordinates for a trajectory. This non-linear dimension reduction method assumes that the trajectory is long enough to represent a probability -distribution of as protein close to the equilibrium. Furthermore, the diffusion -map assumes that the diffusion coefficients associated with the dynamical +distribution of a protein close to the equilibrium. Furthermore, the diffusion +map assumes that the diffusion coefficients associated with the dynamical motion of molecules in the system are constant. The eigenvectors with the largest eigenvalues are the more dominant collective coordinates. Assigning phyiscal meaning of the 'collective coordinates' is a fundamentally difficult problem. The time complexity of the diffusion map is O(N^3), where N is the -number of frames in the trajectory, and the storage complexity is O(N^2). -Instead of a single trajectory a sample of protein structures +number of frames in the trajectory, and the in-memory storage complexity is +O(N^2). Instead of a single trajectory a sample of protein structures can be used. The sample should be equiblibrated, at least locally. Different weights can be used to determine the anisotropy of the diffusion kernel. The motivation for the creation of an anisotropic kernel is given on page 14 of @@ -89,14 +89,24 @@ >>> eigenvalues = dmap.eigenvalues >>> eigenvectors = dmap.eigenvectors -From here we can perform an embedding onto the k dominant eigenvectors. - - >>> - >>> params_on_frame = dmap.transform(ts_one) - -This is calculated from an ad hoc determination of the spectral gap, if a more -rigorous investigation of the data is expected, you should probably do this on -your own. +From here we can perform an embedding onto the k dominant eigenvectors. This +is similar to the idea of a transform in Principal Component Analysis, but the +non-linearity of the map means there is no explicit relationship between the +lower dimensional space and our original trajectory. However, this is an +isometry (distance preserving map), which means that points close in the lower +dimensional space are close in the higher-dimensional space and vice versa. +In order to embed into the most relevant low-dimensional space, there should +exist some number of dominant eigenvectors, whose corresponding eigenvalues +diminish at a constant rate until falling off, this is referred to as a +spectral gap and should be somewhat apparent for a system at equilibrium with a +high number of frames. + + >>> num_dominant_eigenvectors = # some number less than the number of frames + >>> fit = dmap.transform(num_dominant_eigenvectors) + +From here it can be difficult to interpret the data, and is left as a task +for the user. The `diffusion distance` between frames i and j is best +approximated by the euclidean distance between rows i and j of self.fit. Classes ------- @@ -120,7 +130,8 @@ .. If you choose the default metric, this module uses the fast QCP algorithm [Theobald2005]_ to calculate the root mean square distance (RMSD) between -two coordinate sets (as implemented in :func:`MDAnalysis.lib.qcprot.CalcRMSDRotationalMatrix`). +two coordinate sets (as implemented +in :func:`MDAnalysis.lib.qcprot.CalcRMSDRotationalMatrix`). When using this module in published work please cite [Theobald2005]_. @@ -196,27 +207,27 @@ def __init__(self, u, select='all', metric=rms.rmsd, cutoff=1E0-5, self._setup_frames(traj, start, stop, step) def _prepare(self): - logger.info('numframes {0}'.format(self.nframes)) self.dist_matrix = np.zeros((self.nframes, self.nframes)) + self._i = -1 def _single_frame(self): - i = self._ts.frame + self._i = self._i + 1 i_ref = self.atoms.positions - self.atoms.center_of_mass() # diagonal entries need not be calculated due to metric(x,x) == 0 in # theory, _ts not updated properly. Possible savings by setting a # cutoff for significant decimal places to sparsify matrix - for j, ts in enumerate(self._u.trajectory[i:self.stop:self.step]): - if i == j: - self.dist_matrix[i,i] = 0 + for j, ts in enumerate(self._u.trajectory[self._i:self.stop:self.step]): + if self._i == j: + self.dist_matrix[self._i, self._i] = 0 else: + logger.info('j : {0}'.format(j)) self._ts = ts j_ref = self.atoms.positions-self.atoms.center_of_mass() dist = self._metric(i_ref, j_ref, weights=self._weights) - self.dist_matrix[i, j] = dist if dist > self._cutoff else 0 - self.dist_matrix[j, i] = self.dist_matrix[i, j] - - self._ts = self._u.trajectory[i] + self.dist_matrix[self._i, j] = dist if dist > self._cutoff else 0 + self.dist_matrix[j, self._i] = self.dist_matrix[self._i, j] + self._ts = self._u.trajectory.ts def save(self, filename): np.savetxt(filename, self.dist_matrix) @@ -388,14 +399,20 @@ def transform(self, num_eigenvectors): Parameter --------- num_eigenvectors : int - The number of dominant eigenvectors to be used for diffusion mapping - + The number of dominant eigenvectors to be used for + diffusion mapping + + Return + ------ + fit : array + The diffusion map embedding as defined by [Ferguson1]_. + This isn't a linear transformation, but an isometry + between the higher dimensional space and the space spanned by + the eigenvectors. """ self.num_eigenvectors = num_eigenvectors self._setup_frames(self.DistanceMatrix._trajectory, self.DistanceMatrix.start, self.DistanceMatrix.stop, self.DistanceMatrix.step) - - self.run() return self.fit From aea25a787aacd51e140b4fbe8acd2bdd10239e7b Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Sat, 18 Jun 2016 20:44:59 -0700 Subject: [PATCH 13/22] Fixed bug in _single_frame() Eliminated spectral gap function... for now Fixed matrix multiplication bug Fixed save issue Better single_frame loop Name changes, styling updates and doc fixes. --- package/MDAnalysis/analysis/diffusionmap.py | 126 ++++++++++-------- .../analysis/test_diffusionmap.py | 6 +- 2 files changed, 74 insertions(+), 58 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index e033c00b9ab..cfa9c7af921 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -71,6 +71,7 @@ >>> u = MDAnalysis.Universe(PSF,DCD) >>> dist_matrix = DistanceMatrix(u) + >>> dist_matrix.run() We leave determination of the appropriate scale parameter epsilon to the user, [Clementi1]_ uses a complex method involving the k-nearest-neighbors of a @@ -82,7 +83,7 @@ `determine_epsilon` function required by the API before running the diffusion map. - >>> epsilon_matrix = EpsilonConstant(DistanceMatrix, 500) + >>> epsilon_matrix = EpsilonConstant(distance_matrix, 1) >>> epsilon_matrix.determine_epsilon() >>> dmap = diffusionmap.DiffusionMap(dist_matrix, epsilon_matrix) >>> dmap.decompose_kernel() @@ -102,11 +103,12 @@ high number of frames. >>> num_dominant_eigenvectors = # some number less than the number of frames - >>> fit = dmap.transform(num_dominant_eigenvectors) + >>> dmap.transform(num_dominant_eigenvectors) From here it can be difficult to interpret the data, and is left as a task for the user. The `diffusion distance` between frames i and j is best -approximated by the euclidean distance between rows i and j of self.fit. +approximated by the euclidean distance between rows i and j of +self.diffusion_space. Classes ------- @@ -141,8 +143,7 @@ import numpy as np -from MDAnalysis.analysis import rms - +from .rms import rmsd from .base import AnalysisBase logger = logging.getLogger("MDAnalysis.analysis.diffusionmap") @@ -159,13 +160,14 @@ class DistanceMatrix(AnalysisBase): dist_matrix : array Array of all possible ij metric distances between frames in trajectory. This matrix is symmetric with zeros on the diagonal. - + calculated : boolean + Boolean indicating if the distance matrix has been calculated. Methods ------- save(filename) Save the `dist_matrix` to a given filename """ - def __init__(self, u, select='all', metric=rms.rmsd, cutoff=1E0-5, + def __init__(self, u, select='all', metric=rmsd, cutoff=1E0-5, weights=None, start=None, stop=None, step=None): """ Parameters @@ -203,34 +205,35 @@ def __init__(self, u, select='all', metric=rms.rmsd, cutoff=1E0-5, self._metric = metric self._cutoff = cutoff self._weights = weights + self.calculated = False # remember that this must be called before referencing self.nframes self._setup_frames(traj, start, stop, step) def _prepare(self): self.dist_matrix = np.zeros((self.nframes, self.nframes)) - self._i = -1 def _single_frame(self): - self._i = self._i + 1 + iframe = self._ts.frame i_ref = self.atoms.positions - self.atoms.center_of_mass() - # diagonal entries need not be calculated due to metric(x,x) == 0 in # theory, _ts not updated properly. Possible savings by setting a # cutoff for significant decimal places to sparsify matrix - for j, ts in enumerate(self._u.trajectory[self._i:self.stop:self.step]): - if self._i == j: - self.dist_matrix[self._i, self._i] = 0 - else: - logger.info('j : {0}'.format(j)) - self._ts = ts - j_ref = self.atoms.positions-self.atoms.center_of_mass() - dist = self._metric(i_ref, j_ref, weights=self._weights) - self.dist_matrix[self._i, j] = dist if dist > self._cutoff else 0 - self.dist_matrix[j, self._i] = self.dist_matrix[self._i, j] - self._ts = self._u.trajectory.ts + for j, ts in enumerate(self._u.trajectory[iframe:self.stop:self.step]): + self._ts = ts + j_ref = self.atoms.positions-self.atoms.center_of_mass() + dist = self._metric(i_ref, j_ref, weights=self._weights) + self.dist_matrix[self._index, j+self._index] = (dist if dist > + self._cutoff else 0) + self.dist_matrix[j+self._index, self._index] = (self.dist_matrix[ + self._index, + j+self._index]) + self._ts = self._u.trajectory[iframe] + + def _conclude(self): + self.calculated = True def save(self, filename): - np.savetxt(filename, self.dist_matrix) + np.save(filename, self.dist_matrix) logger.info("Wrote the distance-squared matrix to file %r", filename) @@ -239,16 +242,17 @@ class Epsilon(object): Attributes ---------- - scaledMatrix : DistanceMatrix object + scaled_matrix : DistanceMatrix object A matrix with each term divided by a local scale parameter - + calculated : boolean + True after determine epsilon called Methods ------- - determineEpsilon() + determine_epsilon() Determine local scale parameters using a chosen algorithm """ def __init__(self, DistanceMatrix, **kwargs): - self._DistanceMatrix = DistMatrix + self._dist_matrix = DistMatrix def determine_epsilon(self): pass @@ -265,11 +269,18 @@ def __init__(self, DistanceMatrix, epsilon): epsilon : int The value of epsilon to be used as a local scale parameter """ - self.scaledMatrix = DistanceMatrix.dist_matrix + if DistanceMatrix.calculated: + self.scaled_matrix = DistanceMatrix.dist_matrix + else: + raise AttributeError('Distance Matrix does not exist, was' + 'DistanceMatrix.run() called?') + self._epsilon = epsilon + self.calculated = False def determine_epsilon(self): - self.scaledMatrix /= self._epsilon + self.scaled_matrix /= self._epsilon + self.calculated = True return @@ -285,7 +296,7 @@ class DiffusionMap(AnalysisBase): Eigenvalues of the diffusion map eigenvectors: array Eigenvectors of the diffusion map - fit : array + diffusion_space : array After calling `transform(num_eigenvectors)` the diffusion map embedding into the lower dimensional diffusion space will exist here. @@ -294,24 +305,19 @@ class DiffusionMap(AnalysisBase): decompose_kernel() Constructs an anisotropic diffusion kernel and performs eigenvalue decomposition on it. - - spectral_gap() - Retrieve a guess for the set of eigenvectors reflecting the intrinsic - dimensionality of the molecular system. - transform(num_eigenvectors) Perform an embedding of a frame into the eigenvectors representing the collective coordinates. """ - def __init__(self, DistanceMatrix, epsilon, weights=None, timescale=1): + def __init__(self, DistanceMatrix, Epsilon, weights=None, timescale=1): """ Parameters ------------- DistanceMatrix : DistanceMatrix object Distance matrix to be made into a diffusion kernel and perform an eigenvector decomposition on. - epsilon : Epsilon object + Epsilon : Epsilon object Specifies the method used for the choice of scale parameter in the diffusion map. More information in [1], [2] and [3]. weights: list, optional @@ -322,10 +328,23 @@ def __init__(self, DistanceMatrix, epsilon, weights=None, timescale=1): The number of steps in the random walk, large t reflects global structure whereas small t indicates local structure. """ - self.DistanceMatrix = DistanceMatrix - self._nframes = DistanceMatrix.nframes + # check .run() called + if DistanceMatrix.calculated: + self._dist_matrix = DistanceMatrix + else: + raise AttributeError('Distance Matrix does not exist, was' + 'DistanceMatrix.run() called?') + + self._nframes = self._dist_matrix.nframes self._t = timescale - self._epsilon = epsilon + + # check determine_epsilon called + if Epsilon.calculated: + self._kernel = Epsilon.scaled_matrix + else: + raise AttributeError('scaled_matrix does not exist, was' + 'determine_epsilon() called?') + self._epsilon = Epsilon if weights is None: # weights do not apply to metric but density of data @@ -341,9 +360,7 @@ def __init__(self, DistanceMatrix, epsilon, weights=None, timescale=1): def decompose_kernel(self): # this should be a reference to the same object as - # self.DistanceMatrix.dist_matrix - self._kernel = self._epsilon.scaledMatrix - + # self.dist_matrix.dist_matrix # take negative exponent of scaled matrix to create Isotropic kernel self._kernel = np.exp(-self._kernel) @@ -369,8 +386,8 @@ def decompose_kernel(self): # Apply timescaling for i in range(self._t): - if i > 1: - self._kernel.__matmul__(self._kernel) + if i > 0: + self._kernel = self._kernel.dot(self._kernel) eigenvals, eigenvectors = np.linalg.eig(self._kernel) @@ -378,20 +395,17 @@ def decompose_kernel(self): self.eigenvalues = eigenvals[eg_arg[::-1]] self.eigenvectors = eigenvectors[eg_arg[::-1], :] - def spectral_gap(self): - # TODO - pass - def _prepare(self): - self.fit = np.zeros((self.eigenvectors.shape[0], - self.num_eigenvectors)) + self.diffusion_space = np.zeros((self.eigenvectors.shape[0], + self.num_eigenvectors)) def _single_frame(self): # The diffusion map embedding takes the ith sample in the # data matrix and maps it to each of the ith coordinates # in the set of k-dominant eigenvectors for k in range(self.num_eigenvectors): - self.fit[self._ts.frame][k] = self.eigenvectors[k][self._ts.frame] + self.diffusion_space[self._index][k] = (self.eigenvectors[k][ + self._index]) def transform(self, num_eigenvectors): """ Embeds a trajectory via the diffusion map @@ -404,15 +418,15 @@ def transform(self, num_eigenvectors): Return ------ - fit : array + diffusion_space : array The diffusion map embedding as defined by [Ferguson1]_. This isn't a linear transformation, but an isometry between the higher dimensional space and the space spanned by the eigenvectors. """ self.num_eigenvectors = num_eigenvectors - self._setup_frames(self.DistanceMatrix._trajectory, - self.DistanceMatrix.start, self.DistanceMatrix.stop, - self.DistanceMatrix.step) + self._setup_frames(self._dist_matrix._u.trajectory, + self._dist_matrix.start, self._dist_matrix.stop, + self._dist_matrix.step) self.run() - return self.fit + return self.diffusion_space diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index 42c9ec7b741..b6417ebf98d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -69,9 +69,10 @@ def test_weights(self): def test_wrong_weights(self): dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon, weights=np.ones((2,))) + def test_timescaling(self): dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon, - timescale=2) + timescale=2) dmap.decompose_kernel() assert_equal(dmap.eigenvalues.shape, (self.dist.nframes, )) # makes no sense to test values here, no physical meaning @@ -87,5 +88,6 @@ def test_different_steps(self): def test_transform(self): self.num_eigenvectors = 4 self.dmap.transform(self.num_eigenvectors) - assert_equal(self.dmap.fit.shape, (self.eigvects.shape[0], + assert_equal(self.dmap.diffusion_space.shape, + (self.eigvects.shape[0], self.num_eigenvectors)) From a9a5dbe09a4ee62883f5aca31a77f7083a2bc445 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Tue, 28 Jun 2016 18:38:46 -0700 Subject: [PATCH 14/22] Refactor of code to stop DiffusionMap inheriting from AnalysisBase Changed code further to make transform a simple iteration. Provided functionality for doing a diffusion map with just a distance matrix. --- package/MDAnalysis/analysis/diffusionmap.py | 133 ++++++++++---------- 1 file changed, 65 insertions(+), 68 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index cfa9c7af921..6b20f71ffc9 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -160,8 +160,7 @@ class DistanceMatrix(AnalysisBase): dist_matrix : array Array of all possible ij metric distances between frames in trajectory. This matrix is symmetric with zeros on the diagonal. - calculated : boolean - Boolean indicating if the distance matrix has been calculated. + Methods ------- save(filename) @@ -205,7 +204,7 @@ def __init__(self, u, select='all', metric=rmsd, cutoff=1E0-5, self._metric = metric self._cutoff = cutoff self._weights = weights - self.calculated = False + self._calculated = False # remember that this must be called before referencing self.nframes self._setup_frames(traj, start, stop, step) @@ -214,23 +213,22 @@ def _prepare(self): def _single_frame(self): iframe = self._ts.frame - i_ref = self.atoms.positions - self.atoms.center_of_mass() + i_ref = self.atoms.positions # diagonal entries need not be calculated due to metric(x,x) == 0 in # theory, _ts not updated properly. Possible savings by setting a # cutoff for significant decimal places to sparsify matrix for j, ts in enumerate(self._u.trajectory[iframe:self.stop:self.step]): self._ts = ts - j_ref = self.atoms.positions-self.atoms.center_of_mass() + j_ref = self.atoms.positions dist = self._metric(i_ref, j_ref, weights=self._weights) - self.dist_matrix[self._index, j+self._index] = (dist if dist > - self._cutoff else 0) - self.dist_matrix[j+self._index, self._index] = (self.dist_matrix[ - self._index, - j+self._index]) + self.dist_matrix[self._frame_index, j+self._frame_index] = ( + dist if dist > self._cutoff else 0) + self.dist_matrix[j+self._frame_index, self._frame_index] = ( + self.dist_matrix[self._frame_index, j+self._frame_index]) self._ts = self._u.trajectory[iframe] def _conclude(self): - self.calculated = True + self._calculated = True def save(self, filename): np.save(filename, self.dist_matrix) @@ -242,19 +240,27 @@ class Epsilon(object): Attributes ---------- - scaled_matrix : DistanceMatrix object + scaled_matrix : numpy array A matrix with each term divided by a local scale parameter - calculated : boolean - True after determine epsilon called + Methods ------- determine_epsilon() Determine local scale parameters using a chosen algorithm """ - def __init__(self, DistanceMatrix, **kwargs): - self._dist_matrix = DistMatrix + def __init__(self, dist_matrix): + if dist_matrix._calculated: + # the DistanceMatrix object + self.dist_matrix = dist_matrix + # the actual numpy array containing the data + self.scaled_matrix = dist_matrix.dist_matrix + else: + raise AttributeError('Distance Matrix does not exist, was' + 'DistanceMatrix.run() called?') + self._calculated = False def determine_epsilon(self): + # set self.calculated = True here pass @@ -262,29 +268,25 @@ class EpsilonConstant(Epsilon): """Premade function for the determination of epsilon based on providing an arbitrary constant""" - def __init__(self, DistanceMatrix, epsilon): + def __init__(self, dist_matrix, epsilon): """ Parameters ---------- epsilon : int The value of epsilon to be used as a local scale parameter """ - if DistanceMatrix.calculated: - self.scaled_matrix = DistanceMatrix.dist_matrix - else: - raise AttributeError('Distance Matrix does not exist, was' - 'DistanceMatrix.run() called?') - + Epsilon.__init__(self, dist_matrix) self._epsilon = epsilon - self.calculated = False def determine_epsilon(self): - self.scaled_matrix /= self._epsilon - self.calculated = True + # set self.calculated = True here + if not self._calculated: + self.scaled_matrix /= self._epsilon + self._calculated = True return -class DiffusionMap(AnalysisBase): +class DiffusionMap(object): """Non-linear dimension reduction method Dimension reduction with diffusion mapping of selected structures in a @@ -310,17 +312,19 @@ class DiffusionMap(AnalysisBase): the collective coordinates. """ - def __init__(self, DistanceMatrix, Epsilon, weights=None, timescale=1): + def __init__(self, dist_matrix, epsilon=1, manifold_density=None, timescale=1): """ Parameters ------------- - DistanceMatrix : DistanceMatrix object + dist_matrix : DistanceMatrix object Distance matrix to be made into a diffusion kernel and perform an eigenvector decomposition on. - Epsilon : Epsilon object + epsilon : Float or Epsilon object Specifies the method used for the choice of scale parameter in the - diffusion map. More information in [1], [2] and [3]. - weights: list, optional + diffusion map. More information in [1], [2] and [3]. If a float + is given, this will be the scale parameter used for + the kernel. + manifold_density: list, optional The list has to have the same length as the trajectory. With 'None' the weight of each frame of the trajectory will be the same. @@ -328,25 +332,19 @@ def __init__(self, DistanceMatrix, Epsilon, weights=None, timescale=1): The number of steps in the random walk, large t reflects global structure whereas small t indicates local structure. """ - # check .run() called - if DistanceMatrix.calculated: - self._dist_matrix = DistanceMatrix - else: - raise AttributeError('Distance Matrix does not exist, was' - 'DistanceMatrix.run() called?') - + # .run() called in _prepare + self._dist_matrix = dist_matrix + # because we check for .run() in Epsilon class, we must be sure to call + # dist_matrix.run() before determine_epsilon, if a user wishes to + # provide their own epsilon class, dist_matrix.run() must be run + # prior to Diffusion Map initilization + self._epsilon = epsilon + # important for transform function self._nframes = self._dist_matrix.nframes + # determines length of diffusion process self._t = timescale - # check determine_epsilon called - if Epsilon.calculated: - self._kernel = Epsilon.scaled_matrix - else: - raise AttributeError('scaled_matrix does not exist, was' - 'determine_epsilon() called?') - self._epsilon = Epsilon - - if weights is None: + if manifold_density is None: # weights do not apply to metric but density of data self._weights_ker = np.ones((self._nframes,)) else: @@ -354,15 +352,23 @@ def __init__(self, DistanceMatrix, Epsilon, weights=None, timescale=1): raise ValueError("The weight should have the same length as " 'the trajectory') else: - # weights are constructed as relative to the mean + # density weights are constructed as relative to the mean self._weights_ker = (np.asarray(weights, dtype=np.float64) / np.mean(weights)) - def decompose_kernel(self): + def run(self): + # will only run if distance matrix not already calculated + self._dist_matrix.run() + # this is the case when user provides no Epsilon class + if isinstance(self._epsilon, float) or isinstance(self._epsilon, int): + self._epsilon = EpsilonConstant(self._dist_matrix, + self._epsilon) + + self._epsilon.determine_epsilon() # this should be a reference to the same object as # self.dist_matrix.dist_matrix # take negative exponent of scaled matrix to create Isotropic kernel - self._kernel = np.exp(-self._kernel) + self._kernel = np.exp(-self._epsilon.scaled_matrix) # define an anistropic diffusion term q q_vector = np.zeros((self._nframes, )) @@ -394,18 +400,7 @@ def decompose_kernel(self): eg_arg = np.argsort(eigenvals) self.eigenvalues = eigenvals[eg_arg[::-1]] self.eigenvectors = eigenvectors[eg_arg[::-1], :] - - def _prepare(self): - self.diffusion_space = np.zeros((self.eigenvectors.shape[0], - self.num_eigenvectors)) - - def _single_frame(self): - # The diffusion map embedding takes the ith sample in the - # data matrix and maps it to each of the ith coordinates - # in the set of k-dominant eigenvectors - for k in range(self.num_eigenvectors): - self.diffusion_space[self._index][k] = (self.eigenvectors[k][ - self._index]) + self._calculated = True def transform(self, num_eigenvectors): """ Embeds a trajectory via the diffusion map @@ -424,9 +419,11 @@ def transform(self, num_eigenvectors): between the higher dimensional space and the space spanned by the eigenvectors. """ - self.num_eigenvectors = num_eigenvectors - self._setup_frames(self._dist_matrix._u.trajectory, - self._dist_matrix.start, self._dist_matrix.stop, - self._dist_matrix.step) - self.run() + self.diffusion_space = np.zeros((self.eigenvectors.shape[0], + num_eigenvectors)) + + for i in range(self.nframes): + for j in range(num_eigenvectors): + self.diffusion_space[i][j] = self.eigenvectors[j][i] + return self.diffusion_space From 7492455f5b89fb5bb4a90be69b3e0508ef759b38 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Wed, 29 Jun 2016 11:33:12 -0700 Subject: [PATCH 15/22] Removed epsilon class from pull request, now must supply a constant as a scale parameter. Added exception to be thrown for large distance matrices. Updated diffusion map after some test-driven development, added error thrown for large frame size. Change to warning Removed exception test, fixed typo Fixed transform, kwargs acceptance, removed Epsilon after accidentally adding it back in. Minor fixes to docs. Fixes as suggested to be more pythonic and elegant Fixes to docs, Transform function. --- package/MDAnalysis/analysis/diffusionmap.py | 205 +++++++----------- .../analysis/test_diffusionmap.py | 91 ++++---- 2 files changed, 115 insertions(+), 181 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 6b20f71ffc9..3ed5c82b106 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -70,23 +70,16 @@ eigenvalues and eigenvectors. >>> u = MDAnalysis.Universe(PSF,DCD) - >>> dist_matrix = DistanceMatrix(u) - >>> dist_matrix.run() We leave determination of the appropriate scale parameter epsilon to the user, [Clementi1]_ uses a complex method involving the k-nearest-neighbors of a trajectory frame, whereas others simple use a trial-and-error approach with -a constant epsilon. For those users, a -`~MDAnalysis.analysis.diffusionmap.EpsilonConstant` class has been provided. -Any user choosing to write their own Epsilon class must write a class that -inherits from `~MDAnalysis.analysis.diffusionmap.Epsilon`, and call the -`determine_epsilon` function required by the API before running the diffusion -map. - - >>> epsilon_matrix = EpsilonConstant(distance_matrix, 1) - >>> epsilon_matrix.determine_epsilon() - >>> dmap = diffusionmap.DiffusionMap(dist_matrix, epsilon_matrix) - >>> dmap.decompose_kernel() +a constant epsilon. Users wishing to use a complex method to determine epsilon +will have to initialize a distance matrix and scale it appropriately themselves +and then pass the new scaled distance matrix as a parameter. + + >>> dmap = diffusionmap.DiffusionMap(dist_matrix, epsilo =2) + >>> dmap.run() >>> eigenvalues = dmap.eigenvalues >>> eigenvectors = dmap.eigenvectors @@ -102,33 +95,37 @@ spectral gap and should be somewhat apparent for a system at equilibrium with a high number of frames. - >>> num_dominant_eigenvectors = # some number less than the number of frames - >>> dmap.transform(num_dominant_eigenvectors) + >>> num_eigenvectors = # some number less than the number of frames + >>> fit = dmap.transform(num_eigenvectors) From here it can be difficult to interpret the data, and is left as a task for the user. The `diffusion distance` between frames i and j is best approximated by the euclidean distance between rows i and j of -self.diffusion_space. +self.diffusion_space. A Jupyter notebook providing an analysis of protein +opening and closing is provided [here](#TODO url to notebook) Classes ------- .. autoclass:: DiffusionMap .. autoclass:: DistMatrix -.. autoclass:: Epsilon References --------- If you use this Dimension Reduction method in a publication, please reference: -..[Ferguson1] Ferguson, A. L.; Panagiotopoulos, A. Z.; Kevrekidis, I. G.; -Debenedetti, P. G. Chem. Phys. Lett. 2011, 509, 1−11 -..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. -Appl. Comput. Harmon. Anal. 21, 5–30. -..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) +..[Ferguson1] Ferguson, A. L.; Panagiotopoulos, A. Z.; Kevrekidis, I. G. +Debenedetti, P. G. Nonlinear dimensionality reduction in molecular +simulation: The diffusion map approach Chem. Phys. Lett. 509, 1−11 (2011) +..[Lafon1] Coifman, Ronald R., Lafon, Stephane Diffusion maps. +Appl. Comput. Harmon. Anal. 21, 5–30 (2006). +..[Lafon2] Boaz Nadler, Stéphane Lafon, Ronald R. Coifman, Ioannis G. Kevrekidis. +Diffusion maps, spectral clustering and reaction coordinates +of dynamical systems. Appl. Comput. Harmon. Anal. 21 (2006) 113–127 +..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. Determination of reaction coordinates via locally scaled -diffusion map. Journal of Chemical Physics. +diffusion map. J. Chem. Phys. 134, 124116 (2011). .. If you choose the default metric, this module uses the fast QCP algorithm [Theobald2005]_ to calculate the root mean square distance (RMSD) between @@ -140,7 +137,9 @@ """ from six.moves import range import logging +import warnings +import MDAnalysis as mda import numpy as np from .rms import rmsd @@ -235,57 +234,6 @@ def save(self, filename): logger.info("Wrote the distance-squared matrix to file %r", filename) -class Epsilon(object): - """Manipulates a distance matrix by local scale parameters - - Attributes - ---------- - scaled_matrix : numpy array - A matrix with each term divided by a local scale parameter - - Methods - ------- - determine_epsilon() - Determine local scale parameters using a chosen algorithm - """ - def __init__(self, dist_matrix): - if dist_matrix._calculated: - # the DistanceMatrix object - self.dist_matrix = dist_matrix - # the actual numpy array containing the data - self.scaled_matrix = dist_matrix.dist_matrix - else: - raise AttributeError('Distance Matrix does not exist, was' - 'DistanceMatrix.run() called?') - self._calculated = False - - def determine_epsilon(self): - # set self.calculated = True here - pass - - -class EpsilonConstant(Epsilon): - """Premade function for the determination of epsilon based on providing - an arbitrary constant""" - - def __init__(self, dist_matrix, epsilon): - """ - Parameters - ---------- - epsilon : int - The value of epsilon to be used as a local scale parameter - """ - Epsilon.__init__(self, dist_matrix) - self._epsilon = epsilon - - def determine_epsilon(self): - # set self.calculated = True here - if not self._calculated: - self.scaled_matrix /= self._epsilon - self._calculated = True - return - - class DiffusionMap(object): """Non-linear dimension reduction method @@ -299,77 +247,79 @@ class DiffusionMap(object): eigenvectors: array Eigenvectors of the diffusion map diffusion_space : array - After calling `transform(num_eigenvectors)` the diffusion map embedding + After calling `transform(n_eigenvectors)` the diffusion map embedding into the lower dimensional diffusion space will exist here. Methods ------- - decompose_kernel() + run() Constructs an anisotropic diffusion kernel and performs eigenvalue decomposition on it. - transform(num_eigenvectors) + transform(n_eigenvectors, time) Perform an embedding of a frame into the eigenvectors representing the collective coordinates. """ - def __init__(self, dist_matrix, epsilon=1, manifold_density=None, timescale=1): + def __init__(self, u, epsilon=1, manifold_density=None, + timescale=1, **kwargs): """ Parameters ------------- - dist_matrix : DistanceMatrix object - Distance matrix to be made into a diffusion kernel and perform - an eigenvector decomposition on. - epsilon : Float or Epsilon object + u : MDAnalysis Universe or DistanceMatrix object + Can be a Universe, in which case one must supply kwargs for the + initialization of a DistanceMatrix. Otherwise, this can be a + DistanceMatrix already initialized. Either way, this will be made + into a diffusion kernel. + epsilon : Float Specifies the method used for the choice of scale parameter in the - diffusion map. More information in [1], [2] and [3]. If a float - is given, this will be the scale parameter used for - the kernel. + diffusion map. More information in [1], [2] and [3], Default: 1. manifold_density: list, optional The list has to have the same length as the trajectory. With 'None' the weight of each frame of the trajectory will be the - same. + same, Default : None timescale: int, optional The number of steps in the random walk, large t reflects global - structure whereas small t indicates local structure. - """ - # .run() called in _prepare - self._dist_matrix = dist_matrix - # because we check for .run() in Epsilon class, we must be sure to call - # dist_matrix.run() before determine_epsilon, if a user wishes to - # provide their own epsilon class, dist_matrix.run() must be run - # prior to Diffusion Map initilization + structure whereas small t indicates local structure, Default: 1 + """ + if isinstance(u, mda.Universe): + self._dist_matrix = DistanceMatrix(u, **kwargs) + elif isinstance(u, DistanceMatrix): + self._dist_matrix = u + else: + raise ValueError("U is not a Universe or DistanceMatrix and" + " so the DiffusionMap has no data to work with.") self._epsilon = epsilon - # important for transform function + # important for transform function and length of .run() method self._nframes = self._dist_matrix.nframes + if self._nframes > 2000: + warnings.warn("The distance matrix is very large, and can " + "be very slow to compute. Consider picking a larger " + "step size in distance matrix initialization.") + # determines length of diffusion process - self._t = timescale + self._timescale = timescale if manifold_density is None: # weights do not apply to metric but density of data self._weights_ker = np.ones((self._nframes,)) else: - if weights.shape[0] != self._nframes: + if manifold_density.shape[0] != self._nframes: raise ValueError("The weight should have the same length as " 'the trajectory') else: # density weights are constructed as relative to the mean - self._weights_ker = (np.asarray(weights, dtype=np.float64) / - np.mean(weights)) + self._weights_ker = (np.asarray(manifold_density, + dtype=np.float64) / + np.mean(manifold_density)) def run(self): - # will only run if distance matrix not already calculated - self._dist_matrix.run() - # this is the case when user provides no Epsilon class - if isinstance(self._epsilon, float) or isinstance(self._epsilon, int): - self._epsilon = EpsilonConstant(self._dist_matrix, - self._epsilon) - - self._epsilon.determine_epsilon() - # this should be a reference to the same object as - # self.dist_matrix.dist_matrix + # run only if distance matrix not already calculated + if not self._dist_matrix._calculated: + self._dist_matrix.run() + self._scaled_matrix = (self._dist_matrix.dist_matrix ** 2 / + self._epsilon) # take negative exponent of scaled matrix to create Isotropic kernel - self._kernel = np.exp(-self._epsilon.scaled_matrix) - + self._kernel = np.exp(-self._scaled_matrix) # define an anistropic diffusion term q q_vector = np.zeros((self._nframes, )) # weights should reflect the density of the points on the manifold @@ -377,7 +327,8 @@ def run(self): q_vector[i] = np.dot(self._kernel[i, :], self._weights_ker) # Form a new kernel from the anisotropic diffusion term q - self._kernel /= np.sqrt(q_vector[:, np.newaxis].dot(q_vector[np.newaxis])) + self._kernel /= (np.sqrt(q_vector[:, np.newaxis + ].dot(q_vector[np.newaxis]))) # Weighted Graph Laplacian normalization on this new graph d_vector = np.zeros((self._nframes, )) @@ -391,39 +342,31 @@ def run(self): self._kernel /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) # Apply timescaling - for i in range(self._t): - if i > 0: - self._kernel = self._kernel.dot(self._kernel) + if self._timescale > 1: + self._kernel = np.linalg.matrix_power(self._kernel, + self._timescale) eigenvals, eigenvectors = np.linalg.eig(self._kernel) - eg_arg = np.argsort(eigenvals) self.eigenvalues = eigenvals[eg_arg[::-1]] - self.eigenvectors = eigenvectors[eg_arg[::-1], :] + self.eigenvectors = eigenvectors[eg_arg[::-1]] self._calculated = True - def transform(self, num_eigenvectors): + def transform(self, n_eigenvectors, time): """ Embeds a trajectory via the diffusion map Parameter --------- - num_eigenvectors : int + n_eigenvectors : int The number of dominant eigenvectors to be used for diffusion mapping - + time : float + Exponent that eigenvalues are raised to for embedding, for large + values, more dominant eigenvectors determine diffusion distance. Return ------ diffusion_space : array The diffusion map embedding as defined by [Ferguson1]_. - This isn't a linear transformation, but an isometry - between the higher dimensional space and the space spanned by - the eigenvectors. """ - self.diffusion_space = np.zeros((self.eigenvectors.shape[0], - num_eigenvectors)) - - for i in range(self.nframes): - for j in range(num_eigenvectors): - self.diffusion_space[i][j] = self.eigenvectors[j][i] - - return self.diffusion_space + return (self.eigenvectors[1:n_eigenvectors+1,].T * + (self.eigenvalues[1:n_eigenvectors+1]**time)) diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index b6417ebf98d..73149cbad20 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -17,77 +17,68 @@ import numpy as np import MDAnalysis import MDAnalysis.analysis.diffusionmap as diffusionmap -from numpy.testing import (assert_almost_equal, assert_equal, raises) +from numpy.testing import (assert_almost_equal, assert_equal, + assert_array_almost_equal,raises) from MDAnalysisTests.datafiles import PDB, XTC class TestDiffusionmap(object): - def __init__(self): - # slow 6s test - # u = MDAnalysis.Universe(PSF,DCD) - # eg,ev=diffusionmap.diffusionmap(u) - # assert_equal(self.eg.shape, (98,)) - # assert_almost_equal(self.eg[0], 1.0) - # assert_almost_equal(self.eg[-1],0.03174182) - # assert_equal(self.ev.shape, (98,98)) - # assert_almost_equal(self.ev[0,0], .095836037343022831) - # faster + def setUp(self): self.u = MDAnalysis.Universe(PDB, XTC) self.dist = diffusionmap.DistanceMatrix(self.u, select='backbone') - self.dist.run() - self.epsilon = diffusionmap.EpsilonConstant(self.dist, 500) - self.epsilon.determine_epsilon() - self.dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon) - self.dmap.decompose_kernel() + self.dmap = diffusionmap.DiffusionMap(self.dist) + self.dmap.run() self.eigvals = self.dmap.eigenvalues self.eigvects = self.dmap.eigenvectors - self.weights = np.ones((self.dist.nframes, )) def test_eg(self): # number of frames is trajectory is now 10 vs. 98 assert_equal(self.eigvals.shape, (self.dist.nframes, )) # makes no sense to test values here, no physical meaning - assert_almost_equal(self.eigvals[0], 1.0, decimal=5) - - def test_ev(self): - #makes no sense to test values here, no physical meaning - assert_equal(self.eigvects.shape, (self.dist.nframes, self.dist.nframes)) + del self.dmap - def test_weights(self): - dist = diffusionmap.DistanceMatrix(self.u, select='backbone') - dist.run() - dmap = diffusionmap.DiffusionMap(dist,self.epsilon, - weights=self.weights) - dmap.decompose_kernel() - assert_almost_equal(self.eigvals, dmap.eigenvalues, decimal=5) - assert_almost_equal(self.eigvects, dmap.eigenvectors, decimal=6) + def test_dist_weights(self): + backbone = self.u.select_atoms('backbone') + weights_atoms = np.ones(len(backbone.atoms)) + self.dist = diffusionmap.DistanceMatrix(self.u, select='backbone', + weights=weights_atoms) + self.dist.run() + del self.dist + def test_kernel_weights(self): + self.dist = diffusionmap.DistanceMatrix(self.u, select='backbone') + self.weights_ker = np.ones((self.dist.nframes, )) + self.dmap = diffusionmap.DiffusionMap(self.dist, + manifold_density=self.weights_ker) + self.dmap.run() + assert_array_almost_equal(self.eigvals, self.dmap.eigenvalues, decimal=5) + assert_array_almost_equal(self.eigvects, self.dmap.eigenvectors, decimal=6) + del self.dist, self.dmap @raises(ValueError) - def test_wrong_weights(self): - dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon, - weights=np.ones((2,))) - - def test_timescaling(self): - dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon, - timescale=2) - dmap.decompose_kernel() - assert_equal(dmap.eigenvalues.shape, (self.dist.nframes, )) - # makes no sense to test values here, no physical meaning - assert_almost_equal(self.eigvals[0], 1.0, decimal=5) + def test_wrong_kernel_weights(self): + self.dmap = diffusionmap.DiffusionMap(self.dist, + manifold_density=np.ones((2,))) + del self.dmap + def test_timescaling(self): + self.dmap = diffusionmap.DiffusionMap(self.u, timescale=2) + self.dmap.run() + assert_equal(self.dmap.eigenvalues.shape, (self.dmap._nframes, )) + del self.dmap def test_different_steps(self): - dist = diffusionmap.DistanceMatrix(self.u, select='backbone',step=3) - dist.run() - dmap = diffusionmap.DiffusionMap(self.dist, self.epsilon) - dmap.decompose_kernel() + self.dmap = diffusionmap.DiffusionMap(self.u, select='backbone', step=3) + self.dmap.run() + del self.dmap, self.dist def test_transform(self): - self.num_eigenvectors = 4 - self.dmap.transform(self.num_eigenvectors) - assert_equal(self.dmap.diffusion_space.shape, - (self.eigvects.shape[0], - self.num_eigenvectors)) + self.n_eigenvectors = 4 + self.dmap = diffusionmap.DiffusionMap(self.u) + self.dmap.run() + diffusion_space = self.dmap.transform(self.n_eigenvectors,1) + assert_equal(diffusion_space.shape, + (self.eigvects.shape[0], + self.n_eigenvectors)) From 9418cdc767043fcee476385dd6da3f94bce69cb2 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Sun, 3 Jul 2016 14:18:19 -0700 Subject: [PATCH 16/22] Removed anisotropic kernel creation. Figured out that I was unneccesarily exponentiating a diagonalizable matrix. Removed timescaling init, removed tests for anisotropic kernel and timescaling. --- package/MDAnalysis/analysis/diffusionmap.py | 58 +++---------------- .../analysis/test_diffusionmap.py | 27 +-------- 2 files changed, 8 insertions(+), 77 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 3ed5c82b106..6db1a080dbe 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -260,8 +260,7 @@ class DiffusionMap(object): the collective coordinates. """ - def __init__(self, u, epsilon=1, manifold_density=None, - timescale=1, **kwargs): + def __init__(self, u, epsilon=1, **kwargs): """ Parameters ------------- @@ -273,10 +272,6 @@ def __init__(self, u, epsilon=1, manifold_density=None, epsilon : Float Specifies the method used for the choice of scale parameter in the diffusion map. More information in [1], [2] and [3], Default: 1. - manifold_density: list, optional - The list has to have the same length as the trajectory. - With 'None' the weight of each frame of the trajectory will be the - same, Default : None timescale: int, optional The number of steps in the random walk, large t reflects global structure whereas small t indicates local structure, Default: 1 @@ -296,21 +291,6 @@ def __init__(self, u, epsilon=1, manifold_density=None, "be very slow to compute. Consider picking a larger " "step size in distance matrix initialization.") - # determines length of diffusion process - self._timescale = timescale - - if manifold_density is None: - # weights do not apply to metric but density of data - self._weights_ker = np.ones((self._nframes,)) - else: - if manifold_density.shape[0] != self._nframes: - raise ValueError("The weight should have the same length as " - 'the trajectory') - else: - # density weights are constructed as relative to the mean - self._weights_ker = (np.asarray(manifold_density, - dtype=np.float64) / - np.mean(manifold_density)) def run(self): # run only if distance matrix not already calculated @@ -320,36 +300,12 @@ def run(self): self._epsilon) # take negative exponent of scaled matrix to create Isotropic kernel self._kernel = np.exp(-self._scaled_matrix) - # define an anistropic diffusion term q - q_vector = np.zeros((self._nframes, )) - # weights should reflect the density of the points on the manifold - for i in range(self._nframes): - q_vector[i] = np.dot(self._kernel[i, :], self._weights_ker) - - # Form a new kernel from the anisotropic diffusion term q - self._kernel /= (np.sqrt(q_vector[:, np.newaxis - ].dot(q_vector[np.newaxis]))) - - # Weighted Graph Laplacian normalization on this new graph - d_vector = np.zeros((self._nframes, )) - for i in range(self._nframes): - d_vector[i] = np.dot(self._kernel[i, :], self._weights_ker) - - for i in range(self._nframes): - self._kernel[i, :] = self._kernel[i, :] * self._weights_ker - - # Define anisotropic transition by dividing kernel by this term - self._kernel /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) - - # Apply timescaling - if self._timescale > 1: - self._kernel = np.linalg.matrix_power(self._kernel, - self._timescale) - - eigenvals, eigenvectors = np.linalg.eig(self._kernel) - eg_arg = np.argsort(eigenvals) - self.eigenvalues = eigenvals[eg_arg[::-1]] - self.eigenvectors = eigenvectors[eg_arg[::-1]] + D_inv = np.diag(1 / self._kernel.sum(1)) + self._diff = np.dot(D_inv, self._kernel) + eigenvals, eigenvectors = np.linalg.eig(self._diff) + sort_idx = np.argsort(eigenvals)[::-1] + self.eigenvalues = eigenvals[sort_idx] + self.eigenvectors = eigenvectors[sort_idx] self._calculated = True def transform(self, n_eigenvectors, time): diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index 73149cbad20..2769e88607f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -37,7 +37,6 @@ def test_eg(self): # number of frames is trajectory is now 10 vs. 98 assert_equal(self.eigvals.shape, (self.dist.nframes, )) # makes no sense to test values here, no physical meaning - del self.dmap def test_dist_weights(self): backbone = self.u.select_atoms('backbone') @@ -45,35 +44,11 @@ def test_dist_weights(self): self.dist = diffusionmap.DistanceMatrix(self.u, select='backbone', weights=weights_atoms) self.dist.run() - del self.dist - - def test_kernel_weights(self): - self.dist = diffusionmap.DistanceMatrix(self.u, select='backbone') - self.weights_ker = np.ones((self.dist.nframes, )) - self.dmap = diffusionmap.DiffusionMap(self.dist, - manifold_density=self.weights_ker) - self.dmap.run() - assert_array_almost_equal(self.eigvals, self.dmap.eigenvalues, decimal=5) - assert_array_almost_equal(self.eigvects, self.dmap.eigenvectors, decimal=6) - del self.dist, self.dmap - - @raises(ValueError) - def test_wrong_kernel_weights(self): - self.dmap = diffusionmap.DiffusionMap(self.dist, - manifold_density=np.ones((2,))) - del self.dmap - - def test_timescaling(self): - self.dmap = diffusionmap.DiffusionMap(self.u, timescale=2) - self.dmap.run() - assert_equal(self.dmap.eigenvalues.shape, (self.dmap._nframes, )) - del self.dmap def test_different_steps(self): self.dmap = diffusionmap.DiffusionMap(self.u, select='backbone', step=3) self.dmap.run() - del self.dmap, self.dist - + def test_transform(self): self.n_eigenvectors = 4 self.dmap = diffusionmap.DiffusionMap(self.u) From aa98fa72a02ab171f319438932d983ee915f68e3 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Tue, 5 Jul 2016 11:04:05 -0700 Subject: [PATCH 17/22] Updated docs to remove references to anisotropy, update tutorial [skip ci] --- package/MDAnalysis/analysis/diffusionmap.py | 53 +++++++++------------ 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 6db1a080dbe..44cdc72775b 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -34,10 +34,8 @@ problem. The time complexity of the diffusion map is O(N^3), where N is the number of frames in the trajectory, and the in-memory storage complexity is O(N^2). Instead of a single trajectory a sample of protein structures -can be used. The sample should be equiblibrated, at least locally. Different -weights can be used to determine the anisotropy of the diffusion kernel. -The motivation for the creation of an anisotropic kernel is given on page 14 of -[Lafon1]_. The order of the sampled structures in the trajectory is irrelevant. +can be used. The sample should be equiblibrated, at least locally. The order of +the sampled structures in the trajectory is irrelevant. The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. @@ -52,36 +50,30 @@ The example uses files provided as part of the MDAnalysis test suite (in the variables :data:`~MDAnalysis.tests.datafiles.PSF` and -:data:`~MDAnalysis.tests.datafiles.DCD`). Notice that this trajectory -does not represent a sample from equilibrium. This violates a basic -assumption of the anisotropic kernel. The anisotropic kernel is created to act -as a discrete approximation for the Fokker-Plank equation from statistical -mechanics, results from a non-equilibrium trajectory shouldn't be interpreted -for this reason. This tutorial shows how to use the diffusionmap function. +:data:`~MDAnalysis.tests.datafiles.DCD`). This tutorial shows how to use the +Diffusion Map class. + First load all modules and test data :: >>> import MDAnalysis >>> import numpy as np >>> import MDAnalysis.analysis.diffusionmap as diffusionmap - >>> from MDAnalysis.tests.datafiles import PSF,DCD + >>> from MDAnalysis.tests.datafiles import PSF, DCD -Given a universe or atom group, we can calculate the diffusion map from -that trajectory using :class:`DiffusionMap`:: and get the corresponding -eigenvalues and eigenvectors. +Given a universe or atom group, we can create and eigenvalue decompose +the Diffusion Matrix from that trajectory using :class:`DiffusionMap`:: and get +the corresponding eigenvalues and eigenvectors. >>> u = MDAnalysis.Universe(PSF,DCD) We leave determination of the appropriate scale parameter epsilon to the user, [Clementi1]_ uses a complex method involving the k-nearest-neighbors of a trajectory frame, whereas others simple use a trial-and-error approach with -a constant epsilon. Users wishing to use a complex method to determine epsilon -will have to initialize a distance matrix and scale it appropriately themselves -and then pass the new scaled distance matrix as a parameter. +a constant epsilon. Currently, the constant epsilon method is implemented +by MDAnalysis. - >>> dmap = diffusionmap.DiffusionMap(dist_matrix, epsilo =2) + >>> dmap = diffusionmap.DiffusionMap(u, select='backbone', epsilon=2) >>> dmap.run() - >>> eigenvalues = dmap.eigenvalues - >>> eigenvectors = dmap.eigenvectors From here we can perform an embedding onto the k dominant eigenvectors. This is similar to the idea of a transform in Principal Component Analysis, but the @@ -96,7 +88,7 @@ high number of frames. >>> num_eigenvectors = # some number less than the number of frames - >>> fit = dmap.transform(num_eigenvectors) + >>> fit = dmap.transform(num_eigenvectors, time=1) From here it can be difficult to interpret the data, and is left as a task for the user. The `diffusion distance` between frames i and j is best @@ -118,10 +110,12 @@ ..[Ferguson1] Ferguson, A. L.; Panagiotopoulos, A. Z.; Kevrekidis, I. G. Debenedetti, P. G. Nonlinear dimensionality reduction in molecular simulation: The diffusion map approach Chem. Phys. Lett. 509, 1−11 (2011) +..[deLaPorte1] J. de la Porte, B. M. Herbst, W. Hereman, S. J. van der Walt. +An Introduction to Diffusion Maps. ..[Lafon1] Coifman, Ronald R., Lafon, Stephane Diffusion maps. Appl. Comput. Harmon. Anal. 21, 5–30 (2006). -..[Lafon2] Boaz Nadler, Stéphane Lafon, Ronald R. Coifman, Ioannis G. Kevrekidis. -Diffusion maps, spectral clustering and reaction coordinates +..[Lafon2] Boaz Nadler, Stéphane Lafon, Ronald R. Coifman, Ioannis G. +Kevrekidis. Diffusion maps, spectral clustering and reaction coordinates of dynamical systems. Appl. Comput. Harmon. Anal. 21 (2006) 113–127 ..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. Determination of reaction coordinates via locally scaled @@ -272,10 +266,7 @@ def __init__(self, u, epsilon=1, **kwargs): epsilon : Float Specifies the method used for the choice of scale parameter in the diffusion map. More information in [1], [2] and [3], Default: 1. - timescale: int, optional - The number of steps in the random walk, large t reflects global - structure whereas small t indicates local structure, Default: 1 - """ + """ if isinstance(u, mda.Universe): self._dist_matrix = DistanceMatrix(u, **kwargs) elif isinstance(u, DistanceMatrix): @@ -302,10 +293,10 @@ def run(self): self._kernel = np.exp(-self._scaled_matrix) D_inv = np.diag(1 / self._kernel.sum(1)) self._diff = np.dot(D_inv, self._kernel) - eigenvals, eigenvectors = np.linalg.eig(self._diff) - sort_idx = np.argsort(eigenvals)[::-1] - self.eigenvalues = eigenvals[sort_idx] - self.eigenvectors = eigenvectors[sort_idx] + self._eigenvals, self._eigenvectors = np.linalg.eig(self._diff) + sort_idx = np.argsort(self._eigenvals)[::-1] + self.eigenvalues = self._eigenvals[sort_idx] + self.eigenvectors = self._eigenvectors[sort_idx] self._calculated = True def transform(self, n_eigenvectors, time): From b24364e192389283bcc30e27cc7db68947df4b6a Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Wed, 6 Jul 2016 15:59:00 -0700 Subject: [PATCH 18/22] Added link to jupyter notebook [skip ci] --- package/MDAnalysis/analysis/diffusionmap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 44cdc72775b..52e747041f0 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -93,8 +93,8 @@ From here it can be difficult to interpret the data, and is left as a task for the user. The `diffusion distance` between frames i and j is best approximated by the euclidean distance between rows i and j of -self.diffusion_space. A Jupyter notebook providing an analysis of protein -opening and closing is provided [here](#TODO url to notebook) +self.diffusion_space. A Jupyter [notebook](https://github.com/jdetle/dimension_reduction/blob/master/diffusionMaps/Diffusion_Map_Analysis_of_ADK.ipynb) +providing an analysis of protein pening and closing has been provided. Classes ------- From a4c185a95555f1a40ac8302cac100127658ef4ca Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Thu, 7 Jul 2016 13:45:37 -0700 Subject: [PATCH 19/22] Added space to get sphinx pages to build properly. [skip ci] --- .../sphinx/source/documentation_pages/analysis_modules.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index 29f29697eb1..c3d3c35814a 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -112,5 +112,6 @@ Volumetric analysis Dimension Reduction =================== .. toctree:: - :maxdepth: 1 - analysis/diffusionmap + :maxdepth: 1 + + analysis/diffusionmap From b299d891789d7668b6ac5fde90118a41a19cd9b2 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Thu, 7 Jul 2016 14:22:10 -0700 Subject: [PATCH 20/22] Changed n_frames warning to 5000, eliminated reference to anisotropic kernel in docs fixed sphinx linking, documented metric API. Changes to documentation and some style fixes. More work on docs Change to license v2 Fixed tests Docs fixes, protected eigenvectors --- package/MDAnalysis/analysis/diffusionmap.py | 163 +++++++++++------- .../documentation_pages/analysis_modules.rst | 2 +- .../analysis/test_diffusionmap.py | 8 +- 3 files changed, 109 insertions(+), 64 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 52e747041f0..79e1dab2ebd 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -20,33 +20,28 @@ :Authors: Eugen Hruska, John Detlefs :Year: 2016 -:Copyright: GNU Public License v3 - -The module contains the non-linear dimension reduction method diffusion map. -The diffusion map provides an estimate of the slowest collective -coordinates for a trajectory. This non-linear dimension reduction method -assumes that the trajectory is long enough to represent a probability -distribution of a protein close to the equilibrium. Furthermore, the diffusion -map assumes that the diffusion coefficients associated with the dynamical -motion of molecules in the system are constant. The eigenvectors with -the largest eigenvalues are the more dominant collective coordinates. Assigning -phyiscal meaning of the 'collective coordinates' is a fundamentally difficult -problem. The time complexity of the diffusion map is O(N^3), where N is the -number of frames in the trajectory, and the in-memory storage complexity is -O(N^2). Instead of a single trajectory a sample of protein structures -can be used. The sample should be equiblibrated, at least locally. The order of -the sampled structures in the trajectory is irrelevant. +:Copyright: GNU Public License v2 + +This module contains the non-linear dimension reduction method diffusion map. +The eigenvectors of a diffusion matrix represent the 'collective coordinates' +of a molecule; the largest eigenvalues are the more dominant collective +coordinates. Assigning phyiscal meaning to the 'collective coordinates' is a +fundamentally difficult problem. The time complexity of the diffusion map is +:math:`O(N^3)`, where N is the number of frames in the trajectory, and the in-memory +storage complexity is :math:`O(N^2)`. Instead of a single trajectory a sample of +protein structures can be used. The sample should be equiblibrated, at least +locally. The order of the sampled structures in the trajectory is irrelevant. The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. -More details about diffusion maps are in [Lafon1]_ , [Ferguson1]_, and -[Clementi1]_. +More details about diffusion maps are in [deLaPorte1]_, [Lafon1]_ , +[Ferguson1]_, and [Clementi1]_. .. _Diffusion-Map-tutorial: Diffusion Map tutorial --------------------- +---------------------- The example uses files provided as part of the MDAnalysis test suite (in the variables :data:`~MDAnalysis.tests.datafiles.PSF` and @@ -55,7 +50,7 @@ First load all modules and test data :: - >>> import MDAnalysis + >>> import MDAnalysis as mda >>> import numpy as np >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF, DCD @@ -64,7 +59,7 @@ the Diffusion Matrix from that trajectory using :class:`DiffusionMap`:: and get the corresponding eigenvalues and eigenvectors. - >>> u = MDAnalysis.Universe(PSF,DCD) + >>> u = mda.Universe(PSF,DCD) We leave determination of the appropriate scale parameter epsilon to the user, [Clementi1]_ uses a complex method involving the k-nearest-neighbors of a @@ -75,8 +70,7 @@ >>> dmap = diffusionmap.DiffusionMap(u, select='backbone', epsilon=2) >>> dmap.run() -From here we can perform an embedding onto the k dominant eigenvectors. This -is similar to the idea of a transform in Principal Component Analysis, but the +From here we can perform an embedding onto the k dominant eigenvectors. The non-linearity of the map means there is no explicit relationship between the lower dimensional space and our original trajectory. However, this is an isometry (distance preserving map), which means that points close in the lower @@ -87,40 +81,83 @@ spectral gap and should be somewhat apparent for a system at equilibrium with a high number of frames. - >>> num_eigenvectors = # some number less than the number of frames + >>> # first cell of a jupyter notebook should contain: %matplotlib inline + >>> import matplotlib.pyplot as plt + >>> f, ax = plt.subplots() + >>> upper_limit = # some reasonably high number less than the n_eigenvectors + >>> ax.plot(dmap.eigenvalues[:upper_limit]) + >>> ax.set(xlabel ='eigenvalue index', ylabel='eigenvalue') + >>> plt.tight_layout() + +From here we can transform into the diffusion space + + >>> num_eigenvectors = # some number less than the number of frames after + >>> # inspecting for the spectral gap >>> fit = dmap.transform(num_eigenvectors, time=1) -From here it can be difficult to interpret the data, and is left as a task +It can be difficult to interpret the data, and is left as a task for the user. The `diffusion distance` between frames i and j is best approximated by the euclidean distance between rows i and j of -self.diffusion_space. A Jupyter [notebook](https://github.com/jdetle/dimension_reduction/blob/master/diffusionMaps/Diffusion_Map_Analysis_of_ADK.ipynb) -providing an analysis of protein pening and closing has been provided. +self.diffusion_space. + + +.. _Distance-Matrix-tutorial: + +Distance Matrix tutorial +------------------------ + +Often a, a custom distance matrix could be useful for local +epsilon determination or other manipulations on the diffusion +map method. The :class:`DistanceMatrix` exists in +:mod:`~MDAnalysis.analysis.diffusionmap` and can be passed +as an initialization argument for :class:`DiffusionMap`. + >>> import MDAnalysis as mda + >>> import numpy as np + >>> import MDAnalysis.analysis.diffusionmap as diffusionmap + >>> from MDAnalysis.tests.datafiles import PSF, DCD + +Now create the distance matrix and pass it as an argument to +:class:`DiffusionMap`. + + >>> u = mda.Universe(PSF,DCD) + >>> dist_matrix = diffusionmap.DistanceMatrix(u, select='all') + >>> dist_matrix.run() + >>> dmap = diffusionmap.DiffusionMap(dist_matrix) + >>> dmap.run() Classes ------- .. autoclass:: DiffusionMap -.. autoclass:: DistMatrix +.. autoclass:: DistanceMatrix References ---------- +---------- If you use this Dimension Reduction method in a publication, please -reference: -..[Ferguson1] Ferguson, A. L.; Panagiotopoulos, A. Z.; Kevrekidis, I. G. -Debenedetti, P. G. Nonlinear dimensionality reduction in molecular -simulation: The diffusion map approach Chem. Phys. Lett. 509, 1−11 (2011) -..[deLaPorte1] J. de la Porte, B. M. Herbst, W. Hereman, S. J. van der Walt. +cite: + +.. [Lafon1] +Coifman, Ronald R., Lafon, Stephane Diffusion maps. Appl. Comput. Harmon. +Anal. 21, 5–30 (2006). + +For more information +-------------------- + +.. [deLaPorte1] +J. de la Porte, B. M. Herbst, W. Hereman, S. J. van der Walt. An Introduction to Diffusion Maps. -..[Lafon1] Coifman, Ronald R., Lafon, Stephane Diffusion maps. -Appl. Comput. Harmon. Anal. 21, 5–30 (2006). -..[Lafon2] Boaz Nadler, Stéphane Lafon, Ronald R. Coifman, Ioannis G. -Kevrekidis. Diffusion maps, spectral clustering and reaction coordinates -of dynamical systems. Appl. Comput. Harmon. Anal. 21 (2006) 113–127 -..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. + +.. [Clementi1] +Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. Determination of reaction coordinates via locally scaled diffusion map. J. Chem. Phys. 134, 124116 (2011). +.. [Ferguson1] +Ferguson, A. L.; Panagiotopoulos, A. Z.; Kevrekidis, I. G. +Debenedetti, P. G. Nonlinear dimensionality reduction in molecular simulation: +The diffusion map approach Chem. Phys. Lett. 509, 1−11 (2011) + .. If you choose the default metric, this module uses the fast QCP algorithm [Theobald2005]_ to calculate the root mean square distance (RMSD) between two coordinate sets (as implemented @@ -133,7 +170,7 @@ import logging import warnings -import MDAnalysis as mda +from MDAnalysis.core.AtomGroup import Universe import numpy as np from .rms import rmsd @@ -143,8 +180,12 @@ class DistanceMatrix(AnalysisBase): - """ Calculate the pairwise distance between each frame in a trajectory using - a given metric + """Calculate the pairwise distance between each frame in a trajectory + using a given metric + + A distance matrix can be initialized on its own and used as an + initialization argument in :class:`DiffusionMap`. Refer to the + :ref:`Distance-Matrix-tutorial` for a demonstration. Attributes ---------- @@ -177,7 +218,11 @@ def __init__(self, u, select='all', metric=rmsd, cutoff=1E0-5, different frames. Water should be excluded. metric : function, optional Maps two numpy arrays to a float, is positive definite and - symmetric, Default: metric is set to rms.rmsd(). + symmetric. The API for a metric requires that the arrays must have + equal length, and that the function should have weights as an + optional argument. Weights give each index value its own weight for + the metric calculation over the entire arrays. Default: metric is + set to rms.rmsd(). cutoff : float, optional Specify a given cutoff for metric values to be considered equal, Default: 1EO-5 @@ -190,7 +235,6 @@ def __init__(self, u, select='all', metric=rmsd, cutoff=1E0-5, step : int, optional Step between frames to analyse, Default: 1 """ - self._u = u traj = self._u.trajectory self.atoms = self._u.select_atoms(select) @@ -198,11 +242,11 @@ def __init__(self, u, select='all', metric=rmsd, cutoff=1E0-5, self._cutoff = cutoff self._weights = weights self._calculated = False - # remember that this must be called before referencing self.nframes + # remember that this must be called before referencing self.n_frames self._setup_frames(traj, start, stop, step) def _prepare(self): - self.dist_matrix = np.zeros((self.nframes, self.nframes)) + self.dist_matrix = np.zeros((self.n_frames, self.n_frames)) def _single_frame(self): iframe = self._ts.frame @@ -238,11 +282,6 @@ class DiffusionMap(object): ---------- eigenvalues: array Eigenvalues of the diffusion map - eigenvectors: array - Eigenvectors of the diffusion map - diffusion_space : array - After calling `transform(n_eigenvectors)` the diffusion map embedding - into the lower dimensional diffusion space will exist here. Methods ------- @@ -265,9 +304,13 @@ def __init__(self, u, epsilon=1, **kwargs): into a diffusion kernel. epsilon : Float Specifies the method used for the choice of scale parameter in the - diffusion map. More information in [1], [2] and [3], Default: 1. - """ - if isinstance(u, mda.Universe): + diffusion map. More information in [Lafon1]_, [Ferguson1]_ and + [Clementi1]_, Default: 1. + **kwargs + Parameters to be passed for the initialization of a + :class:`DistanceMatrix`. + """ + if isinstance(u, Universe): self._dist_matrix = DistanceMatrix(u, **kwargs) elif isinstance(u, DistanceMatrix): self._dist_matrix = u @@ -276,14 +319,16 @@ def __init__(self, u, epsilon=1, **kwargs): " so the DiffusionMap has no data to work with.") self._epsilon = epsilon # important for transform function and length of .run() method - self._nframes = self._dist_matrix.nframes - if self._nframes > 2000: + self._n_frames = self._dist_matrix.n_frames + if self._n_frames > 5000: warnings.warn("The distance matrix is very large, and can " "be very slow to compute. Consider picking a larger " "step size in distance matrix initialization.") def run(self): + """ Create and decompose the diffusion matrix in preparation + for a diffusion map.""" # run only if distance matrix not already calculated if not self._dist_matrix._calculated: self._dist_matrix.run() @@ -296,7 +341,7 @@ def run(self): self._eigenvals, self._eigenvectors = np.linalg.eig(self._diff) sort_idx = np.argsort(self._eigenvals)[::-1] self.eigenvalues = self._eigenvals[sort_idx] - self.eigenvectors = self._eigenvectors[sort_idx] + self._eigenvectors = self._eigenvectors[sort_idx] self._calculated = True def transform(self, n_eigenvectors, time): @@ -315,5 +360,5 @@ def transform(self, n_eigenvectors, time): diffusion_space : array The diffusion map embedding as defined by [Ferguson1]_. """ - return (self.eigenvectors[1:n_eigenvectors+1,].T * + return (self._eigenvectors[1:n_eigenvectors+1,].T * (self.eigenvalues[1:n_eigenvectors+1]**time)) diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index c3d3c35814a..3d6a3ec12b0 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -113,5 +113,5 @@ Dimension Reduction =================== .. toctree:: :maxdepth: 1 - + analysis/diffusionmap diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index 2769e88607f..052af99dea9 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -31,24 +31,24 @@ def setUp(self): self.dmap = diffusionmap.DiffusionMap(self.dist) self.dmap.run() self.eigvals = self.dmap.eigenvalues - self.eigvects = self.dmap.eigenvectors + self.eigvects = self.dmap._eigenvectors def test_eg(self): # number of frames is trajectory is now 10 vs. 98 - assert_equal(self.eigvals.shape, (self.dist.nframes, )) + assert_equal(self.eigvals.shape, (self.dist.n_frames, )) # makes no sense to test values here, no physical meaning def test_dist_weights(self): backbone = self.u.select_atoms('backbone') weights_atoms = np.ones(len(backbone.atoms)) self.dist = diffusionmap.DistanceMatrix(self.u, select='backbone', - weights=weights_atoms) + weights=weights_atoms) self.dist.run() def test_different_steps(self): self.dmap = diffusionmap.DiffusionMap(self.u, select='backbone', step=3) self.dmap.run() - + def test_transform(self): self.n_eigenvectors = 4 self.dmap = diffusionmap.DiffusionMap(self.u) From cc384f1a35cf2a3e4a3b31e2ce207d52d369888d Mon Sep 17 00:00:00 2001 From: John Date: Sat, 16 Jul 2016 12:09:08 -0700 Subject: [PATCH 21/22] Updated tests, updated docs --- package/MDAnalysis/analysis/diffusionmap.py | 4 ++-- .../MDAnalysisTests/analysis/test_diffusionmap.py | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index 79e1dab2ebd..cc13cbd9a90 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -280,7 +280,7 @@ class DiffusionMap(object): Attributes ---------- - eigenvalues: array + eigenvalues: array () Eigenvalues of the diffusion map Methods @@ -357,7 +357,7 @@ def transform(self, n_eigenvectors, time): values, more dominant eigenvectors determine diffusion distance. Return ------ - diffusion_space : array + diffusion_space : array (n_frames, n_eigenvectors) The diffusion map embedding as defined by [Ferguson1]_. """ return (self._eigenvectors[1:n_eigenvectors+1,].T * diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index 052af99dea9..0b5897bd27f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -42,12 +42,22 @@ def test_dist_weights(self): backbone = self.u.select_atoms('backbone') weights_atoms = np.ones(len(backbone.atoms)) self.dist = diffusionmap.DistanceMatrix(self.u, select='backbone', - weights=weights_atoms) + weights=weights_atoms, + step=3) self.dist.run() + self.dmap = diffusionmap.DiffusionMap(self.dist) + self.dmap.run() + assert_array_almost_equal(self.dmap.eigenvalues, [1,1,1,1], 4) + assert_array_almost_equal(self.dmap._eigenvectors, + ([[ 0, 0, 1, 0], + [ 0, 0, 0, 1], + [ -.707,-.707, 0, 0], + [ .707,-.707, 0, 0]]) ,2) def test_different_steps(self): self.dmap = diffusionmap.DiffusionMap(self.u, select='backbone', step=3) self.dmap.run() + assert_equal(self.dmap._eigenvectors.shape, (4,4)) def test_transform(self): self.n_eigenvectors = 4 From 313a174bb33bfc28ced2b38205ac505d38733617 Mon Sep 17 00:00:00 2001 From: John Detlefs Date: Tue, 19 Jul 2016 10:58:55 -0700 Subject: [PATCH 22/22] Fixed docs, returned self --- package/MDAnalysis/analysis/diffusionmap.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index cc13cbd9a90..a4337b50fcc 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -51,7 +51,6 @@ First load all modules and test data :: >>> import MDAnalysis as mda - >>> import numpy as np >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF, DCD @@ -112,7 +111,6 @@ :mod:`~MDAnalysis.analysis.diffusionmap` and can be passed as an initialization argument for :class:`DiffusionMap`. >>> import MDAnalysis as mda - >>> import numpy as np >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF, DCD @@ -191,7 +189,7 @@ class DistanceMatrix(AnalysisBase): ---------- atoms : AtomGroup Selected atoms in trajectory subject to dimension reduction - dist_matrix : array + dist_matrix : array, (n_frames, n_frames) Array of all possible ij metric distances between frames in trajectory. This matrix is symmetric with zeros on the diagonal. @@ -205,7 +203,7 @@ def __init__(self, u, select='all', metric=rmsd, cutoff=1E0-5, """ Parameters ---------- - u : trajectory `~MDAnalysis.core.AtomGroup.Universe` + u : universe `~MDAnalysis.core.AtomGroup.Universe` The MD Trajectory for dimension reduction, remember that computational cost of eigenvalue decomposition scales at O(N^3) where N is the number of frames. @@ -280,7 +278,7 @@ class DiffusionMap(object): Attributes ---------- - eigenvalues: array () + eigenvalues: array (n_frames,) Eigenvalues of the diffusion map Methods @@ -343,11 +341,12 @@ def run(self): self.eigenvalues = self._eigenvals[sort_idx] self._eigenvectors = self._eigenvectors[sort_idx] self._calculated = True + return self def transform(self, n_eigenvectors, time): """ Embeds a trajectory via the diffusion map - Parameter + Parameters --------- n_eigenvectors : int The number of dominant eigenvectors to be used for