msmbuilder · schwancr · Jun 18, 2014 · Jun 18, 2014 · Jun 18, 2014 · Jun 18, 2014
diff --git a/Extras/LPRMSD/lprmsd.py b/Extras/LPRMSD/lprmsd.py
@@ -13,6 +13,8 @@
 import _lprmsd
 import mdtraj as md
 
+from collections import namedtuple
+import copy
 import numpy as np
 import itertools
 from scipy import optimize
@@ -60,6 +62,96 @@ def ReadPermFile(fnm):
             continue
     return (LL, K)
 
+
+class TheoData(object):
+    """Stores temporary data required during Theobald RMSD calculation.
+
+    Notes:
+    Storing temporary data allows us to avoid re-calculating the G-Values
+    repeatedly. Also avoids re-centering the coordinates."""
+
+    Theoslice = namedtuple('TheoSlice', ('xyz', 'G'))
+
+    def __init__(self, XYZData, NumAtoms=None, G=None):
+        """Create a container for intermediate values during RMSD Calculation.
+
+        Notes:
+        1.  We remove center of mass.
+        2.  We pre-calculate matrix magnitudes (ConfG)"""
+
+        if NumAtoms is None or G is None:
+            NumConfs = len(XYZData)
+            NumAtoms = XYZData.shape[1]
+
+            XYZData = copy.copy(XYZData)
+            self.centerConformations(XYZData)
+
+            NumAtomsWithPadding = 4 + NumAtoms - NumAtoms % 4
+
+            # Load data and generators into aligned arrays
+            XYZData2 = np.zeros((NumConfs, 3, NumAtomsWithPadding), dtype=np.float32)
+            for i in range(NumConfs):
+                XYZData2[i, 0:3, 0:NumAtoms] = XYZData[i].transpose()
+
+            #Precalculate matrix magnitudes
+            ConfG = np.zeros((NumConfs,),dtype=np.float32)
+            for i in xrange(NumConfs):
+                ConfG[i] = self.calcGvalue(XYZData[i, :, :])
+
+            self.XYZData = XYZData2
+            self.G = ConfG
+            self.NumAtoms = NumAtoms
+            self.NumAtomsWithPadding = NumAtomsWithPadding
+            self.CheckCentered()
+        else:
+            self.XYZData = XYZData
+            self.G = G
+            self.NumAtoms = NumAtoms
+            self.NumAtomsWithPadding = XYZData.shape[2]
+
+    def __getitem__(self, key):
+        # to keep the dimensions right, we make everything a slice
+        if isinstance(key, int):
+            key = slice(key, key+1)
+        return TheoData(self.XYZData[key], NumAtoms=self.NumAtoms, G=self.G[key])
+
+    def __setitem__(self, key, value):
+        self.XYZData[key] = value.XYZData
+        self.G[key] = value.G
+
+    def CheckCentered(self, Epsilon=1E-5):
+        """Raise an exception if XYZAtomMajor has nonnzero center of mass(CM)."""
+
+        XYZ = self.XYZData.transpose(0, 2, 1)
+        x = np.array([max(abs(XYZ[i].mean(0))) for i in xrange(len(XYZ))]).max()
+        if x > Epsilon:
+            raise Exception("The coordinate data does not appear to have been centered correctly.")
+
+    @staticmethod
+    def centerConformations(XYZList):
+        """Remove the center of mass from conformations.  Inplace to minimize mem. use."""
+
+        for ci in xrange(XYZList.shape[0]):
+            X = XYZList[ci].astype('float64')  # To improve the accuracy of RMSD, it can help to do certain calculations in double precision.
+            X -= X.mean(0)
+            XYZList[ci] = X.astype('float32')
+        return
+
+    @staticmethod
+    def calcGvalue(XYZ):
+        """Calculate the sum of squares of the key matrix G.  A necessary component of Theobold RMSD algorithm."""
+
+        conf=XYZ.astype('float64')  # Doing this operation in double significantly improves numerical precision of RMSD
+        G = 0
+        G += np.dot(conf[:, 0], conf[:, 0])
+        G += np.dot(conf[:, 1], conf[:, 1])
+        G += np.dot(conf[:, 2], conf[:, 2])
+        return G
+
+    def __len__(self):
+        return len(self.XYZData)
+
+
 class LPTraj(dict):
     def __init__(self, S, atomindices=None, permuteindices=None):
         super(LPTraj, self).__init__()
@@ -69,9 +161,9 @@ def __init__(self, S, atomindices=None, permuteindices=None):
         pidx = list(itertools.chain(*permuteindices)) if permuteindices != None else []
 
         if atomindices == None:
-            self.TD = RMSD.TheoData(S.xyz)
+            self.TD = TheoData(S.xyz)
         else:
-            self.TD = RMSD.TheoData(S.xyz[:, np.array(aidx)])
+            self.TD = TheoData(S.xyz[:, np.array(aidx)])
 
     def __getitem__(self, key):
         if isinstance(key, int) or isinstance(key, slice) or isinstance(key, np.ndarray):

diff --git a/Extras/LPRMSD/setup.py b/Extras/LPRMSD/setup.py
@@ -1,14 +1,80 @@
+from __future__ import print_function
 import os, sys
 import numpy
 from glob import glob
+import tempfile
+import shutil
 from setuptools import setup, Extension
+from distutils.ccompiler import new_compiler
+
+def hasfunction(cc, funcname, include=None, extra_postargs=None):
+    # From http://stackoverflow.com/questions/
+    #            7018879/disabling-output-when-compiling-with-distutils
+    tmpdir = tempfile.mkdtemp(prefix='hasfunction-')
+    devnull = oldstderr = None
+    try:
+        try:
+            fname = os.path.join(tmpdir, 'funcname.c')
+            f = open(fname, 'w')
+            if include is not None:
+                f.write('#include %s\n' % include)
+            f.write('int main(void) {\n')
+            f.write('    %s;\n' % funcname)
+            f.write('}\n')
+            f.close()
+            devnull = open(os.devnull, 'w')
+            oldstderr = os.dup(sys.stderr.fileno())
+            os.dup2(devnull.fileno(), sys.stderr.fileno())
+            objects = cc.compile([fname], output_dir=tmpdir,
+                                 extra_postargs=extra_postargs)
+            cc.link_executable(objects, os.path.join(tmpdir, 'a.out'))
+        except Exception as e:
+            return False
+        return True
+    finally:
+        if oldstderr is not None:
+            os.dup2(oldstderr, sys.stderr.fileno())
+        if devnull is not None:
+            devnull.close()
+        shutil.rmtree(tmpdir)
+
+
+def detect_openmp():
+    "Does this compiler support OpenMP parallelization?"
+    compiler = new_compiler()
+    print('Attempting to autodetect OpenMP support...', end=' ')
+    hasopenmp = hasfunction(compiler, 'omp_get_num_threads()')
+    needs_gomp = hasopenmp
+    if not hasopenmp:
+        compiler.add_library('gomp')
+        hasopenmp = hasfunction(compiler, 'omp_get_num_threads()')
+        needs_gomp = hasopenmp
+    print
+    if hasopenmp:
+        print('Compiler supports OpenMP')
+    else:
+        print('Did not detect OpenMP support; parallel RMSD disabled')
+    return hasopenmp, needs_gomp
+
+
+hasopenmp, needs_gomp = detect_openmp()
+
+# If you are 32-bit you should remove the -m64 flag
+compile_args = ["-std=c99", "-O2", "-msse2", "-msse3", 
+                "-Wno-unused", "-m64"]
+
+extra_link_args = ["-lblas", "-lpthread", "-lm"]
+
+if hasopenmp:
+    compile_args.append("-fopenmp")
+
+if needs_gomp:
+    extra_link_args.append("-gomp")
 
 _lprmsd = Extension('_lprmsd',
           sources = glob('src/*.c'),
-          extra_compile_args = ["-std=c99","-O2",
-                                "-msse2","-msse3","-Wno-unused","-fopenmp","-m64"], 
-          # If you are 32-bit you should remove the -m64 flag
-          extra_link_args = ['-lblas', '-lpthread', '-lm', '-lgomp'],
+          extra_compile_args = compile_args,
+          extra_link_args = extra_link_args,
           include_dirs = [numpy.get_include(), os.path.join(numpy.get_include(), 'numpy')])
 
 setup(name='msmbuilder.metrics.lprmsd',

diff --git a/Extras/LPRMSD/src/lprmsd.c b/Extras/LPRMSD/src/lprmsd.c
@@ -25,7 +25,11 @@
 #include <stdio.h>
 #include "theobald_rmsd.h"
 #include "apc.h"
+
+#ifdef _OPENMP
 #include <omp.h>
+#endif
+
 #include <sys/time.h>
 
 #define CHECKARRAYFLOAT(ary,name) if (PyArray_TYPE(ary) != NPY_FLOAT32) { \

diff --git a/MSMBuilder/metrics/__init__.py b/MSMBuilder/metrics/__init__.py
@@ -13,9 +13,9 @@
 from .baseclasses import AbstractDistanceMetric
 from .baseclasses import Vectorized
 from .rmsd import RMSD
+from .lprmsd import LPRMSD
 from .dihedral import Dihedral
 from .contact import BooleanContact, AtomPairs, ContinuousContact
 from .projection import RedDimPNorm
 from .hybrid import Hybrid, HybridPNorm
-from .projection import RedDimPNorm
 from .positions import Positions