Skip to content
This repository has been archived by the owner on Jan 26, 2022. It is now read-only.

[WIP] Fix for LPRMSD after mdtraj merge #427

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
96 changes: 94 additions & 2 deletions Extras/LPRMSD/lprmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import _lprmsd
import mdtraj as md

from collections import namedtuple
import copy
import numpy as np
import itertools
from scipy import optimize
Expand Down Expand Up @@ -60,6 +62,96 @@ def ReadPermFile(fnm):
continue
return (LL, K)


class TheoData(object):
"""Stores temporary data required during Theobald RMSD calculation.

Notes:
Storing temporary data allows us to avoid re-calculating the G-Values
repeatedly. Also avoids re-centering the coordinates."""

Theoslice = namedtuple('TheoSlice', ('xyz', 'G'))

def __init__(self, XYZData, NumAtoms=None, G=None):
"""Create a container for intermediate values during RMSD Calculation.

Notes:
1. We remove center of mass.
2. We pre-calculate matrix magnitudes (ConfG)"""

if NumAtoms is None or G is None:
NumConfs = len(XYZData)
NumAtoms = XYZData.shape[1]

XYZData = copy.copy(XYZData)
self.centerConformations(XYZData)

NumAtomsWithPadding = 4 + NumAtoms - NumAtoms % 4

# Load data and generators into aligned arrays
XYZData2 = np.zeros((NumConfs, 3, NumAtomsWithPadding), dtype=np.float32)
for i in range(NumConfs):
XYZData2[i, 0:3, 0:NumAtoms] = XYZData[i].transpose()

#Precalculate matrix magnitudes
ConfG = np.zeros((NumConfs,),dtype=np.float32)
for i in xrange(NumConfs):
ConfG[i] = self.calcGvalue(XYZData[i, :, :])

self.XYZData = XYZData2
self.G = ConfG
self.NumAtoms = NumAtoms
self.NumAtomsWithPadding = NumAtomsWithPadding
self.CheckCentered()
else:
self.XYZData = XYZData
self.G = G
self.NumAtoms = NumAtoms
self.NumAtomsWithPadding = XYZData.shape[2]

def __getitem__(self, key):
# to keep the dimensions right, we make everything a slice
if isinstance(key, int):
key = slice(key, key+1)
return TheoData(self.XYZData[key], NumAtoms=self.NumAtoms, G=self.G[key])

def __setitem__(self, key, value):
self.XYZData[key] = value.XYZData
self.G[key] = value.G

def CheckCentered(self, Epsilon=1E-5):
"""Raise an exception if XYZAtomMajor has nonnzero center of mass(CM)."""

XYZ = self.XYZData.transpose(0, 2, 1)
x = np.array([max(abs(XYZ[i].mean(0))) for i in xrange(len(XYZ))]).max()
if x > Epsilon:
raise Exception("The coordinate data does not appear to have been centered correctly.")

@staticmethod
def centerConformations(XYZList):
"""Remove the center of mass from conformations. Inplace to minimize mem. use."""

for ci in xrange(XYZList.shape[0]):
X = XYZList[ci].astype('float64') # To improve the accuracy of RMSD, it can help to do certain calculations in double precision.
X -= X.mean(0)
XYZList[ci] = X.astype('float32')
return

@staticmethod
def calcGvalue(XYZ):
"""Calculate the sum of squares of the key matrix G. A necessary component of Theobold RMSD algorithm."""

conf=XYZ.astype('float64') # Doing this operation in double significantly improves numerical precision of RMSD
G = 0
G += np.dot(conf[:, 0], conf[:, 0])
G += np.dot(conf[:, 1], conf[:, 1])
G += np.dot(conf[:, 2], conf[:, 2])
return G

def __len__(self):
return len(self.XYZData)


class LPTraj(dict):
def __init__(self, S, atomindices=None, permuteindices=None):
super(LPTraj, self).__init__()
Expand All @@ -69,9 +161,9 @@ def __init__(self, S, atomindices=None, permuteindices=None):
pidx = list(itertools.chain(*permuteindices)) if permuteindices != None else []

if atomindices == None:
self.TD = RMSD.TheoData(S.xyz)
self.TD = TheoData(S.xyz)
else:
self.TD = RMSD.TheoData(S.xyz[:, np.array(aidx)])
self.TD = TheoData(S.xyz[:, np.array(aidx)])

def __getitem__(self, key):
if isinstance(key, int) or isinstance(key, slice) or isinstance(key, np.ndarray):
Expand Down
74 changes: 70 additions & 4 deletions Extras/LPRMSD/setup.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,80 @@
from __future__ import print_function
import os, sys
import numpy
from glob import glob
import tempfile
import shutil
from setuptools import setup, Extension
from distutils.ccompiler import new_compiler

def hasfunction(cc, funcname, include=None, extra_postargs=None):
# From http://stackoverflow.com/questions/
# 7018879/disabling-output-when-compiling-with-distutils
tmpdir = tempfile.mkdtemp(prefix='hasfunction-')
devnull = oldstderr = None
try:
try:
fname = os.path.join(tmpdir, 'funcname.c')
f = open(fname, 'w')
if include is not None:
f.write('#include %s\n' % include)
f.write('int main(void) {\n')
f.write(' %s;\n' % funcname)
f.write('}\n')
f.close()
devnull = open(os.devnull, 'w')
oldstderr = os.dup(sys.stderr.fileno())
os.dup2(devnull.fileno(), sys.stderr.fileno())
objects = cc.compile([fname], output_dir=tmpdir,
extra_postargs=extra_postargs)
cc.link_executable(objects, os.path.join(tmpdir, 'a.out'))
except Exception as e:
return False
return True
finally:
if oldstderr is not None:
os.dup2(oldstderr, sys.stderr.fileno())
if devnull is not None:
devnull.close()
shutil.rmtree(tmpdir)


def detect_openmp():
"Does this compiler support OpenMP parallelization?"
compiler = new_compiler()
print('Attempting to autodetect OpenMP support...', end=' ')
hasopenmp = hasfunction(compiler, 'omp_get_num_threads()')
needs_gomp = hasopenmp
if not hasopenmp:
compiler.add_library('gomp')
hasopenmp = hasfunction(compiler, 'omp_get_num_threads()')
needs_gomp = hasopenmp
print
if hasopenmp:
print('Compiler supports OpenMP')
else:
print('Did not detect OpenMP support; parallel RMSD disabled')
return hasopenmp, needs_gomp


hasopenmp, needs_gomp = detect_openmp()

# If you are 32-bit you should remove the -m64 flag
compile_args = ["-std=c99", "-O2", "-msse2", "-msse3",
"-Wno-unused", "-m64"]

extra_link_args = ["-lblas", "-lpthread", "-lm"]

if hasopenmp:
compile_args.append("-fopenmp")

if needs_gomp:
extra_link_args.append("-gomp")

_lprmsd = Extension('_lprmsd',
sources = glob('src/*.c'),
extra_compile_args = ["-std=c99","-O2",
"-msse2","-msse3","-Wno-unused","-fopenmp","-m64"],
# If you are 32-bit you should remove the -m64 flag
extra_link_args = ['-lblas', '-lpthread', '-lm', '-lgomp'],
extra_compile_args = compile_args,
extra_link_args = extra_link_args,
include_dirs = [numpy.get_include(), os.path.join(numpy.get_include(), 'numpy')])

setup(name='msmbuilder.metrics.lprmsd',
Expand Down
4 changes: 4 additions & 0 deletions Extras/LPRMSD/src/lprmsd.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@
#include <stdio.h>
#include "theobald_rmsd.h"
#include "apc.h"

#ifdef _OPENMP
#include <omp.h>
#endif

#include <sys/time.h>

#define CHECKARRAYFLOAT(ary,name) if (PyArray_TYPE(ary) != NPY_FLOAT32) { \
Expand Down
2 changes: 1 addition & 1 deletion MSMBuilder/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
from .baseclasses import AbstractDistanceMetric
from .baseclasses import Vectorized
from .rmsd import RMSD
from .lprmsd import LPRMSD
from .dihedral import Dihedral
from .contact import BooleanContact, AtomPairs, ContinuousContact
from .projection import RedDimPNorm
from .hybrid import Hybrid, HybridPNorm
from .projection import RedDimPNorm
from .positions import Positions
Loading