Commit 9da36463 authored by peastman's avatar peastman
Browse files

Merge pull request #871 from swails/psfinscode

Improve CHARMM parsing when resnums have inscodes
parents 6f1f89d8 f97f8685
...@@ -37,6 +37,7 @@ from __future__ import division ...@@ -37,6 +37,7 @@ from __future__ import division
from functools import wraps from functools import wraps
from math import pi, cos, sin, sqrt from math import pi, cos, sin, sqrt
import os import os
import re
import simtk.openmm as mm import simtk.openmm as mm
from simtk.openmm.vec3 import Vec3 from simtk.openmm.vec3 import Vec3
import simtk.unit as u import simtk.unit as u
...@@ -99,6 +100,8 @@ class _ZeroDict(dict): ...@@ -99,6 +100,8 @@ class _ZeroDict(dict):
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
_resre = re.compile(r'(\d+)([a-zA-Z]*)')
class CharmmPsfFile(object): class CharmmPsfFile(object):
""" """
A chemical structure instantiated from CHARMM files. A chemical structure instantiated from CHARMM files.
...@@ -192,11 +195,13 @@ class CharmmPsfFile(object): ...@@ -192,11 +195,13 @@ class CharmmPsfFile(object):
atom_list = AtomList() atom_list = AtomList()
for i in xrange(natom): for i in xrange(natom):
words = psfsections['NATOM'][1][i].split() words = psfsections['NATOM'][1][i].split()
atid = int(words[0])
if atid != i + 1:
raise CharmmPSFError('Nonsequential atoms detected!')
system = words[1] system = words[1]
resid = conv(words[2], int, 'residue number') rematch = _resre.match(words[2])
if not rematch:
raise RuntimeError('Could not parse residue number %s' %
words[2])
resid, inscode = rematch.groups()
resid = int(resid)
resname = words[3] resname = words[3]
name = words[4] name = words[4]
attype = words[5] attype = words[5]
...@@ -209,7 +214,7 @@ class CharmmPsfFile(object): ...@@ -209,7 +214,7 @@ class CharmmPsfFile(object):
mass = conv(words[7], float, 'atomic mass') mass = conv(words[7], float, 'atomic mass')
props = words[8:] props = words[8:]
atom = residue_list.add_atom(system, resid, resname, name, atom = residue_list.add_atom(system, resid, resname, name,
attype, charge, mass, props) attype, charge, mass, inscode, props)
atom_list.append(atom) atom_list.append(atom)
atom_list.assign_indexes() atom_list.assign_indexes()
atom_list.changed = False atom_list.changed = False
...@@ -443,175 +448,6 @@ class CharmmPsfFile(object): ...@@ -443,175 +448,6 @@ class CharmmPsfFile(object):
line = psf.readline().strip() line = psf.readline().strip()
return title, pointers, data return title, pointers, data
def writePsf(self, dest, vmd=False):
"""
Writes a PSF file from the stored molecule
Parameters:
- dest (str or file-like) : The place to write the output PSF file.
If it has a "write" attribute, it will be used to print the
PSF file. Otherwise, it will be treated like a string and a
file will be opened, printed, then closed
- vmd (bool) : If True, it will write out a PSF in the format that
VMD prints it in (i.e., no NUMLP/NUMLPH or MOLNT sections)
Example:
>>> cs = CharmmPsfFile('testfiles/test.psf')
>>> cs.writePsf('testfiles/test2.psf')
"""
# See if this is an extended format
ext = 'EXT' in self.flags
own_handle = False
# Index the atoms and residues
self.residue_list.assign_indexes()
self.atom_list.assign_indexes()
if not hasattr(dest, 'write'):
own_handle = True
dest = open(dest, 'w')
# Assign the formats we need to write with
if ext:
atmfmt1 = ('%10d %-8s %-8i %-8s %-8s %4d %10.6f %13.4f' + 11*' ')
atmfmt2 = ('%10d %-8s %-8i %-8s %-8s %-4s %10.6f %13.4f' + 11*' ')
intfmt = '%10d' # For pointers
else:
atmfmt1 = ('%8d %-4s %-4i %-4s %-4s %4d %10.6f %13.4f' + 11*' ')
atmfmt2 = ('%8d %-4s %-4i %-4s %-4s %-4s %10.6f %13.4f' + 11*' ')
intfmt = '%8d' # For pointers
# Now print the header then the title
dest.write('PSF ' + ' '.join(self.flags) + '\n')
dest.write('\n')
dest.write(intfmt % len(self.title) + ' !NTITLE\n')
dest.write('\n'.join(self.title) + '\n\n')
# Now time for the atoms
dest.write(intfmt % len(self.atom_list) + ' !NATOM\n')
# atmfmt1 is for CHARMM format (i.e., atom types are integers)
# atmfmt is for XPLOR format (i.e., atom types are strings)
for i, atom in enumerate(self.atom_list):
if isinstance(atom.attype, str):
fmt = atmfmt2
else:
fmt = atmfmt1
atmstr = fmt % (i+1, atom.system, atom.residue.resnum,
atom.residue.resname, atom.name, atom.attype,
atom.charge, atom.mass)
dest.write(atmstr + ' '.join(atom.props) + '\n')
dest.write('\n')
# Bonds
dest.write(intfmt % len(self.bond_list) + ' !NBOND: bonds\n')
for i, bond in enumerate(self.bond_list):
dest.write((intfmt*2) % (bond.atom1.idx+1, bond.atom2.idx+1))
if i % 4 == 3: # Write 4 bonds per line
dest.write('\n')
# See if we need to terminate
if len(self.bond_list) % 4 != 0 or len(self.bond_list) == 0:
dest.write('\n')
dest.write('\n')
# Angles
dest.write(intfmt % len(self.angle_list) + ' !NTHETA: angles\n')
for i, angle in enumerate(self.angle_list):
dest.write((intfmt*3) % (angle.atom1.idx+1, angle.atom2.idx+1,
angle.atom3.idx+1)
)
if i % 3 == 2: # Write 3 angles per line
dest.write('\n')
# See if we need to terminate
if len(self.angle_list) % 3 != 0 or len(self.angle_list) == 0:
dest.write('\n')
dest.write('\n')
# Dihedrals
dest.write(intfmt % len(self.dihedral_list) + ' !NPHI: dihedrals\n')
for i, dih in enumerate(self.dihedral_list):
dest.write((intfmt*4) % (dih.atom1.idx+1, dih.atom2.idx+1,
dih.atom3.idx+1, dih.atom4.idx+1)
)
if i % 2 == 1: # Write 2 dihedrals per line
dest.write('\n')
# See if we need to terminate
if len(self.dihedral_list) % 2 != 0 or len(self.dihedral_list) == 0:
dest.write('\n')
dest.write('\n')
# Impropers
dest.write(intfmt % len(self.improper_list) + ' !NIMPHI: impropers\n')
for i, imp in enumerate(self.improper_list):
dest.write((intfmt*4) % (imp.atom1.idx+1, imp.atom2.idx+1,
imp.atom3.idx+1, imp.atom4.idx+1)
)
if i % 2 == 1: # Write 2 dihedrals per line
dest.write('\n')
# See if we need to terminate
if len(self.improper_list) % 2 != 0 or len(self.improper_list) == 0:
dest.write('\n')
dest.write('\n')
# Donor section
dest.write(intfmt % len(self.donor_list) + ' !NDON: donors\n')
for i, don in enumerate(self.donor_list):
dest.write((intfmt*2) % (don.atom1.idx+1, don.atom2.idx+1))
if i % 4 == 3: # 4 donors per line
dest.write('\n')
if len(self.donor_list) % 4 != 0 or len(self.donor_list) == 0:
dest.write('\n')
dest.write('\n')
# Acceptor section
dest.write(intfmt % len(self.acceptor_list) + ' !NACC: acceptors\n')
for i, acc in enumerate(self.acceptor_list):
dest.write((intfmt*2) % (acc.atom1.idx+1, acc.atom2.idx+1))
if i % 4 == 3: # 4 donors per line
dest.write('\n')
if len(self.acceptor_list) % 4 != 0 or len(self.acceptor_list) == 0:
dest.write('\n')
dest.write('\n')
# NNB section ??
dest.write(intfmt % 0 + ' !NNB\n\n')
for i in range(len(self.atom_list)):
dest.write(intfmt % 0)
if i % 8 == 7: # Write 8 0's per line
dest.write('\n')
if len(self.atom_list) % 8 != 0: dest.write('\n')
dest.write('\n')
# Group section
dest.write((intfmt*2) % (len(self.group_list), self.group_list.nst2))
dest.write(' !NGRP NST2\n')
for i, gp in enumerate(self.group_list):
dest.write((intfmt*3) % (gp.bs, gp.type, gp.move))
if i % 3 == 2: dest.write('\n')
if len(self.group_list) % 3 != 0 or len(self.group_list) == 0:
dest.write('\n')
dest.write('\n')
# The next two sections are never found in VMD prmtops...
if not vmd:
# Molecule section; first set molecularity
set_molecules(self.atom_list)
mollist = [a.marked for a in self.atom_list]
dest.write(intfmt % max(mollist) + ' !MOLNT\n')
for i, atom in enumerate(self.atom_list):
dest.write(intfmt % atom.marked)
if i % 8 == 7: dest.write('\n')
if len(self.atom_list) % 8 != 0: dest.write('\n')
dest.write('\n')
# NUMLP/NUMLPH section
dest.write((intfmt*2) % (0, 0) + ' !NUMLP NUMLPH\n')
dest.write('\n')
# CMAP section
dest.write(intfmt % len(self.cmap_list) + ' !NCRTERM: cross-terms\n')
for i, cmap in enumerate(self.cmap_list):
dest.write((intfmt*4) % (cmap.atom1.idx+1, cmap.atom2.idx+1,
cmap.atom3.idx+1, cmap.atom4.idx+1)
)
if cmap.consecutive:
dest.write((intfmt*4) % (cmap.atom2.idx+1, cmap.atom3.idx+1,
cmap.atom4.idx+1, cmap.atom5.idx+1)
)
else:
dest.write((intfmt*4) % (cmap.atom5.idx+1, cmap.atom6.idx+1,
cmap.atom7.idx+1, cmap.atom8.idx+1)
)
dest.write('\n')
# Done!
# If we opened our own handle, close it
if own_handle:
dest.close()
def loadParameters(self, parmset): def loadParameters(self, parmset):
""" """
Loads parameters from a parameter set that was loaded via CHARMM RTF, Loads parameters from a parameter set that was loaded via CHARMM RTF,
...@@ -776,13 +612,13 @@ class CharmmPsfFile(object): ...@@ -776,13 +612,13 @@ class CharmmPsfFile(object):
last_residue = None last_residue = None
# Add each chain (separate 'system's) and residue # Add each chain (separate 'system's) and residue
for atom in self.atom_list: for atom in self.atom_list:
resid = '%d%s' % (atom.residue.idx, atom.residue.inscode)
if atom.system != last_chain: if atom.system != last_chain:
chain = topology.addChain(atom.system) chain = topology.addChain(atom.system)
last_chain = atom.system last_chain = atom.system
if atom.residue.idx != last_residue: if resid != last_residue:
last_residue = atom.residue.idx last_residue = resid
residue = topology.addResidue(atom.residue.resname, chain, residue = topology.addResidue(atom.residue.resname, chain, resid)
str(atom.residue.idx))
if atom.type is not None: if atom.type is not None:
# This is the most reliable way of determining the element # This is the most reliable way of determining the element
atomic_num = atom.type.atomic_number atomic_num = atom.type.atomic_number
......
...@@ -31,7 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. ...@@ -31,7 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
__author__ = "Christopher M. Bruns" __author__ = "Christopher M. Bruns"
__version__ = "1.0" __version__ = "1.0"
from collections import OrderedDict
from simtk.unit import daltons, is_quantity from simtk.unit import daltons, is_quantity
import copy_reg import copy_reg
...@@ -47,6 +47,7 @@ class Element(object): ...@@ -47,6 +47,7 @@ class Element(object):
_elements_by_symbol = {} _elements_by_symbol = {}
_elements_by_atomic_number = {} _elements_by_atomic_number = {}
_elements_by_mass = None
def __init__(self, number, name, symbol, mass): def __init__(self, number, name, symbol, mass):
"""Create a new element """Create a new element
...@@ -67,8 +68,11 @@ class Element(object): ...@@ -67,8 +68,11 @@ class Element(object):
self._mass = mass self._mass = mass
# Index this element in a global table # Index this element in a global table
s = symbol.strip().upper() s = symbol.strip().upper()
## If we add a new element, we need to re-hash elements by mass
Element._elements_by_mass = None
assert s not in Element._elements_by_symbol if s in Element._elements_by_symbol:
raise ValueError('Duplicate element symbol %s' % s)
Element._elements_by_symbol[s] = self Element._elements_by_symbol[s] = self
if number in Element._elements_by_atomic_number: if number in Element._elements_by_atomic_number:
other_element = Element._elements_by_atomic_number[number] other_element = Element._elements_by_atomic_number[number]
...@@ -96,20 +100,45 @@ class Element(object): ...@@ -96,20 +100,45 @@ class Element(object):
""" """
Get the element whose mass is CLOSEST to the requested mass. This method Get the element whose mass is CLOSEST to the requested mass. This method
should not be used for repartitioned masses should not be used for repartitioned masses
Parameters
----------
mass : float or Quantity
Mass of the atom to find the element for. Units assumed to be
daltons if not specified
Returns
-------
element : Element
The element whose atomic mass is closest to the input mass
""" """
# Assume masses are in daltons if they are not units # Assume masses are in daltons if they are not units
if not is_quantity(mass): if is_quantity(mass):
mass = mass * daltons mass = mass.value_in_unit(daltons)
if mass < 0:
raise ValueError('Invalid Higgs field')
# If this is our first time calling getByMass (or we added an element
# since the last call), re-generate the ordered by-mass dict cache
if Element._elements_by_mass is None:
Element._elements_by_mass = OrderedDict()
for elem in sorted(Element._elements_by_symbol.values(),
key=lambda x: x.mass):
Element._elements_by_mass[elem.mass] = elem
diff = mass diff = mass
best_guess = None best_guess = None
for key in Element._elements_by_atomic_number: for elemmass, element in Element._elements_by_mass.iteritems():
element = Element._elements_by_atomic_number[key] massdiff = abs(elemmass._value - mass)
massdiff = abs(element.mass - mass)
if massdiff < diff: if massdiff < diff:
best_guess = element best_guess = element
diff = massdiff diff = massdiff
if elemmass._value > mass:
# Elements are only getting heavier, so bail out early
return best_guess
# This really should only happen if we wanted ununoctium or something
# bigger... won't really happen but still make sure we return an Element
return best_guess return best_guess
@property @property
...@@ -145,6 +174,9 @@ def _pickle_element(element): ...@@ -145,6 +174,9 @@ def _pickle_element(element):
copy_reg.pickle(Element, _pickle_element) copy_reg.pickle(Element, _pickle_element)
# NOTE: getElementByMass assumes all masses are Quantity instances with unit
# "daltons". All elements need to obey this assumption, or that method will
# fail. No checking is done in getElementByMass for performance reasons
hydrogen = Element( 1, "hydrogen", "H", 1.007947*daltons) hydrogen = Element( 1, "hydrogen", "H", 1.007947*daltons)
deuterium = Element( 1, "deuterium", "D", 2.01355321270*daltons) deuterium = Element( 1, "deuterium", "D", 2.01355321270*daltons)
helium = Element( 2, "helium", "He", 4.003*daltons) helium = Element( 2, "helium", "He", 4.003*daltons)
......
...@@ -375,12 +375,13 @@ class AtomList(TrackedList): ...@@ -375,12 +375,13 @@ class AtomList(TrackedList):
class Residue(object): class Residue(object):
""" Residue class """ """ Residue class """
def __init__(self, resname, idx): def __init__(self, resname, idx, inscode=''):
self.resname = resname self.resname = resname
self.idx = idx self.idx = idx
self.atoms = [] self.atoms = []
self.resnum = None # Numbered based on SYSTEM name self.resnum = None # Numbered based on SYSTEM name
self.system = None self.system = None
self.inscode = inscode
def add_atom(self, atom): def add_atom(self, atom):
if self.system is None: if self.system is None:
...@@ -416,7 +417,7 @@ class Residue(object): ...@@ -416,7 +417,7 @@ class Residue(object):
return self.resname == thing.resname and self.idx == thing.idx return self.resname == thing.resname and self.idx == thing.idx
if isinstance(thing, tuple) or isinstance(thing, list): if isinstance(thing, tuple) or isinstance(thing, list):
# Must be resnum, resname # Must be resnum, resname
return thing == (self.resname, self.idx) return thing == (self.resname, self.idx, self.inscode)
return False # No other type can be equal. return False # No other type can be equal.
def __ne__(self, thing): def __ne__(self, thing):
...@@ -447,7 +448,7 @@ class ResidueList(list): ...@@ -447,7 +448,7 @@ class ResidueList(list):
resnum += 1 resnum += 1
def add_atom(self, system, resnum, resname, name, def add_atom(self, system, resnum, resname, name,
attype, charge, mass, props=None): attype, charge, mass, inscode, props=None):
""" """
Adds an atom to the list of residues. If the residue is not the same as Adds an atom to the list of residues. If the residue is not the same as
the last residue that was created, a new Residue is created and added the last residue that was created, a new Residue is created and added
...@@ -461,25 +462,22 @@ class ResidueList(list): ...@@ -461,25 +462,22 @@ class ResidueList(list):
- attype (int or str) : Type of the atom - attype (int or str) : Type of the atom
- charge (float) : Partial atomic charge of the atom - charge (float) : Partial atomic charge of the atom
- mass (float) : Mass (amu) of the atom - mass (float) : Mass (amu) of the atom
- inscode (str) : Insertion code, if it is specified
Returns: Returns:
The Atom instance created and added to the list of residues The Atom instance created and added to the list of residues
""" """
if self._last_residue is None: lr = self._last_residue
res = self._last_residue = Residue(resname, resnum) if lr is None:
res = self._last_residue = Residue(resname, resnum, inscode)
list.append(self, res) list.append(self, res)
elif (self._last_residue != (resname, resnum) or elif (lr.resname != resname or lr.idx != resname or
system != self._last_residue.system): lr.inscode != inscode or system != lr.system):
if (self._last_residue.idx == resnum and res = self._last_residue = Residue(resname, resnum, inscode)
self._last_residue.system == system): res.system = system
lresname = self._last_residue.resname
warnings.warn('Residue %d split into separate residues %s '
'and %s' % (resnum, lresname, resname),
SplitResidueWarning)
res = self._last_residue = Residue(resname, resnum)
list.append(self, res) list.append(self, res)
else: else:
res = self._last_residue res = lr
atom = Atom(system, name, attype, float(charge), float(mass), props) atom = Atom(system, name, attype, float(charge), float(mass), props)
res.add_atom(atom) res.add_atom(atom)
return atom return atom
......
...@@ -115,6 +115,13 @@ class TestCharmmFiles(unittest.TestCase): ...@@ -115,6 +115,13 @@ class TestCharmmFiles(unittest.TestCase):
ene = state.getPotentialEnergy().value_in_unit(kilocalories_per_mole) ene = state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
self.assertAlmostEqual(ene, 15490.0033559, delta=0.05) self.assertAlmostEqual(ene, 15490.0033559, delta=0.05)
def test_InsCode(self):
""" Test the parsing of PSF files that contain insertion codes in their residue numbers """
psf = CharmmPsfFile('systems/4TVP-dmj_wat-ion.psf')
self.assertEqual(len(list(psf.topology.atoms())), 66264)
self.assertEqual(len(list(psf.topology.residues())), 20169)
self.assertEqual(len(list(psf.topology.bonds())), 46634)
def test_ImplicitSolventForces(self): def test_ImplicitSolventForces(self):
"""Compute forces for different implicit solvent types, and compare them to ones generated with a previous version of OpenMM to ensure they haven't changed.""" """Compute forces for different implicit solvent types, and compare them to ones generated with a previous version of OpenMM to ensure they haven't changed."""
solventType = [HCT, OBC1, OBC2, GBn, GBn2] solventType = [HCT, OBC1, OBC2, GBn, GBn2]
......
import pickle import pickle
import unittest import random
from simtk.unit import dalton from simtk.unit import dalton, is_quantity
from simtk.openmm.app import element from simtk.openmm.app import element
import unittest
class TestElement(unittest.TestCase): class TestElement(unittest.TestCase):
def test_immutable(self): def test_immutable(self):
...@@ -14,18 +15,37 @@ class TestElement(unittest.TestCase): ...@@ -14,18 +15,37 @@ class TestElement(unittest.TestCase):
newsulfur = pickle.loads(pickle.dumps(element.sulfur)) newsulfur = pickle.loads(pickle.dumps(element.sulfur))
# make sure that a new object is not created during the pickle/unpickle # make sure that a new object is not created during the pickle/unpickle
# cycle # cycle
assert element.sulfur == newsulfur self.assertEqual(element.sulfur, newsulfur)
assert element.sulfur is newsulfur self.assertTrue(element.sulfur is newsulfur)
assert id(element.sulfur) == id(newsulfur)
def test_attributes(self): def test_attributes(self):
assert element.hydrogen.atomic_number == 1 self.assertEqual(element.hydrogen.atomic_number, 1)
assert element.hydrogen.symbol == 'H' self.assertEqual(element.hydrogen.symbol, 'H')
assert element.hydrogen.name == 'hydrogen' self.assertEqual(element.hydrogen.name, 'hydrogen')
assert element.hydrogen.mass == 1.007947 * dalton self.assertEqual(element.hydrogen.mass, 1.007947 * dalton)
def test_getByMass(self):
""" Tests the getByMass method """
def exhaustive_search(mass):
"""
Searches through all element symbols and finds the one with the
smallest mass difference
"""
min_diff = mass
closest_element = None
for symbol, elem in element.Element._elements_by_symbol.items():
diff = abs(elem.mass._value - mass)
if diff < min_diff:
min_diff = diff
closest_element = elem
return closest_element
if __name__ == '__main__': # Check 5000 random numbers between 0 and 300
unittest.main() for i in range(5000):
mass = random.random() * 300
elem = element.Element.getByMass(mass)
self.assertTrue(elem is exhaustive_search(mass))
if __name__ == '__main__':
unittest.main()
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment