Commit 9da36463 authored by peastman's avatar peastman
Browse files

Merge pull request #871 from swails/psfinscode

Improve CHARMM parsing when resnums have inscodes
parents 6f1f89d8 f97f8685
......@@ -37,6 +37,7 @@ from __future__ import division
from functools import wraps
from math import pi, cos, sin, sqrt
import os
import re
import simtk.openmm as mm
from simtk.openmm.vec3 import Vec3
import simtk.unit as u
......@@ -99,6 +100,8 @@ class _ZeroDict(dict):
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
_resre = re.compile(r'(\d+)([a-zA-Z]*)')
class CharmmPsfFile(object):
"""
A chemical structure instantiated from CHARMM files.
......@@ -192,11 +195,13 @@ class CharmmPsfFile(object):
atom_list = AtomList()
for i in xrange(natom):
words = psfsections['NATOM'][1][i].split()
atid = int(words[0])
if atid != i + 1:
raise CharmmPSFError('Nonsequential atoms detected!')
system = words[1]
resid = conv(words[2], int, 'residue number')
rematch = _resre.match(words[2])
if not rematch:
raise RuntimeError('Could not parse residue number %s' %
words[2])
resid, inscode = rematch.groups()
resid = int(resid)
resname = words[3]
name = words[4]
attype = words[5]
......@@ -209,7 +214,7 @@ class CharmmPsfFile(object):
mass = conv(words[7], float, 'atomic mass')
props = words[8:]
atom = residue_list.add_atom(system, resid, resname, name,
attype, charge, mass, props)
attype, charge, mass, inscode, props)
atom_list.append(atom)
atom_list.assign_indexes()
atom_list.changed = False
......@@ -443,175 +448,6 @@ class CharmmPsfFile(object):
line = psf.readline().strip()
return title, pointers, data
def writePsf(self, dest, vmd=False):
"""
Writes a PSF file from the stored molecule
Parameters:
- dest (str or file-like) : The place to write the output PSF file.
If it has a "write" attribute, it will be used to print the
PSF file. Otherwise, it will be treated like a string and a
file will be opened, printed, then closed
- vmd (bool) : If True, it will write out a PSF in the format that
VMD prints it in (i.e., no NUMLP/NUMLPH or MOLNT sections)
Example:
>>> cs = CharmmPsfFile('testfiles/test.psf')
>>> cs.writePsf('testfiles/test2.psf')
"""
# See if this is an extended format
ext = 'EXT' in self.flags
own_handle = False
# Index the atoms and residues
self.residue_list.assign_indexes()
self.atom_list.assign_indexes()
if not hasattr(dest, 'write'):
own_handle = True
dest = open(dest, 'w')
# Assign the formats we need to write with
if ext:
atmfmt1 = ('%10d %-8s %-8i %-8s %-8s %4d %10.6f %13.4f' + 11*' ')
atmfmt2 = ('%10d %-8s %-8i %-8s %-8s %-4s %10.6f %13.4f' + 11*' ')
intfmt = '%10d' # For pointers
else:
atmfmt1 = ('%8d %-4s %-4i %-4s %-4s %4d %10.6f %13.4f' + 11*' ')
atmfmt2 = ('%8d %-4s %-4i %-4s %-4s %-4s %10.6f %13.4f' + 11*' ')
intfmt = '%8d' # For pointers
# Now print the header then the title
dest.write('PSF ' + ' '.join(self.flags) + '\n')
dest.write('\n')
dest.write(intfmt % len(self.title) + ' !NTITLE\n')
dest.write('\n'.join(self.title) + '\n\n')
# Now time for the atoms
dest.write(intfmt % len(self.atom_list) + ' !NATOM\n')
# atmfmt1 is for CHARMM format (i.e., atom types are integers)
# atmfmt is for XPLOR format (i.e., atom types are strings)
for i, atom in enumerate(self.atom_list):
if isinstance(atom.attype, str):
fmt = atmfmt2
else:
fmt = atmfmt1
atmstr = fmt % (i+1, atom.system, atom.residue.resnum,
atom.residue.resname, atom.name, atom.attype,
atom.charge, atom.mass)
dest.write(atmstr + ' '.join(atom.props) + '\n')
dest.write('\n')
# Bonds
dest.write(intfmt % len(self.bond_list) + ' !NBOND: bonds\n')
for i, bond in enumerate(self.bond_list):
dest.write((intfmt*2) % (bond.atom1.idx+1, bond.atom2.idx+1))
if i % 4 == 3: # Write 4 bonds per line
dest.write('\n')
# See if we need to terminate
if len(self.bond_list) % 4 != 0 or len(self.bond_list) == 0:
dest.write('\n')
dest.write('\n')
# Angles
dest.write(intfmt % len(self.angle_list) + ' !NTHETA: angles\n')
for i, angle in enumerate(self.angle_list):
dest.write((intfmt*3) % (angle.atom1.idx+1, angle.atom2.idx+1,
angle.atom3.idx+1)
)
if i % 3 == 2: # Write 3 angles per line
dest.write('\n')
# See if we need to terminate
if len(self.angle_list) % 3 != 0 or len(self.angle_list) == 0:
dest.write('\n')
dest.write('\n')
# Dihedrals
dest.write(intfmt % len(self.dihedral_list) + ' !NPHI: dihedrals\n')
for i, dih in enumerate(self.dihedral_list):
dest.write((intfmt*4) % (dih.atom1.idx+1, dih.atom2.idx+1,
dih.atom3.idx+1, dih.atom4.idx+1)
)
if i % 2 == 1: # Write 2 dihedrals per line
dest.write('\n')
# See if we need to terminate
if len(self.dihedral_list) % 2 != 0 or len(self.dihedral_list) == 0:
dest.write('\n')
dest.write('\n')
# Impropers
dest.write(intfmt % len(self.improper_list) + ' !NIMPHI: impropers\n')
for i, imp in enumerate(self.improper_list):
dest.write((intfmt*4) % (imp.atom1.idx+1, imp.atom2.idx+1,
imp.atom3.idx+1, imp.atom4.idx+1)
)
if i % 2 == 1: # Write 2 dihedrals per line
dest.write('\n')
# See if we need to terminate
if len(self.improper_list) % 2 != 0 or len(self.improper_list) == 0:
dest.write('\n')
dest.write('\n')
# Donor section
dest.write(intfmt % len(self.donor_list) + ' !NDON: donors\n')
for i, don in enumerate(self.donor_list):
dest.write((intfmt*2) % (don.atom1.idx+1, don.atom2.idx+1))
if i % 4 == 3: # 4 donors per line
dest.write('\n')
if len(self.donor_list) % 4 != 0 or len(self.donor_list) == 0:
dest.write('\n')
dest.write('\n')
# Acceptor section
dest.write(intfmt % len(self.acceptor_list) + ' !NACC: acceptors\n')
for i, acc in enumerate(self.acceptor_list):
dest.write((intfmt*2) % (acc.atom1.idx+1, acc.atom2.idx+1))
if i % 4 == 3: # 4 donors per line
dest.write('\n')
if len(self.acceptor_list) % 4 != 0 or len(self.acceptor_list) == 0:
dest.write('\n')
dest.write('\n')
# NNB section ??
dest.write(intfmt % 0 + ' !NNB\n\n')
for i in range(len(self.atom_list)):
dest.write(intfmt % 0)
if i % 8 == 7: # Write 8 0's per line
dest.write('\n')
if len(self.atom_list) % 8 != 0: dest.write('\n')
dest.write('\n')
# Group section
dest.write((intfmt*2) % (len(self.group_list), self.group_list.nst2))
dest.write(' !NGRP NST2\n')
for i, gp in enumerate(self.group_list):
dest.write((intfmt*3) % (gp.bs, gp.type, gp.move))
if i % 3 == 2: dest.write('\n')
if len(self.group_list) % 3 != 0 or len(self.group_list) == 0:
dest.write('\n')
dest.write('\n')
# The next two sections are never found in VMD prmtops...
if not vmd:
# Molecule section; first set molecularity
set_molecules(self.atom_list)
mollist = [a.marked for a in self.atom_list]
dest.write(intfmt % max(mollist) + ' !MOLNT\n')
for i, atom in enumerate(self.atom_list):
dest.write(intfmt % atom.marked)
if i % 8 == 7: dest.write('\n')
if len(self.atom_list) % 8 != 0: dest.write('\n')
dest.write('\n')
# NUMLP/NUMLPH section
dest.write((intfmt*2) % (0, 0) + ' !NUMLP NUMLPH\n')
dest.write('\n')
# CMAP section
dest.write(intfmt % len(self.cmap_list) + ' !NCRTERM: cross-terms\n')
for i, cmap in enumerate(self.cmap_list):
dest.write((intfmt*4) % (cmap.atom1.idx+1, cmap.atom2.idx+1,
cmap.atom3.idx+1, cmap.atom4.idx+1)
)
if cmap.consecutive:
dest.write((intfmt*4) % (cmap.atom2.idx+1, cmap.atom3.idx+1,
cmap.atom4.idx+1, cmap.atom5.idx+1)
)
else:
dest.write((intfmt*4) % (cmap.atom5.idx+1, cmap.atom6.idx+1,
cmap.atom7.idx+1, cmap.atom8.idx+1)
)
dest.write('\n')
# Done!
# If we opened our own handle, close it
if own_handle:
dest.close()
def loadParameters(self, parmset):
"""
Loads parameters from a parameter set that was loaded via CHARMM RTF,
......@@ -776,13 +612,13 @@ class CharmmPsfFile(object):
last_residue = None
# Add each chain (separate 'system's) and residue
for atom in self.atom_list:
resid = '%d%s' % (atom.residue.idx, atom.residue.inscode)
if atom.system != last_chain:
chain = topology.addChain(atom.system)
last_chain = atom.system
if atom.residue.idx != last_residue:
last_residue = atom.residue.idx
residue = topology.addResidue(atom.residue.resname, chain,
str(atom.residue.idx))
if resid != last_residue:
last_residue = resid
residue = topology.addResidue(atom.residue.resname, chain, resid)
if atom.type is not None:
# This is the most reliable way of determining the element
atomic_num = atom.type.atomic_number
......
......@@ -31,7 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
__author__ = "Christopher M. Bruns"
__version__ = "1.0"
from collections import OrderedDict
from simtk.unit import daltons, is_quantity
import copy_reg
......@@ -47,6 +47,7 @@ class Element(object):
_elements_by_symbol = {}
_elements_by_atomic_number = {}
_elements_by_mass = None
def __init__(self, number, name, symbol, mass):
"""Create a new element
......@@ -67,8 +68,11 @@ class Element(object):
self._mass = mass
# Index this element in a global table
s = symbol.strip().upper()
## If we add a new element, we need to re-hash elements by mass
Element._elements_by_mass = None
assert s not in Element._elements_by_symbol
if s in Element._elements_by_symbol:
raise ValueError('Duplicate element symbol %s' % s)
Element._elements_by_symbol[s] = self
if number in Element._elements_by_atomic_number:
other_element = Element._elements_by_atomic_number[number]
......@@ -96,20 +100,45 @@ class Element(object):
"""
Get the element whose mass is CLOSEST to the requested mass. This method
should not be used for repartitioned masses
Parameters
----------
mass : float or Quantity
Mass of the atom to find the element for. Units assumed to be
daltons if not specified
Returns
-------
element : Element
The element whose atomic mass is closest to the input mass
"""
# Assume masses are in daltons if they are not units
if not is_quantity(mass):
mass = mass * daltons
if is_quantity(mass):
mass = mass.value_in_unit(daltons)
if mass < 0:
raise ValueError('Invalid Higgs field')
# If this is our first time calling getByMass (or we added an element
# since the last call), re-generate the ordered by-mass dict cache
if Element._elements_by_mass is None:
Element._elements_by_mass = OrderedDict()
for elem in sorted(Element._elements_by_symbol.values(),
key=lambda x: x.mass):
Element._elements_by_mass[elem.mass] = elem
diff = mass
best_guess = None
for key in Element._elements_by_atomic_number:
element = Element._elements_by_atomic_number[key]
massdiff = abs(element.mass - mass)
for elemmass, element in Element._elements_by_mass.iteritems():
massdiff = abs(elemmass._value - mass)
if massdiff < diff:
best_guess = element
diff = massdiff
if elemmass._value > mass:
# Elements are only getting heavier, so bail out early
return best_guess
# This really should only happen if we wanted ununoctium or something
# bigger... won't really happen but still make sure we return an Element
return best_guess
@property
......@@ -145,6 +174,9 @@ def _pickle_element(element):
copy_reg.pickle(Element, _pickle_element)
# NOTE: getElementByMass assumes all masses are Quantity instances with unit
# "daltons". All elements need to obey this assumption, or that method will
# fail. No checking is done in getElementByMass for performance reasons
hydrogen = Element( 1, "hydrogen", "H", 1.007947*daltons)
deuterium = Element( 1, "deuterium", "D", 2.01355321270*daltons)
helium = Element( 2, "helium", "He", 4.003*daltons)
......
......@@ -375,12 +375,13 @@ class AtomList(TrackedList):
class Residue(object):
""" Residue class """
def __init__(self, resname, idx):
def __init__(self, resname, idx, inscode=''):
self.resname = resname
self.idx = idx
self.atoms = []
self.resnum = None # Numbered based on SYSTEM name
self.system = None
self.inscode = inscode
def add_atom(self, atom):
if self.system is None:
......@@ -416,7 +417,7 @@ class Residue(object):
return self.resname == thing.resname and self.idx == thing.idx
if isinstance(thing, tuple) or isinstance(thing, list):
# Must be resnum, resname
return thing == (self.resname, self.idx)
return thing == (self.resname, self.idx, self.inscode)
return False # No other type can be equal.
def __ne__(self, thing):
......@@ -447,7 +448,7 @@ class ResidueList(list):
resnum += 1
def add_atom(self, system, resnum, resname, name,
attype, charge, mass, props=None):
attype, charge, mass, inscode, props=None):
"""
Adds an atom to the list of residues. If the residue is not the same as
the last residue that was created, a new Residue is created and added
......@@ -461,25 +462,22 @@ class ResidueList(list):
- attype (int or str) : Type of the atom
- charge (float) : Partial atomic charge of the atom
- mass (float) : Mass (amu) of the atom
- inscode (str) : Insertion code, if it is specified
Returns:
The Atom instance created and added to the list of residues
"""
if self._last_residue is None:
res = self._last_residue = Residue(resname, resnum)
lr = self._last_residue
if lr is None:
res = self._last_residue = Residue(resname, resnum, inscode)
list.append(self, res)
elif (self._last_residue != (resname, resnum) or
system != self._last_residue.system):
if (self._last_residue.idx == resnum and
self._last_residue.system == system):
lresname = self._last_residue.resname
warnings.warn('Residue %d split into separate residues %s '
'and %s' % (resnum, lresname, resname),
SplitResidueWarning)
res = self._last_residue = Residue(resname, resnum)
elif (lr.resname != resname or lr.idx != resname or
lr.inscode != inscode or system != lr.system):
res = self._last_residue = Residue(resname, resnum, inscode)
res.system = system
list.append(self, res)
else:
res = self._last_residue
res = lr
atom = Atom(system, name, attype, float(charge), float(mass), props)
res.add_atom(atom)
return atom
......
......@@ -115,6 +115,13 @@ class TestCharmmFiles(unittest.TestCase):
ene = state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
self.assertAlmostEqual(ene, 15490.0033559, delta=0.05)
def test_InsCode(self):
""" Test the parsing of PSF files that contain insertion codes in their residue numbers """
psf = CharmmPsfFile('systems/4TVP-dmj_wat-ion.psf')
self.assertEqual(len(list(psf.topology.atoms())), 66264)
self.assertEqual(len(list(psf.topology.residues())), 20169)
self.assertEqual(len(list(psf.topology.bonds())), 46634)
def test_ImplicitSolventForces(self):
"""Compute forces for different implicit solvent types, and compare them to ones generated with a previous version of OpenMM to ensure they haven't changed."""
solventType = [HCT, OBC1, OBC2, GBn, GBn2]
......
import pickle
import unittest
from simtk.unit import dalton
import random
from simtk.unit import dalton, is_quantity
from simtk.openmm.app import element
import unittest
class TestElement(unittest.TestCase):
def test_immutable(self):
......@@ -14,18 +15,37 @@ class TestElement(unittest.TestCase):
newsulfur = pickle.loads(pickle.dumps(element.sulfur))
# make sure that a new object is not created during the pickle/unpickle
# cycle
assert element.sulfur == newsulfur
assert element.sulfur is newsulfur
assert id(element.sulfur) == id(newsulfur)
self.assertEqual(element.sulfur, newsulfur)
self.assertTrue(element.sulfur is newsulfur)
def test_attributes(self):
assert element.hydrogen.atomic_number == 1
assert element.hydrogen.symbol == 'H'
assert element.hydrogen.name == 'hydrogen'
assert element.hydrogen.mass == 1.007947 * dalton
self.assertEqual(element.hydrogen.atomic_number, 1)
self.assertEqual(element.hydrogen.symbol, 'H')
self.assertEqual(element.hydrogen.name, 'hydrogen')
self.assertEqual(element.hydrogen.mass, 1.007947 * dalton)
def test_getByMass(self):
""" Tests the getByMass method """
def exhaustive_search(mass):
"""
Searches through all element symbols and finds the one with the
smallest mass difference
"""
min_diff = mass
closest_element = None
for symbol, elem in element.Element._elements_by_symbol.items():
diff = abs(elem.mass._value - mass)
if diff < min_diff:
min_diff = diff
closest_element = elem
return closest_element
if __name__ == '__main__':
unittest.main()
# Check 5000 random numbers between 0 and 300
for i in range(5000):
mass = random.random() * 300
elem = element.Element.getByMass(mass)
self.assertTrue(elem is exhaustive_search(mass))
if __name__ == '__main__':
unittest.main()
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment