Commit 4b9221e3 authored by Jason Swails's avatar Jason Swails
Browse files

Redo the way CHARMM PSF files are parsed. This should be much more robust in

terms of supporting PSF files that don't really conform to the "standard" way
that CHARMM prints them.

The downside is that it relies on ! comment tags which I think are comments for
CHARMM.  However, the use of !NATOM and !NBOND and other tags seem to be _much_
more conserved than the general format of the PSF file across different dialects
of PSFs.  So the current approach should be more flexible.
parent 27381f70
...@@ -45,14 +45,13 @@ from simtk.openmm.app.amberprmtopfile import HCT, OBC1, OBC2, GBn, GBn2 ...@@ -45,14 +45,13 @@ from simtk.openmm.app.amberprmtopfile import HCT, OBC1, OBC2, GBn, GBn2
from simtk.openmm.app.internal.customgbforces import (GBSAHCTForce, from simtk.openmm.app.internal.customgbforces import (GBSAHCTForce,
GBSAOBC1Force, GBSAOBC2Force, GBSAGBnForce, GBSAGBn2Force) GBSAOBC1Force, GBSAOBC2Force, GBSAGBnForce, GBSAGBn2Force)
# CHARMM imports # CHARMM imports
from simtk.openmm.app.internal.charmm._charmmfile import CharmmFile
from simtk.openmm.app.internal.charmm.topologyobjects import ( from simtk.openmm.app.internal.charmm.topologyobjects import (
ResidueList, AtomList, TrackedList, Bond, Angle, Dihedral, ResidueList, AtomList, TrackedList, Bond, Angle, Dihedral,
Improper, AcceptorDonor, Group, Cmap, UreyBradley, Improper, AcceptorDonor, Group, Cmap, UreyBradley,
NoUreyBradley) NoUreyBradley)
from simtk.openmm.app.internal.charmm.exceptions import ( from simtk.openmm.app.internal.charmm.exceptions import (
CharmmPSFError, MoleculeError, CharmmPSFWarning, CharmmPSFError, MoleculeError, CharmmPSFWarning,
MissingParameter) MissingParameter, CharmmPsfEOF)
import warnings import warnings
TINY = 1e-8 TINY = 1e-8
...@@ -74,6 +73,29 @@ def _catchindexerror(func): ...@@ -74,6 +73,29 @@ def _catchindexerror(func):
return newfunc return newfunc
class _ZeroDict(dict):
"""
Contains a dict that returns dummy (zero) arguments when a key is not
present rather than raising a KeyError. The return value for non-existent
items is (0, []). It also special-case sections that have multiple pointers
to avoid index errors if those are not present in the PSF file
"""
def __getitem__(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
if key.startswith('NGRP'):
for k in self:
if k.startswith('NGRP'):
return dict.__getitem__(self, k)
return [0, 0], []
elif key.startswith('NUMLP'):
for k in self:
if k.startswith('NUMLP'):
return dict.__getitem__(self, k)
return [0, 0], []
return 0, []
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class CharmmPsfFile(object): class CharmmPsfFile(object):
...@@ -128,8 +150,8 @@ class CharmmPsfFile(object): ...@@ -128,8 +150,8 @@ class CharmmPsfFile(object):
@_catchindexerror @_catchindexerror
def __init__(self, psf_name): def __init__(self, psf_name):
""" """
Opens and parses a PSF file, then instantiates a CharmmPsfFile instance Opens and parses a PSF file, then instantiates a CharmmPsfFile
from the data. instance from the data.
Parameters: Parameters:
psf_name (str) : Name of the PSF file (it must exist) psf_name (str) : Name of the PSF file (it must exist)
...@@ -143,33 +165,32 @@ class CharmmPsfFile(object): ...@@ -143,33 +165,32 @@ class CharmmPsfFile(object):
if not os.path.exists(psf_name): if not os.path.exists(psf_name):
raise IOError('Could not find PSF file %s' % psf_name) raise IOError('Could not find PSF file %s' % psf_name)
# Open the PSF and read the first line. It must start with "PSF" # Open the PSF and read the first line. It must start with "PSF"
psf = CharmmFile(psf_name, 'r') psf = open(psf_name, 'r')
line = psf.readline() line = psf.readline()
if not line.startswith('PSF'): if not line.startswith('PSF'):
raise CharmmPSFError('Unrecognized PSF file. First line is %s' % raise CharmmPSFError('Unrecognized PSF file. First line is %s' %
line.strip()) line.strip())
# Store the flags # Store the flags
psf_flags = line.split()[1:] psf_flags = line.split()[1:]
# Next line is blank # Now get all of the sections and store them in a dict
psf.readline() psf.readline()
# The next line has one number -- the number of title lines # Now get all of the sections
ntitle = conv(psf.readline().strip(), int, 'title count') psfsections = _ZeroDict()
while True:
try:
sec, ptr, data = CharmmPsfFile._parse_psf_section(psf)
except CharmmPsfEOF:
break
psfsections[sec] = (ptr, data)
# store the title # store the title
title = list() title = psfsections['NTITLE'][1]
for i in range(ntitle):
title.append(psf.readline().rstrip())
# Skip all blank lines. Most of the time there is only 1, but I've seen
# a file that has 2
line = psf.readline().strip()
while not line:
line = psf.readline().strip()
# Next is the number of atoms # Next is the number of atoms
natom = conv(line, int, 'natom') natom = conv(psfsections['NATOM'][0], int, 'natom')
# Parse all of the atoms # Parse all of the atoms
residue_list = ResidueList() residue_list = ResidueList()
atom_list = AtomList() atom_list = AtomList()
for i in xrange(natom): for i in xrange(natom):
words = psf.readline().split() words = psfsections['NATOM'][1][i].split()
atid = int(words[0]) atid = int(words[0])
if atid != i + 1: if atid != i + 1:
raise CharmmPSFError('Nonsequential atoms detected!') raise CharmmPSFError('Nonsequential atoms detected!')
...@@ -190,10 +211,10 @@ class CharmmPsfFile(object): ...@@ -190,10 +211,10 @@ class CharmmPsfFile(object):
attype, charge, mass, props) attype, charge, mass, props)
atom_list.append(atom) atom_list.append(atom)
atom_list.assign_indexes() atom_list.assign_indexes()
# Eat the next line atom_list.changed = False
psf.readline()
# Now get the number of bonds # Now get the number of bonds
nbond, holder = CharmmPsfFile._parse_psf_section(psf, int) nbond = conv(psfsections['NBOND'][0], int, 'number of bonds')
holder = psfsections['NBOND'][1]
bond_list = TrackedList() bond_list = TrackedList()
if len(holder) != nbond * 2: if len(holder) != nbond * 2:
raise CharmmPSFError('Got %d indexes for %d bonds' % raise CharmmPSFError('Got %d indexes for %d bonds' %
...@@ -204,7 +225,8 @@ class CharmmPsfFile(object): ...@@ -204,7 +225,8 @@ class CharmmPsfFile(object):
bond_list.append(Bond(atom_list[id1], atom_list[id2])) bond_list.append(Bond(atom_list[id1], atom_list[id2]))
bond_list.changed = False bond_list.changed = False
# Now get the number of angles and the angle list # Now get the number of angles and the angle list
ntheta, holder = CharmmPsfFile._parse_psf_section(psf, int) ntheta = conv(psfsections['NTHETA'][0], int, 'number of angles')
holder = psfsections['NTHETA'][1]
angle_list = TrackedList() angle_list = TrackedList()
if len(holder) != ntheta * 3: if len(holder) != ntheta * 3:
raise CharmmPSFError('Got %d indexes for %d angles' % raise CharmmPSFError('Got %d indexes for %d angles' %
...@@ -218,7 +240,8 @@ class CharmmPsfFile(object): ...@@ -218,7 +240,8 @@ class CharmmPsfFile(object):
) )
angle_list.changed = False angle_list.changed = False
# Now get the number of torsions and the torsion list # Now get the number of torsions and the torsion list
nphi, holder = CharmmPsfFile._parse_psf_section(psf, int) nphi = conv(psfsections['NPHI'][0], int, 'number of torsions')
holder = psfsections['NPHI'][1]
dihedral_list = TrackedList() dihedral_list = TrackedList()
if len(holder) != nphi * 4: if len(holder) != nphi * 4:
raise CharmmPSFError('Got %d indexes for %d torsions' % raise CharmmPSFError('Got %d indexes for %d torsions' %
...@@ -234,7 +257,8 @@ class CharmmPsfFile(object): ...@@ -234,7 +257,8 @@ class CharmmPsfFile(object):
) )
dihedral_list.changed = False dihedral_list.changed = False
# Now get the number of improper torsions # Now get the number of improper torsions
nimphi, holder = CharmmPsfFile._parse_psf_section(psf, int) nimphi = conv(psfsections['NIMPHI'][0], int, 'number of impropers')
holder = psfsections['NIMPHI'][1]
improper_list = TrackedList() improper_list = TrackedList()
if len(holder) != nimphi * 4: if len(holder) != nimphi * 4:
raise CharmmPSFError('Got %d indexes for %d impropers' % raise CharmmPSFError('Got %d indexes for %d impropers' %
...@@ -250,7 +274,8 @@ class CharmmPsfFile(object): ...@@ -250,7 +274,8 @@ class CharmmPsfFile(object):
) )
improper_list.changed = False improper_list.changed = False
# Now handle the donors (what is this used for??) # Now handle the donors (what is this used for??)
ndon, holder = CharmmPsfFile._parse_psf_section(psf, int) ndon = conv(psfsections['NDON'][0], int, 'number of donors')
holder = psfsections['NDON'][1]
donor_list = TrackedList() donor_list = TrackedList()
if len(holder) != ndon * 2: if len(holder) != ndon * 2:
raise CharmmPSFError('Got %d indexes for %d donors' % raise CharmmPSFError('Got %d indexes for %d donors' %
...@@ -261,7 +286,8 @@ class CharmmPsfFile(object): ...@@ -261,7 +286,8 @@ class CharmmPsfFile(object):
donor_list.append(AcceptorDonor(atom_list[id1], atom_list[id2])) donor_list.append(AcceptorDonor(atom_list[id1], atom_list[id2]))
donor_list.changed = False donor_list.changed = False
# Now handle the acceptors (what is this used for??) # Now handle the acceptors (what is this used for??)
nacc, holder = CharmmPsfFile._parse_psf_section(psf, int) nacc = conv(psfsections['NACC'][0], int, 'number of acceptors')
holder = psfsections['NACC'][1]
acceptor_list = TrackedList() acceptor_list = TrackedList()
if len(holder) != nacc * 2: if len(holder) != nacc * 2:
raise CharmmPSFError('Got %d indexes for %d acceptors' % raise CharmmPSFError('Got %d indexes for %d acceptors' %
...@@ -271,16 +297,13 @@ class CharmmPsfFile(object): ...@@ -271,16 +297,13 @@ class CharmmPsfFile(object):
id2 = holder[2*i+1] - 1 id2 = holder[2*i+1] - 1
acceptor_list.append(AcceptorDonor(atom_list[id1], atom_list[id2])) acceptor_list.append(AcceptorDonor(atom_list[id1], atom_list[id2]))
acceptor_list.changed = False acceptor_list.changed = False
# Now get the NNB section. Not sure what this section is for or what it
# does...
nnb, holder = CharmmPsfFile._parse_psf_section(psf, int)
# Now get the group sections # Now get the group sections
pointers, holder = CharmmPsfFile._parse_psf_section(psf, int)
group_list = TrackedList() group_list = TrackedList()
try: try:
ngrp, nst2 = pointers ngrp, nst2 = psfsections['NGRP NST2'][0]
except ValueError: except ValueError:
raise CharmmPSFError('Could not unpack GROUP pointers') raise CharmmPSFError('Could not unpack GROUP pointers')
holder = psfsections['NGRP NST2'][1]
group_list.nst2 = nst2 group_list.nst2 = nst2
# Now handle the groups # Now handle the groups
if len(holder) != ngrp * 3: if len(holder) != ngrp * 3:
...@@ -292,19 +315,8 @@ class CharmmPsfFile(object): ...@@ -292,19 +315,8 @@ class CharmmPsfFile(object):
i3 = holder[3*i+2] i3 = holder[3*i+2]
group_list.append(Group(i1, i2, i3)) group_list.append(Group(i1, i2, i3))
group_list.changed = False group_list.changed = False
# The next section might be the number of molecules or it might be the
# cross-term (cmap) section. The first thing we'll do is determine
# molecularity based on the atom connectivity. If every PSF file was
# guaranteed to be "correct", we could just compare the MOLNT
# section with the one we compute here. However, CHARMM GUI appears
# to assign MOLNT as a dummy section (with all 1's), so this
# approach will not work. Instead, look at the value of the pointer
# and the number of entries in the group. If the # of entries is
# NATOM, assume we have MOLNT section. Warn if the MOLNT section is
# 'wrong'...
pointer, holder = CharmmPsfFile._parse_psf_section(psf, int)
# Assign all of the atoms to molecules recursively # Assign all of the atoms to molecules recursively
holder = psfsections['MOLNT'][1]
set_molecules(atom_list) set_molecules(atom_list)
molecule_list = [atom.marked for atom in atom_list] molecule_list = [atom.marked for atom in atom_list]
if len(holder) == len(atom_list): if len(holder) == len(atom_list):
...@@ -312,21 +324,14 @@ class CharmmPsfFile(object): ...@@ -312,21 +324,14 @@ class CharmmPsfFile(object):
warnings.warn('Detected PSF molecule section that is WRONG. ' warnings.warn('Detected PSF molecule section that is WRONG. '
'Resetting molecularity.', CharmmPSFWarning) 'Resetting molecularity.', CharmmPSFWarning)
# We have a CHARMM PSF file; now do NUMLP/NUMLPH sections # We have a CHARMM PSF file; now do NUMLP/NUMLPH sections
words = psf.readline().split() numlp, numlph = psfsections['NUMLP NUMLPH'][0]
numlp = conv(words[0], int, 'numlp')
numlph = conv(words[1], int, 'numlph')
if numlp != 0 or numlph != 0: if numlp != 0 or numlph != 0:
raise NotImplemented('Cannot currently handle PSFs with lone ' raise NotImplemented('Cannot currently handle PSFs with lone '
'pairs defined in the NUMLP/NUMLPH ' 'pairs defined in the NUMLP/NUMLPH '
'section.') 'section.')
psf.readline() # blank # Now do the CMAPs
# Now we get to the cross-term section ncrterm = conv(psfsections['NCRTERM'][0], int, 'Number of cross-terms')
ncrterm, holder = CharmmPsfFile._parse_psf_section(psf, int) holder = psfsections['NCRTERM'][1]
else:
ncrterm = pointer
# At this point, ncrterm and holder are both set to the CMAP list for
# VMD and non-VMD PSFs.
# Now get the cmaps
cmap_list = TrackedList() cmap_list = TrackedList()
if len(holder) != ncrterm * 8: if len(holder) != ncrterm * 8:
raise CharmmPSFError('Got %d CMAP indexes for %d cmap terms' % raise CharmmPSFError('Got %d CMAP indexes for %d cmap terms' %
...@@ -347,7 +352,6 @@ class CharmmPsfFile(object): ...@@ -347,7 +352,6 @@ class CharmmPsfFile(object):
) )
cmap_list.changed = False cmap_list.changed = False
# Now set the instance attributes
self.residue_list = residue_list self.residue_list = residue_list
self.atom_list = atom_list self.atom_list = atom_list
self.bond_list = bond_list self.bond_list = bond_list
...@@ -381,18 +385,18 @@ class CharmmPsfFile(object): ...@@ -381,18 +385,18 @@ class CharmmPsfFile(object):
raise CharmmPSFError('Could not convert %s' % message) raise CharmmPSFError('Could not convert %s' % message)
@staticmethod @staticmethod
def _parse_psf_section(psf, dtype): def _parse_psf_section(psf):
""" """
This method parses a section of the PSF file This method parses a section of the PSF file
Parameters: Parameters:
- psf (CharmmFile) : Open file that is pointing to the first line - psf (CharmmFile) : Open file that is pointing to the first line
of the section that is to be parsed of the section that is to be parsed
- dtype (type) : The data type to convert all of the data into
Returns: Returns:
(pointers, data) (title, pointers, data)
- title (str) : The label of the PSF section we are parsing
- pointers (int/tuple of ints) : If one pointer is set, pointers is - pointers (int/tuple of ints) : If one pointer is set, pointers is
simply the integer that is value of that pointer. Otherwise simply the integer that is value of that pointer. Otherwise
it is a tuple with every pointer value defined in the first it is a tuple with every pointer value defined in the first
...@@ -401,22 +405,42 @@ class CharmmPsfFile(object): ...@@ -401,22 +405,42 @@ class CharmmPsfFile(object):
to `dtype' to `dtype'
""" """
conv = CharmmPsfFile._convert conv = CharmmPsfFile._convert
words = psf.readline().split() line = psf.readline()
while not line.strip():
if not line:
raise CharmmPsfEOF('Unexpected EOF in PSF file')
else:
line = psf.readline()
if '!' in line:
words = line[:line.index('!')].split()
title = line[line.index('!')+1:].strip().upper()
# Strip out description
if ':' in title:
title = title[:title.index(':')]
else:
raise CharmmPSFError('Could not determine section title')
if len(words) == 1: if len(words) == 1:
pointers = conv(words[0], int, 'pointer') pointers = conv(words[0], int, 'pointer')
else: else:
pointers = tuple([conv(w, int, 'pointer') for w in words]) pointers = tuple([conv(w, int, 'pointer') for w in words])
line = psf.readline().strip() line = psf.readline().strip()
if not line: if not line and title.startswith('NNB'):
# This will correctly handle the NNB section (which has a spurious # This will correctly handle the NNB section (which has a spurious
# blank line) as well as any sections that have 0 members. # blank line) as well as any sections that have 0 members.
line = psf.readline().strip() line = psf.readline().strip()
data = [] data = []
while line: if title == 'NATOM' or title == 'NTITLE':
words = line.split() # Store these two sections as strings (ATOM section we will parse
data.extend([conv(w, dtype, 'PSF data') for w in words]) # later). The rest of the sections are integer pointers
line = psf.readline().strip() while line:
return pointers, data data.append(line)
line = psf.readline().strip()
else:
while line:
words = line.split()
data.extend([conv(w, int, 'PSF data') for w in words])
line = psf.readline().strip()
return title, pointers, data
def writePsf(self, dest, vmd=False): def writePsf(self, dest, vmd=False):
""" """
......
...@@ -42,6 +42,9 @@ class CharmmWarning(Warning): ...@@ -42,6 +42,9 @@ class CharmmWarning(Warning):
class CharmmPSFError(CharmmError): class CharmmPSFError(CharmmError):
""" If there is a problem parsing CHARMM PSF files """ """ If there is a problem parsing CHARMM PSF files """
class CharmmPsfEOF(CharmmError):
""" Raised when the end-of-file is reached on a CHARMM PSF file """
class SplitResidueWarning(CharmmWarning): class SplitResidueWarning(CharmmWarning):
""" For if a residue with the same number but different names is split """ """ For if a residue with the same number but different names is split """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment