Commit 97136edd authored by peastman's avatar peastman
Browse files

Merge pull request #1350 from jchodera/forcefield

Add methods to retrieve residues without matching forcefield residue templates
parents fbf0a0a2 a2289e84
...@@ -1961,6 +1961,38 @@ are :code:`wo1`\ , :code:`wo2`\ , :code:`wo3`\ , :code:`wx1`\ , :code:`wx2`\ , ...@@ -1961,6 +1961,38 @@ are :code:`wo1`\ , :code:`wo2`\ , :code:`wo3`\ , :code:`wx1`\ , :code:`wx2`\ ,
:code:`wx3`\ , :code:`wy1`\ , :code:`wy2`\ , :code:`wy3`\ , :code:`p1`\ , :code:`wx3`\ , :code:`wy1`\ , :code:`wy2`\ , :code:`wy3`\ , :code:`p1`\ ,
:code:`p2`\ , and :code:`p3`\ . :code:`p2`\ , and :code:`p3`\ .
Missing residue templates
=========================
.. CAUTION::
These features are experimental, and its API is subject to change.
You can use the :method:`getUnmatchedResidues()` method to get a list of residues
in the provided :code:`topology` object that do not currently have a matching
residue template defined in the :class:`ForceField`.
::
pdb = PDBFile('input.pdb')
forcefield = ForceField('amber99sb.xml', 'tip3p.xml')
unmatched_residues = forcefield.getUnmatchedResidues(topology)
This is useful for idenfitying issues with prepared systems, debugging issues
with residue template definitions, or identifying which additional residues need
to be parameterized.
As a convenience for parameterizing new residues, you can also get a list of
residues and empty residue templates using :method:`generateTemplatesForUnmatchedResidues`
::
pdb = PDBFile('input.pdb')
forcefield = ForceField('amber99sb.xml', 'tip3p.xml')
[templates, residues] = forcefield.generateTemplatesForUnmatchedResidues(topology)
# Se the atom types
for template in templates:
for atom in template.atoms:
atom.type = ... # set the atom types here
# Register the template with the forcefield.
forcefield.registerResidueTemplate(template)
<HarmonicBondForce> <HarmonicBondForce>
=================== ===================
......
...@@ -573,6 +573,104 @@ class ForceField(object): ...@@ -573,6 +573,104 @@ class ForceField(object):
break break
return [template, matches] return [template, matches]
def _buildBondedToAtomList(self, topology):
"""Build a list of which atom indices are bonded to each atom.
Parameters
----------
topology : Topology
The Topology whose bonds are to be indexed.
Returns
-------
bondedToAtom : list of set of int
bondedToAtom[index] is the set of atom indices bonded to atom `index`
"""
bondedToAtom = []
for atom in topology.atoms():
bondedToAtom.append(set())
for (atom1, atom2) in topology.bonds():
bondedToAtom[atom1.index].add(atom2.index)
bondedToAtom[atom2.index].add(atom1.index)
return bondedToAtom
def getUnmatchedResidues(self, topology):
"""Return a list of Residue objects from specified topology for which no forcefield templates are available.
.. CAUTION:: This method is experimental, and its API is subject to change.
Parameters
----------
topology : Topology
The Topology whose residues are to be checked against the forcefield residue templates.
Returns
-------
unmatched_residues : list of Residue
List of Residue objects from `topology` for which no forcefield residue templates are available.
Note that multiple instances of the same residue appearing at different points in the topology may be returned.
This method may be of use in generating missing residue templates or diagnosing parameterization failures.
"""
# Find the template matching each residue, compiling a list of residues for which no templates are available.
bondedToAtom = self._buildBondedToAtomList(topology)
unmatched_residues = list() # list of unmatched residues
for chain in topology.chains():
for res in chain.residues():
# Attempt to match one of the existing templates.
[template, matches] = self._getResidueTemplateMatches(res, bondedToAtom)
if matches is None:
# No existing templates match.
unmatched_residues.append(res)
return unmatched_residues
def generateTemplatesForUnmatchedResidues(self, topology):
"""Generate forcefield residue templates for residues in specified topology for which no forcefield templates are available.
.. CAUTION:: This method is experimental, and its API is subject to change.
Parameters
----------
topology : Topology
The Topology whose residues are to be checked against the forcefield residue templates.
Returns
-------
templates : list of _TemplateData
List of forcefield residue templates corresponding to residues in `topology` for which no forcefield templates are currently available.
Atom types will be set to `None`, but template name, atom names, elements, and connectivity will be taken from corresponding Residue objects.
residues : list of Residue
List of Residue objects that were used to generate the templates.
`residues[index]` is the Residue that was used to generate the template `templates[index]`
"""
# Get a non-unique list of unmatched residues.
unmatched_residues = self.getUnmatchedResidues(topology)
# Generate a unique list of unmatched residues by comparing fingerprints.
bondedToAtom = self._buildBondedToAtomList(topology)
unique_unmatched_residues = list() # list of unique unmatched Residue objects from topology
templates = list() # corresponding _TemplateData templates
signatures = set()
for residue in unmatched_residues:
signature = _createResidueSignature([ atom.element for atom in residue.atoms() ])
template = _createResidueTemplate(residue)
is_unique = True
if signature in signatures:
# Signature is the same as an existing residue; check connectivity.
for check_residue in unique_unmatched_residues:
matches = _matchResidue(check_residue, template, bondedToAtom)
if matches is not None:
is_unique = False
if is_unique:
# Residue is unique.
unique_unmatched_residues.append(residue)
signatures.add(signature)
templates.append(template)
return [templates, unique_unmatched_residues]
def createSystem(self, topology, nonbondedMethod=NoCutoff, nonbondedCutoff=1.0*unit.nanometer, def createSystem(self, topology, nonbondedMethod=NoCutoff, nonbondedCutoff=1.0*unit.nanometer,
constraints=None, rigidWater=True, removeCMMotion=True, hydrogenMass=None, **args): constraints=None, rigidWater=True, removeCMMotion=True, hydrogenMass=None, **args):
"""Construct an OpenMM System representing a Topology with this force field. """Construct an OpenMM System representing a Topology with this force field.
...@@ -853,7 +951,6 @@ def _createResidueSignature(elements): ...@@ -853,7 +951,6 @@ def _createResidueSignature(elements):
s += element.symbol+str(count) s += element.symbol+str(count)
return s return s
def _matchResidue(res, template, bondedToAtom): def _matchResidue(res, template, bondedToAtom):
"""Determine whether a residue matches a template and return a list of corresponding atoms. """Determine whether a residue matches a template and return a list of corresponding atoms.
...@@ -998,6 +1095,34 @@ def _findMatchErrors(forcefield, res): ...@@ -998,6 +1095,34 @@ def _findMatchErrors(forcefield, res):
return 'The set of atoms is similar to %s, but it is missing %d atoms.' % (bestMatchName, numBestMatchAtoms-numResidueAtoms) return 'The set of atoms is similar to %s, but it is missing %d atoms.' % (bestMatchName, numBestMatchAtoms-numResidueAtoms)
return 'This might mean your input topology is missing some atoms or bonds, or possibly that you are using the wrong force field.' return 'This might mean your input topology is missing some atoms or bonds, or possibly that you are using the wrong force field.'
def _createResidueTemplate(residue):
"""Create a _TemplateData template from a Residue object.
Parameters
----------
residue : Residue
The Residue from which the template is to be constructed.
Returns
-------
template : _TemplateData
The residue template, with atom types set to None.
This method may be useful in creating new residue templates for residues without templates defined by the ForceField.
"""
template = ForceField._TemplateData(residue.name)
for atom in residue.atoms():
template.atoms.append(ForceField._TemplateAtomData(atom.name, None, atom.element))
for (atom1,atom2) in residue.internal_bonds():
template.addBondByName(atom1.name, atom2.name)
residue_atoms = [ atom for atom in residue.atoms() ]
for (atom1,atom2) in residue.external_bonds():
if atom1 in residue_atoms:
template.addExternalBondByName(atom1.name)
elif atom2 in residue_atoms:
template.addExternalBondByName(atom2.name)
return template
# The following classes are generators that know how to create Force subclasses and add them to a System that is being # The following classes are generators that know how to create Force subclasses and add them to a System that is being
# created. Each generator class must define two methods: 1) a static method that takes an etree Element and a ForceField, # created. Each generator class must define two methods: 1) a static method that takes an etree Element and a ForceField,
...@@ -4558,3 +4683,5 @@ class DrudeGenerator: ...@@ -4558,3 +4683,5 @@ class DrudeGenerator:
drude.addScreenedPair(drude1, drude2, thole1+thole2) drude.addScreenedPair(drude1, drude2, thole1+thole2)
parsers["DrudeForce"] = DrudeGenerator.parseElement parsers["DrudeForce"] = DrudeGenerator.parseElement
#=============================================================================================
...@@ -261,19 +261,11 @@ class TestForceField(unittest.TestCase): ...@@ -261,19 +261,11 @@ class TestForceField(unittest.TestCase):
from uuid import uuid4 from uuid import uuid4
template_name = uuid4() template_name = uuid4()
# Create residue template. # Create residue template.
template = ForceField._TemplateData(template_name) from simtk.openmm.app.forcefield import _createResidueTemplate
for atom in residue.atoms(): template = _createResidueTemplate(residue) # use helper function
typename = 'XXX' template.name = template_name # replace template name
atom_template = ForceField._TemplateAtomData(atom.name, typename, atom.element) for (template_atom, residue_atom) in zip(template.atoms, residue.atoms()):
template.atoms.append(atom_template) template_atom.type = 'XXX' # replace atom type
for (atom1,atom2) in residue.internal_bonds():
template.addBondByName(atom1.name, atom2.name)
residue_atoms = [ atom for atom in residue.atoms() ]
for (atom1,atom2) in residue.external_bonds():
if atom1 in residue_atoms:
template.addExternalBondByName(atom1.name)
elif atom2 in residue_atoms:
template.addExternalBondByName(atom2.name)
# Register the template. # Register the template.
forcefield.registerResidueTemplate(template) forcefield.registerResidueTemplate(template)
...@@ -347,6 +339,98 @@ class TestForceField(unittest.TestCase): ...@@ -347,6 +339,98 @@ class TestForceField(unittest.TestCase):
system = forcefield.createSystem(pdb.topology, nonbondedMethod=test['nonbondedMethod']) system = forcefield.createSystem(pdb.topology, nonbondedMethod=test['nonbondedMethod'])
# TODO: Test energies are finite? # TODO: Test energies are finite?
def test_getUnmatchedResidues(self):
"""Test retrieval of list of residues for which no templates are available."""
# Load the PDB file.
pdb = PDBFile(os.path.join('systems', 'T4-lysozyme-L99A-p-xylene-implicit.pdb'))
# Create a ForceField object.
forcefield = ForceField('amber99sb.xml', 'tip3p.xml')
# Get list of unmatched residues.
unmatched_residues = forcefield.getUnmatchedResidues(pdb.topology)
# Check results.
self.assertEqual(len(unmatched_residues), 1)
self.assertEqual(unmatched_residues[0].name, 'TMP')
self.assertEqual(unmatched_residues[0].id, '163')
# Load the PDB file.
pdb = PDBFile(os.path.join('systems', 'ala_ala_ala.pdb'))
# Create a ForceField object.
forcefield = ForceField('tip3p.xml')
# Get list of unmatched residues.
unmatched_residues = forcefield.getUnmatchedResidues(pdb.topology)
# Check results.
self.assertEqual(len(unmatched_residues), 3)
self.assertEqual(unmatched_residues[0].name, 'ALA')
self.assertEqual(unmatched_residues[0].chain.id, 'X')
self.assertEqual(unmatched_residues[0].id, '1')
def test_ggenerateTemplatesForUnmatchedResidues(self):
"""Test generation of blank forcefield residue templates for unmatched residues."""
#
# Test where we generate parameters for only a ligand.
#
# Load the PDB file.
pdb = PDBFile(os.path.join('systems', 'nacl-water.pdb'))
# Create a ForceField object.
forcefield = ForceField('tip3p.xml')
# Get list of unmatched residues.
unmatched_residues = forcefield.getUnmatchedResidues(pdb.topology)
[templates, residues] = forcefield.generateTemplatesForUnmatchedResidues(pdb.topology)
# Check results.
self.assertEqual(len(unmatched_residues), 24)
self.assertEqual(len(residues), 2)
self.assertEqual(len(templates), 2)
unique_names = set([ residue.name for residue in residues ])
self.assertTrue('HOH' not in unique_names)
self.assertTrue('NA' in unique_names)
self.assertTrue('CL' in unique_names)
template_names = set([ template.name for template in templates ])
self.assertTrue('HOH' not in template_names)
self.assertTrue('NA' in template_names)
self.assertTrue('CL' in template_names)
# Define forcefield parameters using returned templates.
# NOTE: This parameter definition file will currently only work for residues that either have
# no external bonds or external bonds to other residues parameterized by the simpleTemplateGenerator.
simple_ffxml_contents = """
<ForceField>
<AtomTypes>
<Type name="XXX" class="XXX" element="C" mass="12.0"/>
</AtomTypes>
<HarmonicBondForce>
<Bond type1="XXX" type2="XXX" length="0.1409" k="392459.2"/>
</HarmonicBondForce>
<HarmonicAngleForce>
<Angle type1="XXX" type2="XXX" type3="XXX" angle="2.09439510239" k="527.184"/>
</HarmonicAngleForce>
<NonbondedForce coulomb14scale="0.833333" lj14scale="0.5">
<Atom type="XXX" charge="0.000" sigma="0.315" epsilon="0.635"/>
</NonbondedForce>
</ForceField>"""
#
# Test the pre-geenration of missing residue template for a ligand.
#
# Load the PDB file.
pdb = PDBFile(os.path.join('systems', 'T4-lysozyme-L99A-p-xylene-implicit.pdb'))
# Create a ForceField object.
forcefield = ForceField('amber99sb.xml', 'tip3p.xml', StringIO(simple_ffxml_contents))
# Get list of unique unmatched residues.
[templates, residues] = forcefield.generateTemplatesForUnmatchedResidues(pdb.topology)
# Add residue templates to forcefield.
for template in templates:
# Replace atom types.
for atom in template.atoms:
atom.type = 'XXX'
# Register the template.
forcefield.registerResidueTemplate(template)
# Parameterize system.
system = forcefield.createSystem(pdb.topology, nonbondedMethod=NoCutoff)
# TODO: Test energies are finite?
class AmoebaTestForceField(unittest.TestCase): class AmoebaTestForceField(unittest.TestCase):
"""Test the ForceField.createSystem() method with the AMOEBA forcefield.""" """Test the ForceField.createSystem() method with the AMOEBA forcefield."""
...@@ -475,4 +559,3 @@ class AmoebaTestForceField(unittest.TestCase): ...@@ -475,4 +559,3 @@ class AmoebaTestForceField(unittest.TestCase):
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment