Commit a43396e0 authored by peastman's avatar peastman
Browse files

Merge pull request #1312 from jchodera/forcefield-handlers

Add hooks for parameterizing unmatched residues in ForceField
parents b438a6ea ac57dde4
......@@ -2659,6 +2659,9 @@ The ForceField class is designed to be modular and extensible. This means you
can add support for entirely new force types, such as ones implemented with
plugins.
Adding new force types
======================
For every force class, there is a “generator” class that parses the
corresponding XML tag, then creates Force objects and adds them to the System.
ForceField maintains a map of tag names to generator classes. When a ForceField
......@@ -2722,3 +2725,56 @@ parsing it.
Now you can simply create a ForceField object as usual. If an XML file contains
a :code:`<MyForce>` tag, it will be recognized and processed correctly.
Adding residue template generators
==================================
.. CAUTION::
This feature is experimental, and its API is subject to change.
Typically, when :class:`ForceField` encounters a residue it does not have a template for,
it simply raises an :code:`Exception`, since it does not know how to assign atom types for
the unknown residue.
However, :class:`ForceField` has an API for registering *residue template generators* that are
called when a residue without an existing template is encountered. These generators
may create new residue templates that match existing atom types and parameters, or can
even create new atom types and new parameters that are added to :class:`ForceField`. This
functionality can be useful for adding residue template generators that are able to
parameterize small molecules that are not represented in a protein or nucleic acid
forcefield, for example, or for creating new residue templates for post-translationally
modified residues, covalently-bound ligands, or unnatural amino acids or bases.
To register a new residue template generator named :code:`generator`, simply call the
:meth:`registerTemplateGenerator` method on an existing :class:`ForceField` object:
::
forcefield.registerTemplateGenerator(generator)
This :code:`generator` function must conform to the following API:
::
def generator(forcefield, residue):
"""
Parameters
----------
forcefield : simtk.openmm.app.ForceField
The ForceField object to which residue templates and/or parameters are to be added.
residue : simtk.openmm.app.Topology.Residue
The residue topology for which a template is to be generated.
Returns
-------
success : bool
If the generator is able to successfully parameterize the residue, `True` is returned.
If the generator cannot parameterize the residue, it should return `False` and not modify `forcefield`.
The generator should either register a residue template directly with `forcefield.registerResidueTemplate(template)`
or it should call `forcefield.loadFile(file)` to load residue definitions from an ffxml file.
It can also use the `ForceField` programmatic API to add additional atom types (via `forcefield.registerAtomType(parameters)`)
or additional parameters.
"""
The :code:`ForceField` object will be modified by the residue template generator as residues without previously
defined templates are encountered. Because these templates are added to `ForceField` as new residue
types are encountered, subsequent residues will be parameterized using the same residue templates without
calling the :code:`generator` again.
......@@ -119,11 +119,12 @@ class ForceField(object):
self._atomClasses = {'':set()}
self._forces = []
self._scripts = []
self._templateGenerators = []
for file in files:
self.loadFile(file)
def loadFile(self, file):
"""Load an XML file and add the definitions from it to this FieldField.
"""Load an XML file and add the definitions from it to this ForceField.
Parameters
----------
......@@ -139,6 +140,18 @@ class ForceField(object):
tree = etree.parse(file)
except IOError:
tree = etree.parse(os.path.join(os.path.dirname(__file__), 'data', file))
except Exception as e:
# Fail with an error message about which file could not be read.
# TODO: Also handle case where fallback to 'data' directory encounters problems,
# but this is much less worrisome because we control those files.
msg = str(e) + '\n'
if hasattr(file, 'name'):
filename = file.name
else:
filename = str(file)
msg += "ForceField.loadFile() encountered an error reading file '%s'\n" % filename
raise Exception(msg)
root = tree.getroot()
# Load the atom types.
......@@ -163,21 +176,20 @@ class ForceField(object):
if atomName in atomIndices:
raise ValueError('Residue '+resName+' contains multiple atoms named '+atomName)
atomIndices[atomName] = len(template.atoms)
template.atoms.append(ForceField._TemplateAtomData(atomName, atom.attrib['type'], self._atomTypes[atom.attrib['type']][2], params))
typeName = atom.attrib['type']
template.atoms.append(ForceField._TemplateAtomData(atomName, typeName, self._atomTypes[typeName].element, params))
for site in residue.findall('VirtualSite'):
template.virtualSites.append(ForceField._VirtualSiteData(site, atomIndices))
for bond in residue.findall('Bond'):
if 'atomName1' in bond.attrib:
template.addBond(atomIndices[bond.attrib['atomName1']], atomIndices[bond.attrib['atomName2']])
template.addBondByName(bond.attrib['atomName1'], bond.attrib['atomName2'])
else:
template.addBond(int(bond.attrib['from']), int(bond.attrib['to']))
for bond in residue.findall('ExternalBond'):
if 'atomName' in bond.attrib:
b = atomIndices[bond.attrib['atomName']]
template.addExternalBondByName(bond.attrib['atomName'])
else:
b = int(bond.attrib['from'])
template.externalBonds.append(b)
template.atoms[b].externalBonds += 1
template.addExternalBond(int(bond.attrib['from']))
self.registerResidueTemplate(template)
# Load force definitions
......@@ -211,7 +223,7 @@ class ForceField(object):
element = parameters['element']
if not isinstance(element, elem.Element):
element = elem.get_by_symbol(element)
self._atomTypes[name] = (atomClass, mass, element)
self._atomTypes[name] = ForceField._AtomType(name, atomClass, mass, element)
if atomClass in self._atomClasses:
typeSet = self._atomClasses[atomClass]
else:
......@@ -233,8 +245,66 @@ class ForceField(object):
"""Register a new script to be executed after building the System."""
self._scripts.append(script)
def registerTemplateGenerator(self, generator):
"""Register a residue template generator that can be used to parameterize residues that do not match existing forcefield templates.
This functionality can be used to add handlers to parameterize small molecules or unnatural/modified residues.
.. CAUTION:: This method is experimental, and its API is subject to change.
Parameters
----------
generator : function
A function that will be called when a residue is encountered that does not match an existing forcefield template.
When a residue without a template is encountered, the `generator` function is called with:
::
success = generator(forcefield, residue)
```
where `forcefield` is the calling `ForceField` object and `residue` is a simtk.openmm.app.topology.Residue object.
`generator` must conform to the following API:
::
Parameters
----------
forcefield : simtk.openmm.app.ForceField
The ForceField object to which residue templates and/or parameters are to be added.
residue : simtk.openmm.app.Topology.Residue
The residue topology for which a template is to be generated.
Returns
-------
success : bool
If the generator is able to successfully parameterize the residue, `True` is returned.
If the generator cannot parameterize the residue, it should return `False` and not modify `forcefield`.
The generator should either register a residue template directly with `forcefield.registerResidueTemplate(template)`
or it should call `forcefield.loadFile(file)` to load residue definitions from an ffxml file.
It can also use the `ForceField` programmatic API to add additional atom types (via `forcefield.registerAtomType(parameters)`)
or additional parameters.
"""
self._templateGenerators.append(generator)
def _findAtomTypes(self, attrib, num):
"""Parse the attributes on an XML tag to find the set of atom types for each atom it involves."""
"""Parse the attributes on an XML tag to find the set of atom types for each atom it involves.
Parameters
----------
attrib : dict of attributes
The dictionary of attributes for an XML parameter tag.
num : int
The number of atom specifiers (e.g. 'class1' through 'class4') to extract.
Returns
-------
types : list
A list of atom types that match.
"""
types = []
for i in range(num):
if num == 1:
......@@ -306,11 +376,39 @@ class ForceField(object):
self.bonds = []
self.externalBonds = []
def getAtomIndexByName(self, atom_name):
"""Look up an atom index by atom name, providing a helpful error message if not found."""
for (index, atom) in enumerate(self.atoms):
if atom.name == atom_name:
return index
# Provide a helpful error message if atom name not found.
msg = "Atom name '%s' not found in residue template '%s'.\n" % (atom_name, self.name)
msg += "Possible atom names are: %s" % str(atomIndices.keys())
raise ValueError(msg)
def addBond(self, atom1, atom2):
"""Add a bond between two atoms in a template given their indices in the template."""
self.bonds.append((atom1, atom2))
self.atoms[atom1].bondedTo.append(atom2)
self.atoms[atom2].bondedTo.append(atom1)
def addBondByName(self, atom1_name, atom2_name):
"""Add a bond between two atoms in a template given their atom names."""
atom1 = self.getAtomIndexByName(atom1_name)
atom2 = self.getAtomIndexByName(atom2_name)
self.addBond(atom1, atom2)
def addExternalBond(self, atom_index):
"""Designate that an atom in a residue template has an external bond, using atom index within template."""
self.externalBonds.append(atom_index)
self.atoms[atom_index].externalBonds += 1
def addExternalBondByName(self, atom_name):
"""Designate that an atom in a residue template has an external bond, using atom name within template."""
atom = self.getAtomIndexByName(atom_name)
self.addExternalBond(atom)
class _TemplateAtomData:
"""Inner class used to encapsulate data about an atom in a residue template definition."""
def __init__(self, name, type, element, parameters={}):
......@@ -362,6 +460,14 @@ class ForceField(object):
else:
self.excludeWith = self.atoms[0]
class _AtomType:
"""Inner class used to record atom types and associated properties."""
def __init__(self, name, atomClass, mass, element):
self.name = name
self.atomClass = atomClass
self.mass = mass
self.element = element
class _AtomTypeParameters:
"""Inner class used to record parameter values for atom types."""
def __init__(self, forcefield, forceName, atomTag, paramNames):
......@@ -433,6 +539,36 @@ class ForceField(object):
raise ValueError('%s: No parameters defined for atom type %s' % (self.forceName, t))
def _getResidueTemplateMatches(self, res, bondedToAtom):
"""Return the residue template matches, or None if none are found.
Parameters
----------
res : Topology.Residue
The residue for which template matches are to be retrieved.
bondedToAtom : list of set of int
bondedToAtom[i] is the set of atoms bonded to atom index i
Returns
-------
template : _ForceFieldTemplate
The matching forcefield residue template, or None if no matches are found.
matches : list
a list specifying which atom of the template each atom of the residue
corresponds to, or None if it does not match the template
"""
template = None
matches = None
signature = _createResidueSignature([atom.element for atom in res.atoms()])
if signature in self._templateSignatures:
for t in self._templateSignatures[signature]:
matches = _matchResidue(res, t, bondedToAtom)
if matches is not None:
template = t
break
return [template, matches]
def createSystem(self, topology, nonbondedMethod=NoCutoff, nonbondedCutoff=1.0*unit.nanometer,
constraints=None, rigidWater=True, removeCMMotion=True, hydrogenMass=None, **args):
"""Construct an OpenMM System representing a Topology with this force field.
......@@ -492,20 +628,30 @@ class ForceField(object):
data.atomBonds[bond.atom2].append(i)
# Find the template matching each residue and assign atom types.
# If no templates are found, attempt to use residue template generators to create new templates (and potentially atom types/parameters).
for chain in topology.chains():
for res in chain.residues():
template = None
matches = None
signature = _createResidueSignature([atom.element for atom in res.atoms()])
if signature in self._templateSignatures:
for t in self._templateSignatures[signature]:
matches = _matchResidue(res, t, bondedToAtom)
if matches is not None:
template = t
# Attempt to match one of the existing templates.
[template, matches] = self._getResidueTemplateMatches(res, bondedToAtom)
if matches is None:
# No existing templates match. Try any registered residue template generators.
for generator in self._templateGenerators:
if generator(self, res):
# This generator has registered a new residue template that should match.
[template, matches] = self._getResidueTemplateMatches(res, bondedToAtom)
if matches is None:
# Something went wrong because the generated template does not match the residue signature.
raise Exception('The residue handler %s indicated it had correctly parameterized residue %s, but the generated template did not match the residue signature.' % (generator.__class__.__name__, str(res)))
else:
# We successfully generated a residue template. Break out of the for loop.
break
# Raise an exception if we have found no templates that match.
if matches is None:
raise ValueError('No template found for residue %d (%s). %s' % (res.index+1, res.name, _findMatchErrors(self, res)))
# Store parameters for the matched residue template.
matchAtoms = dict(zip(matches, res.atoms()))
for atom, match in zip(res.atoms(), matches):
data.atomType[atom] = template.atoms[match].type
......@@ -518,9 +664,22 @@ class ForceField(object):
sys = mm.System()
for atom in topology.atoms():
sys.addParticle(self._atomTypes[data.atomType[atom]][1])
# Look up the atom type name, returning a helpful error message if it cannot be found.
if atom not in data.atomType:
raise Exception("Could not identify atom type for atom '%s'." % str(atom))
typename = data.atomType[atom]
# Look up the type name in the list of registered atom types, returning a helpful error message if it cannot be found.
if typename not in self._atomTypes:
msg = "Could not find typename '%s' for atom '%s' in list of known atom types.\n" % (typename, str(atom))
msg += "Known atom types are: %s" % str(self._atomTypes.keys())
raise Exception(msg)
# Add the particle to the OpenMM system.
mass = self._atomTypes[typename].mass
sys.addParticle(mass)
# Adjust masses.
# Adjust hydroten masses if requested.
if hydrogenMass is not None:
for atom1, atom2 in topology.bonds():
......@@ -536,7 +695,7 @@ class ForceField(object):
boxVectors = topology.getPeriodicBoxVectors()
if boxVectors is not None:
sys.setDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2])
elif nonbondedMethod is not NoCutoff and nonbondedMethod is not CutoffNonPeriodic:
elif nonbondedMethod not in [NoCutoff, CutoffNonPeriodic]:
raise ValueError('Requested periodic boundary conditions for a Topology that does not specify periodic box dimensions')
# Make a list of all unique angles
......@@ -650,7 +809,7 @@ class ForceField(object):
if removeCMMotion:
sys.addForce(mm.CMMotionRemover())
# Let generators do postprocessing
# Let force generators do postprocessing
for force in self._forces:
if 'postprocessSystem' in dir(force):
......
......@@ -371,6 +371,18 @@ class Residue(object):
"""Iterate over all Atoms in the Residue."""
return iter(self._atoms)
def bonds(self):
"""Iterate over all Bonds involving any atom in this residue."""
return ( bond for bond in self.chain.topology.bonds() if ((bond[0] in self._atoms) or (bond[1] in self._atoms)) )
def internal_bonds(self):
"""Iterate over all internal Bonds."""
return ( bond for bond in self.chain.topology.bonds() if ((bond[0] in self._atoms) and (bond[1] in self._atoms)) )
def external_bonds(self):
"""Iterate over all Bonds to external atoms."""
return ( bond for bond in self.chain.topology.bonds() if ((bond[0] in self._atoms) != (bond[1] in self._atoms)) )
def __len__(self):
return len(self._atoms)
......
......@@ -248,6 +248,105 @@ class TestForceField(unittest.TestCase):
self.assertEqual(params[1], 1.0*nanometers)
self.assertEqual(params[2], 0.0*kilojoule_per_mole)
def test_residueTemplateGenerator(self):
"""Test the ability to add residue template generators to parameterize unmatched residues."""
def simpleTemplateGenerator(forcefield, residue):
"""\
Simple residue template generator.
This implementation uses the programmatic API to define residue templates.
NOTE: We presume we have already loaded the force definitions into ForceField.
"""
# Generate a unique prefix name for generating parameters.
from uuid import uuid4
template_name = uuid4()
# Create residue template.
template = ForceField._TemplateData(template_name)
for atom in residue.atoms():
typename = 'XXX'
atom_template = ForceField._TemplateAtomData(atom.name, typename, atom.element)
template.atoms.append(atom_template)
for (atom1,atom2) in residue.internal_bonds():
template.addBondByName(atom1.name, atom2.name)
residue_atoms = [ atom for atom in residue.atoms() ]
for (atom1,atom2) in residue.external_bonds():
if atom1 in residue_atoms:
template.addExternalBondByName(atom1.name)
elif atom2 in residue_atoms:
template.addExternalBondByName(atom2.name)
# Register the template.
forcefield.registerResidueTemplate(template)
# Signal that we have successfully parameterized the residue.
return True
# Define forcefield parameters used by simpleTemplateGenerator.
# NOTE: This parameter definition file will currently only work for residues that either have
# no external bonds or external bonds to other residues parameterized by the simpleTemplateGenerator.
simple_ffxml_contents = """
<ForceField>
<AtomTypes>
<Type name="XXX" class="XXX" element="C" mass="12.0"/>
</AtomTypes>
<HarmonicBondForce>
<Bond type1="XXX" type2="XXX" length="0.1409" k="392459.2"/>
</HarmonicBondForce>
<HarmonicAngleForce>
<Angle type1="XXX" type2="XXX" type3="XXX" angle="2.09439510239" k="527.184"/>
</HarmonicAngleForce>
<NonbondedForce coulomb14scale="0.833333" lj14scale="0.5">
<Atom type="XXX" charge="0.000" sigma="0.315" epsilon="0.635"/>
</NonbondedForce>
</ForceField>"""
#
# Test where we generate parameters for only a ligand.
#
# Load the PDB file.
pdb = PDBFile(os.path.join('systems', 'T4-lysozyme-L99A-p-xylene-implicit.pdb'))
# Create a ForceField object.
forcefield = ForceField('amber99sb.xml', 'tip3p.xml', StringIO(simple_ffxml_contents))
# Add the residue template generator.
forcefield.registerTemplateGenerator(simpleTemplateGenerator)
# Parameterize system.
system = forcefield.createSystem(pdb.topology, nonbondedMethod=NoCutoff)
# TODO: Test energies are finite?
#
# Test for a few systems where we generate all parameters.
#
tests = [
{ 'pdb_filename' : 'alanine-dipeptide-implicit.pdb', 'nonbondedMethod' : NoCutoff },
{ 'pdb_filename' : 'lysozyme-implicit.pdb', 'nonbondedMethod' : NoCutoff },
{ 'pdb_filename' : 'alanine-dipeptide-explicit.pdb', 'nonbondedMethod' : CutoffPeriodic },
]
# Test all systems with separate ForceField objects.
for test in tests:
# Load the PDB file.
pdb = PDBFile(os.path.join('systems', test['pdb_filename']))
# Create a ForceField object.
forcefield = ForceField(StringIO(simple_ffxml_contents))
# Add the residue template generator.
forcefield.registerTemplateGenerator(simpleTemplateGenerator)
# Parameterize system.
system = forcefield.createSystem(pdb.topology, nonbondedMethod=test['nonbondedMethod'])
# TODO: Test energies are finite?
# Now test all systems with a single ForceField object.
# Create a ForceField object.
forcefield = ForceField(StringIO(simple_ffxml_contents))
# Add the residue template generator.
forcefield.registerTemplateGenerator(simpleTemplateGenerator)
for test in tests:
# Load the PDB file.
pdb = PDBFile(os.path.join('systems', test['pdb_filename']))
# Parameterize system.
system = forcefield.createSystem(pdb.topology, nonbondedMethod=test['nonbondedMethod'])
# TODO: Test energies are finite?
class AmoebaTestForceField(unittest.TestCase):
"""Test the ForceField.createSystem() method with the AMOEBA forcefield."""
......
......@@ -26,5 +26,29 @@ class TestTopology(unittest.TestCase):
"""Test getters for number of atoms, residues, chains."""
self.check_pdbfile('systems/1T2Y.pdb', 271, 25, 1)
def test_residue_bonds(self):
"""Test retrieving bonds for a residue produces expected results."""
# Create a test topology
# atom connectivity = A1-|-B1-B2-|-C1
topology = Topology()
chain = topology.addChain(id='A')
residue1 = topology.addResidue('AAA', chain)
residue2 = topology.addResidue('BBB', chain)
residue3 = topology.addResidue('CCC', chain)
atom_A1 = topology.addAtom('A1', element.carbon, residue1)
atom_B1 = topology.addAtom('B1', element.carbon, residue2)
atom_B2 = topology.addAtom('B2', element.carbon, residue2)
atom_C1 = topology.addAtom('C1', element.carbon, residue3)
topology.addBond(atom_A1, atom_B1)
topology.addBond(atom_B1, atom_B2)
topology.addBond(atom_B2, atom_C1)
# Check bonds
all_bonds = [ bond for bond in residue2.bonds() ]
internal_bonds = [ bond for bond in residue2.internal_bonds() ]
external_bonds = [ bond for bond in residue2.external_bonds() ]
self.assertEqual(all_bonds, [ (atom_A1, atom_B1), (atom_B1, atom_B2), (atom_B2, atom_C1) ])
self.assertEqual(internal_bonds, [ (atom_B1, atom_B2) ])
self.assertEqual(external_bonds, [ (atom_A1, atom_B1), (atom_B2, atom_C1) ])
if __name__ == '__main__':
unittest.main()
REMARK 1 CREATED WITH OPENMM 5.2, 2013-10-01
REMARK 1 CRYST1 ADDED MANUALLY BY JDC 2016-06-02
CRYST1 32.853 32.862 31.855 90.00 90.00 90.00 P 1 1
ATOM 1 H1 ACE A 1 15.908 11.969 16.089 1.00 0.00 H
ATOM 2 CH3 ACE A 1 15.377 12.899 15.886 1.00 0.00 C
ATOM 3 H2 ACE A 1 14.471 12.940 16.490 1.00 0.00 H
......
ACE
22
2.0000010 1.0000000 -0.0000013 2.0000010 2.0900000 0.0000001
1.4862640 2.4538490 0.8898240 1.4862590 2.4538520 -0.8898200
3.4274200 2.6407950 -0.0000030 4.3905800 1.8774060 -0.0000066
3.5553754 3.9696488 -0.0000031 2.7331200 4.5561601 -0.0000013
4.8532621 4.6139253 -0.0000043 5.4075960 4.3155388 0.8898152
5.6613044 4.2208425 -1.2321480 5.1232615 4.5213630 -2.1312016
6.6304840 4.7189354 -1.2057907 5.8085401 3.1408724 -1.2413850
4.7126759 6.1294185 0.0000014 3.6006445 6.6527027 0.0000062
5.8460533 6.8348833 0.0000025 6.7370014 6.3591620 -0.0000004
5.8460551 8.2838837 0.0000062 4.8185761 8.6477349 0.0000104
6.3597984 8.6477313 0.8898283 6.3597900 8.6477354 -0.8898187
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment