Commit 1cf42774 authored by Evan Pretti's avatar Evan Pretti
Browse files

Additional explanation messages for special cases and tests

parent 71208da2
......@@ -41,6 +41,7 @@ import warnings
from math import sqrt, cos
from copy import deepcopy
from collections import Counter, defaultdict
from difflib import SequenceMatcher
import openmm as mm
import openmm.unit as unit
from . import element as elem
......@@ -1878,23 +1879,49 @@ def _findMatchErrors(forcefield, res):
messages.append(f'is missing {message}')
if extra:
message = joinMessages(f'{diff} {formatter(key, diff)}' for key, diff in sorted(extra))
messages.append(f'has {message} extra')
messages.append(f'has {message} too many')
return joinMessages(messages)
def formatAtomDiff(key, diff):
"""Formats a string describing an element associated with a different number of atoms."""
return (f'{elem.Element.getByAtomicNumber(key).symbol} atom' if key else 'site') + ('' if diff == 1 else 's')
return (f'{elem.Element.getByAtomicNumber(key).symbol} atom' if key else 'extra site') + ('' if diff == 1 else 's')
def formatBondDiff(key, diff):
"""Formats a string describing elements associated with a different number of bonds."""
name1 = elem.Element.getByAtomicNumber(key[0]).symbol if key else 'site'
name2 = elem.Element.getByAtomicNumber(key[1]).symbol if key else 'site'
name1 = elem.Element.getByAtomicNumber(key[0]).symbol if key else 'extra site'
name2 = elem.Element.getByAtomicNumber(key[1]).symbol if key else 'extra site'
return f'{name1}-{name2} bond' + ('' if diff == 1 else 's')
# Prepare fingerprints of the residue and templates based on counts of the
# elements of their atoms.
def pickBestMatch(bestMatches):
"""If there are multiple best-scoring matches, pick one with the closest name to the residue. Call only with a non-empty list."""
if not res.name:
return bestMatches[0]
return max(bestMatches, key=lambda match: SequenceMatcher(a=res.name.strip(), b=match.strip(), autojunk=False).ratio())
# First check to see if there are no templates in the force field.
if not forcefield._templates:
return f'The force field contains no residue templates.'
# Get all elements in all templates in the force field and see if this
# residue uses an element not supported. Otherwise, prepare fingerprints of
# the residue and templates based on counts of the elements of their atoms.
supportedElements = set(elemToNum(atom.element) for template in forcefield._templates.values() for atom in template.atoms)
residueAtomCounts = Counter(elemToNum(atom.element) for atom in res.atoms())
unsupportedElements = set(residueAtomCounts.keys()) - supportedElements
if unsupportedElements:
unsupportedMessage = joinMessages(formatAtomDiff(elemNum, 0) for elemNum in sorted(unsupportedElements))
return f'The residue contains {unsupportedMessage}, which are not supported by any template in the force field.'
# It will be useful to provide a special message if this is a terminal
# residue of the chain, since this is a common cause of topology problems.
chainResidues = list(res.chain.residues())
if len(chainResidues) > 1 and (res == chainResidues[0] or res == chainResidues[-1]):
terminalMessage = ' Is the chain terminated in a way that is unsupported by the force field?'
else:
terminalMessage = ''
def makeTemplateAtomDiff(templateName):
"""Prepares a Counter describing the difference between a residue's and a template's atoms."""
......@@ -1913,10 +1940,23 @@ def _findMatchErrors(forcefield, res):
# If there are matches found and the best score's sum is non-zero
# (an imperfect match), return. Otherwise, if the best matches are
# perfect, keep filtering with additional criteria.
return f'The set of atoms is similar to {bestMatches[0]}, but {formatDiffMessage(templatesAtomDiffs[bestMatches[0]], formatAtomDiff)}.'
bestMatch = pickBestMatch(bestMatches)
return f'The set of atoms is similar to {bestMatch}, but {formatDiffMessage(templatesAtomDiffs[bestMatch], formatAtomDiff)}.{terminalMessage}'
bestMatches, bestScore = bestMatchAtom(templatesAtomDiffs, bestMatches, False)
if bestMatches and bestScore[1]:
return f'The set of heavy atoms matches {bestMatches[0]}, but the residue {formatDiffMessage(templatesAtomDiffs[bestMatches[0]], formatAtomDiff)}.'
bestMatch = pickBestMatch(bestMatches)
bestMatchDiffs = templatesAtomDiffs[bestMatch]
# Give additional help in the special cases where the residue is missing
# sites or hydrogen atoms and nothing else.
if bestMatchDiffs[0] < 0 and all(diff == 0 for key, diff in bestMatchDiffs.items() if key != 0):
specialLabel = 'it' if bestMatchDiffs[0] == -1 else 'them'
specialMessage = f' You may be able to add {specialLabel} with Modeller.addExtraParticles().'
elif bestMatchDiffs[1] < 0 and all(diff == 0 for key, diff in bestMatchDiffs.items() if key != 1):
specialLabel = 'it' if bestMatchDiffs[1] == -1 else 'them'
specialMessage = f' You may be able to add {specialLabel} with Modeller.addHydrogens().'
else:
specialMessage = terminalMessage
return f'The set of heavy atoms matches {bestMatch}, but the residue {formatDiffMessage(bestMatchDiffs, formatAtomDiff)}.{specialMessage}'
# We found templates that are atom-for-atom matches to the residue, so now
# prepare fingerprints of the residue and templates based on their bonds.
......@@ -1930,11 +1970,17 @@ def _findMatchErrors(forcefield, res):
return counterSubtract(residueIntBondCounts, Counter(makeIntBondSpec((template.atoms[atom1], template.atoms[atom2])) for atom1, atom2 in template.bonds))
# We only need to prepare data for the templates remaining in bestMatches.
templatesIntBondDiffs = {templateName: makeTemplateIntBondDiff(templateName) for templateName in bestMatches}
bestMatches, bestScore = bestMatchBond(templatesIntBondDiffs, bestMatches)
if bestMatches and bestScore[1]:
return f'The set of atoms matches {bestMatches[0]}, but the residue {formatDiffMessage(templatesIntBondDiffs[bestMatches[0]], formatBondDiff)}.'
bestMatch = pickBestMatch(bestMatches)
if not tuple(res.internal_bonds()):
# Special message when the residue is missing all internal bonds.
return (f'The set of atoms matches {bestMatch}, but the residue has no bonds between its atoms. '
'If the topology was read from a PDB, it may contain non-standard residues/names and/or be missing CONECT records.')
return f'The set of atoms matches {bestMatch}, but the residue {formatDiffMessage(templatesIntBondDiffs[bestMatch], formatBondDiff)}.'
# Finally, check external bonds. Normally these should always be from heavy
# atoms, so don't do separate checks excluding vs. including non-heavy atoms
......@@ -1951,14 +1997,24 @@ def _findMatchErrors(forcefield, res):
bestMatches, bestScore = bestMatchAtom(templatesExtBondDiffs, bestMatches, False)
if bestMatches and bestScore[1]:
return f'The residue matches {bestMatches[0]}, but the set of externally bonded atoms {formatDiffMessage(templatesExtBondDiffs[bestMatches[0]], formatAtomDiff)}.'
bestMatch = pickBestMatch(bestMatches)
bestMatchDiffs = templatesExtBondDiffs[bestMatch]
if all(value <= 0 for value in bestMatchDiffs.values()):
# Special message if external bonds are missing only, not different.
specialMessage = ' Is the chain missing a terminal capping group?'
else:
specialMessage = terminalMessage
return f'The atoms and bonds in the residue match {bestMatch}, but the set of externally bonded atoms {formatDiffMessage(bestMatchDiffs, formatAtomDiff)}.{specialMessage}'
# If we have matches at this point, atoms and bonds match perfectly, so the
# connectivity must be different. If bestMatches is empty, something else
# went wrong, so return a generic error message.
if bestMatches:
return f'The atoms and bonds in the residue match {bestMatches[0]}, but the connectivity is different.'
# Display all possible matching templates at this point to try to help,
# since we can't give any more detailed information.
return f'The atoms and bonds in the residue match {joinMessages(bestMatches)}, but the connectivity is different.'
return 'This might mean your input topology is missing some atoms or bonds, or possibly that you are using the wrong force field.'
def _createResidueTemplate(residue):
......
......@@ -835,27 +835,27 @@ class TestForceField(unittest.TestCase):
"""Test match error detection and diagnostics"""
# Load a force field to test with and prepare some lines with which to build topologies from PDB files.
forcefield = ForceField('amber14-all.xml')
forcefield = ForceField('amber14-all.xml', 'amber14/opc.xml')
pdbLines = [
"ATOM 0 CH3 ACE A 1 0 0 0 C",
"ATOM 1 HH31 ACE A 1 0 0 0 H",
"ATOM 2 HH32 ACE A 1 0 0 0 H",
"ATOM 3 HH33 ACE A 1 0 0 0 H",
"ATOM 4 C ACE A 1 0 0 0 C",
"ATOM 5 O ACE A 1 0 0 0 O",
"ATOM 6 N GLY A 2 0 0 0 N",
"ATOM 7 H GLY A 2 0 0 0 H",
"ATOM 8 CA GLY A 2 0 0 0 C",
"ATOM 9 HA2 GLY A 2 0 0 0 H",
"ATOM 10 HA3 GLY A 2 0 0 0 H",
"ATOM 11 C GLY A 2 0 0 0 C",
"ATOM 12 O GLY A 2 0 0 0 O",
"ATOM 13 N NME A 3 0 0 0 N",
"ATOM 14 H NME A 3 0 0 0 H",
"ATOM 15 CH3 NME A 3 0 0 0 C",
"ATOM 16 HH31 NME A 3 0 0 0 H",
"ATOM 17 HH32 NME A 3 0 0 0 H",
"ATOM 18 HH33 NME A 3 0 0 0 H",
'ATOM 0 CH3 ACE A 1 0 0 0 C',
'ATOM 1 HH31 ACE A 1 0 0 0 H',
'ATOM 2 HH32 ACE A 1 0 0 0 H',
'ATOM 3 HH33 ACE A 1 0 0 0 H',
'ATOM 4 C ACE A 1 0 0 0 C',
'ATOM 5 O ACE A 1 0 0 0 O',
'ATOM 6 N GLY A 2 0 0 0 N',
'ATOM 7 H GLY A 2 0 0 0 H',
'ATOM 8 CA GLY A 2 0 0 0 C',
'ATOM 9 HA2 GLY A 2 0 0 0 H',
'ATOM 10 HA3 GLY A 2 0 0 0 H',
'ATOM 11 C GLY A 2 0 0 0 C',
'ATOM 12 O GLY A 2 0 0 0 O',
'ATOM 13 N NME A 3 0 0 0 N',
'ATOM 14 H NME A 3 0 0 0 H',
'ATOM 15 CH3 NME A 3 0 0 0 C',
'ATOM 16 HH31 NME A 3 0 0 0 H',
'ATOM 17 HH32 NME A 3 0 0 0 H',
'ATOM 18 HH33 NME A 3 0 0 0 H',
]
def makeSystem(lines):
......@@ -864,71 +864,129 @@ class TestForceField(unittest.TestCase):
# This should succeed and not produce any match errors.
self.assertEqual(makeSystem(pdbLines).getNumParticles(), 19)
# Add an He atom and B atoms atom to GLY.
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The residue contains He atoms and B atoms, which are not supported by any template in the force field'):
makeSystem(pdbLines[:9] + [
'ATOM 19 X1 GLY A 2 0 0 0 He',
'ATOM 20 X2 GLY A 2 0 0 0 B',
'ATOM 21 X3 GLY A 2 0 0 0 B',
] + pdbLines[9:])
# Delete CA atom from GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms is similar to GLY, but is missing 1 C atom"):
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of atoms is similar to GLY, but is missing 1 C atom'):
makeSystem(pdbLines[:8] + pdbLines[9:])
# Add an He atom to GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms is similar to GLY, but has 1 He atom extra"):
# Add an F atom to GLY.
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of atoms is similar to GLY, but has 1 F atom too many'):
makeSystem(pdbLines[:9] + [
"ATOM 19 X1 GLY A 2 0 0 0 He",
'ATOM 19 X1 GLY A 2 0 0 0 F',
] + pdbLines[9:])
# Delete CA atom from GLY and add an He atom.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms is similar to GLY, but is missing 1 C atom and has 1 He atom extra"):
# Delete CA atom from GLY and add an F atom.
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of atoms is similar to GLY, but is missing 1 C atom and has 1 F atom too many'):
makeSystem(pdbLines[:8] + [
"ATOM 19 X1 GLY A 2 0 0 0 He",
'ATOM 19 X1 GLY A 2 0 0 0 F',
] + pdbLines[9:])
# Add 1 He atom, 2 Li atoms, and 1 Be atom to GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms is similar to GLY, but has 1 He atom, 2 Li atoms, and 1 Be atom extra"):
# Add 1 F atom, 2 Cl atoms, and 1 Br atom to GLY.
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of atoms is similar to GLY, but has 1 F atom, 2 Cl atoms, and 1 Br atom too many'):
makeSystem(pdbLines[:9] + [
"ATOM 19 X1 GLY A 2 0 0 0 He",
"ATOM 20 X2 GLY A 2 0 0 0 Li",
"ATOM 21 X3 GLY A 2 0 0 0 Li",
"ATOM 22 X4 GLY A 2 0 0 0 Be",
'ATOM 19 X1 GLY A 2 0 0 0 F',
'ATOM 20 X2 GLY A 2 0 0 0 Cl',
'ATOM 21 X3 GLY A 2 0 0 0 Cl',
'ATOM 22 X4 GLY A 2 0 0 0 Br',
] + pdbLines[9:])
# Add a virtual site to GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue has 1 site extra"):
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue has 1 extra site too many'):
makeSystem(pdbLines[:9] + [
"ATOM 19 X1 GLY A 2 0 0 0 EP",
'ATOM 19 X1 GLY A 2 0 0 0 EP',
] + pdbLines[9:])
# Delete HA3 atom from GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue is missing 1 H atom"):
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue is missing 1 H atom.*You may be able to add it with.*addHydrogens'):
makeSystem(pdbLines[:10] + pdbLines[11:])
# Delete HA2 and HA3 atoms from GLY.
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue is missing 2 H atoms.*You may be able to add them with.*addHydrogens'):
makeSystem(pdbLines[:9] + pdbLines[11:])
# Delete HA3 atom from GLY and add a virtual site.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue is missing 1 H atom and has 1 site extra"):
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue is missing 1 H atom and has 1 extra site too many'):
makeSystem(pdbLines[:10] + [
"ATOM 19 X1 GLY A 2 0 0 0 EP",
'ATOM 19 X1 GLY A 2 0 0 0 EP',
] + pdbLines[11:])
# Rename HA3 atom to remove the CA-HA3 bond.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms matches GLY, but the residue is missing 1 H-C bond"):
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of atoms matches GLY, but the residue is missing 1 H-C bond'):
makeSystem(pdbLines[:10] + [
"ATOM 10 X1 GLY A 2 0 0 0 H",
'ATOM 10 X1 GLY A 2 0 0 0 H',
] + pdbLines[11:])
# Add an extra N-O bond.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms matches GLY, but the residue has 1 N-O bond extra"):
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The set of atoms matches GLY, but the residue has 1 N-O bond too many'):
makeSystem(pdbLines + [
"CONECT 6 12"
'CONECT 6 12'
])
# Remove an external bond to NME by renaming its N atom.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The residue matches GLY, but the set of externally bonded atoms is missing 1 C atom"):
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The atoms and bonds in the residue match GLY, but the set of externally bonded atoms is missing 1 C atom'):
makeSystem(pdbLines[:13] + [
"ATOM 13 X1 NME A 3 0 0 0 N",
'ATOM 13 X1 NME A 3 0 0 0 N',
] + pdbLines[14:])
# Add an extra external bond to NME.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The residue matches GLY, but the set of externally bonded atoms has 1 O atom extra"):
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The atoms and bonds in the residue match GLY, but the set of externally bonded atoms has 1 O atom too many'):
makeSystem(pdbLines + [
'CONECT 12 15'
])
# Delete ACE so that a capping group is missing from GLY.
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The atoms and bonds in the residue match GLY, but the set of externally bonded atoms is missing 1 N atom.*Is the chain missing a terminal capping group?'):
makeSystem(pdbLines[6:])
# Keep the atom/bond element fingerprint the same but change the connectivity.
with self.assertRaisesRegex(ValueError, 'No template found for residue.*GLY.*The atoms and bonds in the residue match GLY, but the connectivity is different'):
# Rename O to break the C=O bond, but then reattach the O to the CA.
makeSystem(pdbLines[:12] + [
'ATOM 12 X1 GLY A 2 0 0 0 O',
] + pdbLines[13:] + [
'CONECT 8 12'
])
# Make water with incorrect atom names so bonds will be missing.
pdbLines = [
'ATOM 0 X1 HOH A 1 0 0 0 O',
'ATOM 1 X2 HOH A 1 0 0 0 H',
'ATOM 2 X3 HOH A 1 0 0 0 H',
]
# Check for a special message when all bonds are missing.
forcefield = ForceField('opc3.xml')
with self.assertRaisesRegex(ValueError, 'No template found for residue.*HOH.*The set of atoms matches HOH, but the residue has no bonds between its atoms'):
makeSystem(pdbLines)
# Add a site to a residue with a force field that doesn't support sites.
with self.assertRaisesRegex(ValueError, 'No template found for residue.*HOH.*The residue contains extra sites, which are not supported by any template in the force field'):
makeSystem(pdbLines + [
"CONECT 12 15"
'ATOM 3 X4 HOH A 1 0 0 0 EP',
])
# Load a force field so that 1 site will be missing.
forcefield = ForceField('opc.xml')
with self.assertRaisesRegex(ValueError, 'No template found for residue.*HOH.*The set of heavy atoms matches HOH, but the residue is missing 1 extra site.*You may be able to add it with.*addExtraParticles'):
makeSystem(pdbLines)
# Load a force field so that 2 sites will be missing.
forcefield = ForceField('tip5p.xml')
with self.assertRaisesRegex(ValueError, 'No template found for residue.*HOH.*The set of heavy atoms matches HOH, but the residue is missing 2 extra sites.*You may be able to add them with.*addExtraParticles'):
makeSystem(pdbLines)
# Use an empty force field so that there are no templates.
forcefield = ForceField()
with self.assertRaisesRegex(ValueError, 'No template found for residue.*HOH.*The force field contains no residue templates'):
makeSystem(pdbLines)
def test_Wildcard(self):
"""Test that PeriodicTorsionForces using wildcard ('') for atom types / classes in the ffxml are correctly registered"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment