Commit 71208da2 authored by Evan Pretti's avatar Evan Pretti
Browse files

Output more detailed messages when template matching fails

parent 2abeb801
...@@ -40,7 +40,7 @@ import math ...@@ -40,7 +40,7 @@ import math
import warnings import warnings
from math import sqrt, cos from math import sqrt, cos
from copy import deepcopy from copy import deepcopy
from collections import defaultdict from collections import Counter, defaultdict
import openmm as mm import openmm as mm
import openmm.unit as unit import openmm.unit as unit
from . import element as elem from . import element as elem
...@@ -1794,62 +1794,171 @@ def _applyMultiResiduePatch(data, clusters, patch, candidateTemplates, selectedT ...@@ -1794,62 +1794,171 @@ def _applyMultiResiduePatch(data, clusters, patch, candidateTemplates, selectedT
def _findMatchErrors(forcefield, res): def _findMatchErrors(forcefield, res):
"""Try to guess why a residue failed to match any template and return an error message.""" """Try to guess why a residue failed to match any template and return an error message."""
residueCounts = _countResidueAtoms([atom.element for atom in res.atoms()])
numResidueAtoms = sum(residueCounts.values())
numResidueHeavyAtoms = sum(residueCounts[element] for element in residueCounts if element not in (None, elem.hydrogen))
# Loop over templates and see how closely each one might match. def counterSubtract(counter1, counter2):
"""Subtracts two Counter objects (equivalent to counter1 - counter2 but preserves negatives)."""
difference = counter1.copy()
difference.subtract(counter2)
return difference
def elemToNum(elem):
"""Returns the atomic number of an element or 0 for None."""
return 0 if elem is None else elem.atomic_number
def makeIntBondSpec(bond):
"""Converts an internal bond to (x, y) where x and y are atomic numbers of the bonded atoms' elements, and x < y."""
elemNum1 = elemToNum(bond[0].element)
elemNum2 = elemToNum(bond[1].element)
return min(elemNum1, elemNum2), max(elemNum1, elemNum2)
def makeExtBondSpec(bond):
"""Converts an external bond to the atomic number of the element of the non-external atom."""
return elemToNum(bond.atom1.element) if bond.atom1.residue is res else elemToNum(bond.atom2.element)
def bestMatchAtom(templatesDiffs, templateNames, useHeavy):
"""Finds the best matching templates based on atoms, optionally ignoring non-heavy atoms."""
bestMatches = []
bestScore = None
for templateName in templateNames:
# Find the (signed) differences in atom counts for all elements,
# optionally considering only heavy atoms.
allDiffs = templatesDiffs[templateName].items()
diffs = [(elemNum, diff) for elemNum, diff in allDiffs if not useHeavy or elemNum > 1]
# Build a score as (x, y), where y is the sum of the magnitudes of
# the differences in element counts, and x is a flag set if the
# residue has more of any element than the template. Minimizing the
# score will, when these tuples are compared, favor templates where
# the residue is missing atoms first, and only if there are none,
# include templates for which the residue has extra atoms.
score = (any(diff > 0 for _, diff in allDiffs), sum(abs(diff) for _, diff in diffs))
if bestScore is None or score <= bestScore:
# Keep a list of every template with the lowest score seen.
if score != bestScore:
# If bestScore is None or score < bestScore, clear the list.
bestMatches.clear()
bestScore = score
bestMatches.append(templateName)
return bestMatches, bestScore
def bestMatchBond(templatesDiffs, templateNames):
"""Finds the best matching templates based on bonds."""
bestMatches = []
bestScore = None
for templateName in templateNames:
diffs = templatesDiffs[templateName].items()
# Favor templates where the residue is missing bonds first, and if
# there are none, include templates where it has extra bonds.
score = (any(diff > 0 for _, diff in diffs), sum(abs(diff) for _, diff in diffs))
if bestScore is None or score <= bestScore:
if score != bestScore:
bestMatches.clear()
bestScore = score
bestMatches.append(templateName)
return bestMatches, bestScore
def joinMessages(messages):
"""Produce a human-readable message from the individual message strings passed."""
messages = list(messages)
if len(messages) < 3:
return ' and '.join(messages)
messages[-1] = f'and {messages[-1]}'
return ', '.join(messages)
def formatDiffMessage(diffs, formatter):
"""Formats a message describing a difference between a residue and a template."""
missing, extra = [], []
for key, diff in diffs.items():
if diff < 0:
missing.append((key, -diff))
if diff > 0:
extra.append((key, diff))
messages = []
if missing:
message = joinMessages(f'{diff} {formatter(key, diff)}' for key, diff in sorted(missing))
messages.append(f'is missing {message}')
if extra:
message = joinMessages(f'{diff} {formatter(key, diff)}' for key, diff in sorted(extra))
messages.append(f'has {message} extra')
return joinMessages(messages)
def formatAtomDiff(key, diff):
"""Formats a string describing an element associated with a different number of atoms."""
return (f'{elem.Element.getByAtomicNumber(key).symbol} atom' if key else 'site') + ('' if diff == 1 else 's')
def formatBondDiff(key, diff):
"""Formats a string describing elements associated with a different number of bonds."""
name1 = elem.Element.getByAtomicNumber(key[0]).symbol if key else 'site'
name2 = elem.Element.getByAtomicNumber(key[1]).symbol if key else 'site'
return f'{name1}-{name2} bond' + ('' if diff == 1 else 's')
# Prepare fingerprints of the residue and templates based on counts of the
# elements of their atoms.
residueAtomCounts = Counter(elemToNum(atom.element) for atom in res.atoms())
def makeTemplateAtomDiff(templateName):
"""Prepares a Counter describing the difference between a residue's and a template's atoms."""
template = forcefield._templates[templateName]
return counterSubtract(residueAtomCounts, Counter(elemToNum(atom.element) for atom in template.atoms))
templatesAtomDiffs = {templateName: makeTemplateAtomDiff(templateName) for templateName in forcefield._templates}
# Compare the residue with templates, first based on heavy atoms, then if
# templates are found where all heavy atoms match, on all atoms.
bestMatches = templatesAtomDiffs.keys()
bestMatches, bestScore = bestMatchAtom(templatesAtomDiffs, bestMatches, True)
if bestMatches and bestScore[1]:
# If there are matches found and the best score's sum is non-zero
# (an imperfect match), return. Otherwise, if the best matches are
# perfect, keep filtering with additional criteria.
return f'The set of atoms is similar to {bestMatches[0]}, but {formatDiffMessage(templatesAtomDiffs[bestMatches[0]], formatAtomDiff)}.'
bestMatches, bestScore = bestMatchAtom(templatesAtomDiffs, bestMatches, False)
if bestMatches and bestScore[1]:
return f'The set of heavy atoms matches {bestMatches[0]}, but the residue {formatDiffMessage(templatesAtomDiffs[bestMatches[0]], formatAtomDiff)}.'
# We found templates that are atom-for-atom matches to the residue, so now
# prepare fingerprints of the residue and templates based on their bonds.
# The compare the residue with templates based on bonds.
residueIntBondCounts = Counter(makeIntBondSpec(bond) for bond in res.internal_bonds())
bestMatchName = None def makeTemplateIntBondDiff(templateName):
numBestMatchAtoms = 3*numResidueAtoms """Prepares a Counter describing the difference between a residue's and a template's bonds."""
numBestMatchHeavyAtoms = 2*numResidueHeavyAtoms
for templateName in forcefield._templates:
template = forcefield._templates[templateName] template = forcefield._templates[templateName]
templateCounts = _countResidueAtoms([atom.element for atom in template.atoms]) return counterSubtract(residueIntBondCounts, Counter(makeIntBondSpec((template.atoms[atom1], template.atoms[atom2])) for atom1, atom2 in template.bonds))
# Does the residue have any atoms that clearly aren't in the template? # We only need to prepare data for the templates remaining in bestMatches.
templatesIntBondDiffs = {templateName: makeTemplateIntBondDiff(templateName) for templateName in bestMatches}
if any(element not in templateCounts or templateCounts[element] < residueCounts[element] for element in residueCounts): bestMatches, bestScore = bestMatchBond(templatesIntBondDiffs, bestMatches)
continue if bestMatches and bestScore[1]:
return f'The set of atoms matches {bestMatches[0]}, but the residue {formatDiffMessage(templatesIntBondDiffs[bestMatches[0]], formatBondDiff)}.'
# If there are too many missing atoms, discard this template. # Finally, check external bonds. Normally these should always be from heavy
# atoms, so don't do separate checks excluding vs. including non-heavy atoms
# (but the check will work regardless).
numTemplateAtoms = sum(templateCounts.values()) residueExtBondCounts = Counter(makeExtBondSpec(bond) for bond in res.external_bonds())
numTemplateHeavyAtoms = sum(templateCounts[element] for element in templateCounts if element not in (None, elem.hydrogen))
if numTemplateAtoms > numBestMatchAtoms:
continue
if numTemplateHeavyAtoms > numBestMatchHeavyAtoms:
continue
# If this template has the same number of missing atoms as our previous best one, look at the name def makeTemplateExtBondDiff(templateName):
# to decide which one to use. """Prepares a Counter describing the difference between a residue's and a template's external bonds."""
template = forcefield._templates[templateName]
return counterSubtract(residueExtBondCounts, Counter(elemToNum(template.atoms[atom].element) for atom in template.externalBonds))
if numTemplateAtoms == numBestMatchAtoms: templatesExtBondDiffs = {templateName: makeTemplateExtBondDiff(templateName) for templateName in bestMatches}
if bestMatchName == res.name or res.name not in templateName:
continue bestMatches, bestScore = bestMatchAtom(templatesExtBondDiffs, bestMatches, False)
if bestMatches and bestScore[1]:
return f'The residue matches {bestMatches[0]}, but the set of externally bonded atoms {formatDiffMessage(templatesExtBondDiffs[bestMatches[0]], formatAtomDiff)}.'
# If we have matches at this point, atoms and bonds match perfectly, so the
# connectivity must be different. If bestMatches is empty, something else
# went wrong, so return a generic error message.
# Accept this as our new best match. if bestMatches:
return f'The atoms and bonds in the residue match {bestMatches[0]}, but the connectivity is different.'
bestMatchName = templateName
numBestMatchAtoms = numTemplateAtoms
numBestMatchHeavyAtoms = numTemplateHeavyAtoms
numBestMatchExtraParticles = len([atom for atom in template.atoms if atom.element is None])
# Return an appropriate error message.
if numBestMatchAtoms == numResidueAtoms:
chainResidues = list(res.chain.residues())
if len(chainResidues) > 1 and (res == chainResidues[0] or res == chainResidues[-1]):
return 'The set of atoms matches %s, but the bonds are different. Perhaps the chain is missing a terminal group?' % bestMatchName
return 'The set of atoms matches %s, but the bonds are different.' % bestMatchName
if bestMatchName is not None:
if numBestMatchHeavyAtoms == numResidueHeavyAtoms:
numResidueExtraParticles = len([atom for atom in res.atoms() if atom.element is None])
if numResidueExtraParticles == 0 and numBestMatchExtraParticles == 0:
return 'The set of atoms is similar to %s, but it is missing %d hydrogen atoms.' % (bestMatchName, numBestMatchAtoms-numResidueAtoms)
if numBestMatchExtraParticles-numResidueExtraParticles == numBestMatchAtoms-numResidueAtoms:
return 'The set of atoms is similar to %s, but it is missing %d extra particles. You can add them with Modeller.addExtraParticles().' % (bestMatchName, numBestMatchAtoms-numResidueAtoms)
return 'The set of atoms is similar to %s, but it is missing %d atoms.' % (bestMatchName, numBestMatchAtoms-numResidueAtoms)
return 'This might mean your input topology is missing some atoms or bonds, or possibly that you are using the wrong force field.' return 'This might mean your input topology is missing some atoms or bonds, or possibly that you are using the wrong force field.'
def _createResidueTemplate(residue): def _createResidueTemplate(residue):
......
...@@ -831,6 +831,104 @@ class TestForceField(unittest.TestCase): ...@@ -831,6 +831,104 @@ class TestForceField(unittest.TestCase):
self.assertEqual(templates[1].name, 'ALA') self.assertEqual(templates[1].name, 'ALA')
self.assertEqual(templates[2].name, 'CALA') self.assertEqual(templates[2].name, 'CALA')
def test_matchErrorMessages(self):
"""Test match error detection and diagnostics"""
# Load a force field to test with and prepare some lines with which to build topologies from PDB files.
forcefield = ForceField('amber14-all.xml')
pdbLines = [
"ATOM 0 CH3 ACE A 1 0 0 0 C",
"ATOM 1 HH31 ACE A 1 0 0 0 H",
"ATOM 2 HH32 ACE A 1 0 0 0 H",
"ATOM 3 HH33 ACE A 1 0 0 0 H",
"ATOM 4 C ACE A 1 0 0 0 C",
"ATOM 5 O ACE A 1 0 0 0 O",
"ATOM 6 N GLY A 2 0 0 0 N",
"ATOM 7 H GLY A 2 0 0 0 H",
"ATOM 8 CA GLY A 2 0 0 0 C",
"ATOM 9 HA2 GLY A 2 0 0 0 H",
"ATOM 10 HA3 GLY A 2 0 0 0 H",
"ATOM 11 C GLY A 2 0 0 0 C",
"ATOM 12 O GLY A 2 0 0 0 O",
"ATOM 13 N NME A 3 0 0 0 N",
"ATOM 14 H NME A 3 0 0 0 H",
"ATOM 15 CH3 NME A 3 0 0 0 C",
"ATOM 16 HH31 NME A 3 0 0 0 H",
"ATOM 17 HH32 NME A 3 0 0 0 H",
"ATOM 18 HH33 NME A 3 0 0 0 H",
]
def makeSystem(lines):
return forcefield.createSystem(PDBFile(StringIO("\n".join(lines))).topology)
# This should succeed and not produce any match errors.
self.assertEqual(makeSystem(pdbLines).getNumParticles(), 19)
# Delete CA atom from GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms is similar to GLY, but is missing 1 C atom"):
makeSystem(pdbLines[:8] + pdbLines[9:])
# Add an He atom to GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms is similar to GLY, but has 1 He atom extra"):
makeSystem(pdbLines[:9] + [
"ATOM 19 X1 GLY A 2 0 0 0 He",
] + pdbLines[9:])
# Delete CA atom from GLY and add an He atom.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms is similar to GLY, but is missing 1 C atom and has 1 He atom extra"):
makeSystem(pdbLines[:8] + [
"ATOM 19 X1 GLY A 2 0 0 0 He",
] + pdbLines[9:])
# Add 1 He atom, 2 Li atoms, and 1 Be atom to GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms is similar to GLY, but has 1 He atom, 2 Li atoms, and 1 Be atom extra"):
makeSystem(pdbLines[:9] + [
"ATOM 19 X1 GLY A 2 0 0 0 He",
"ATOM 20 X2 GLY A 2 0 0 0 Li",
"ATOM 21 X3 GLY A 2 0 0 0 Li",
"ATOM 22 X4 GLY A 2 0 0 0 Be",
] + pdbLines[9:])
# Add a virtual site to GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue has 1 site extra"):
makeSystem(pdbLines[:9] + [
"ATOM 19 X1 GLY A 2 0 0 0 EP",
] + pdbLines[9:])
# Delete HA3 atom from GLY.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue is missing 1 H atom"):
makeSystem(pdbLines[:10] + pdbLines[11:])
# Delete HA3 atom from GLY and add a virtual site.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of heavy atoms matches GLY, but the residue is missing 1 H atom and has 1 site extra"):
makeSystem(pdbLines[:10] + [
"ATOM 19 X1 GLY A 2 0 0 0 EP",
] + pdbLines[11:])
# Rename HA3 atom to remove the CA-HA3 bond.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms matches GLY, but the residue is missing 1 H-C bond"):
makeSystem(pdbLines[:10] + [
"ATOM 10 X1 GLY A 2 0 0 0 H",
] + pdbLines[11:])
# Add an extra N-O bond.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The set of atoms matches GLY, but the residue has 1 N-O bond extra"):
makeSystem(pdbLines + [
"CONECT 6 12"
])
# Remove an external bond to NME by renaming its N atom.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The residue matches GLY, but the set of externally bonded atoms is missing 1 C atom"):
makeSystem(pdbLines[:13] + [
"ATOM 13 X1 NME A 3 0 0 0 N",
] + pdbLines[14:])
# Add an extra external bond to NME.
with self.assertRaisesRegex(ValueError, "No template found for residue.*GLY.*The residue matches GLY, but the set of externally bonded atoms has 1 O atom extra"):
makeSystem(pdbLines + [
"CONECT 12 15"
])
def test_Wildcard(self): def test_Wildcard(self):
"""Test that PeriodicTorsionForces using wildcard ('') for atom types / classes in the ffxml are correctly registered""" """Test that PeriodicTorsionForces using wildcard ('') for atom types / classes in the ffxml are correctly registered"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment