Unverified Commit fe0550bb authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Create bonds based on chem_comp_bond records (#4904)

* Create bonds based on chem_comp_bond records

* Fixed a test that assumed bonds would be in a particular order
parent cfcf0dcd
...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of ...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org. Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2015-2023 Stanford University and the Authors. Portions copyright (c) 2015-2025 Stanford University and the Authors.
Authors: Peter Eastman Authors: Peter Eastman
Contributors: Jason Swails Contributors: Jason Swails
...@@ -33,15 +33,16 @@ from __future__ import division, absolute_import, print_function ...@@ -33,15 +33,16 @@ from __future__ import division, absolute_import, print_function
__author__ = "Peter Eastman" __author__ = "Peter Eastman"
__version__ = "2.0" __version__ = "2.0"
import sys
import math
from openmm import Vec3, Platform from openmm import Vec3, Platform
from datetime import date
from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
from openmm.app.internal.unitcell import computePeriodicBoxVectors, computeLengthsAndAngles from openmm.app.internal.unitcell import computePeriodicBoxVectors, computeLengthsAndAngles
from openmm.app import Topology, PDBFile from openmm.app import topology, Topology, PDBFile
from openmm.unit import nanometers, angstroms, is_quantity, norm, Quantity from openmm.unit import nanometers, angstroms, is_quantity, Quantity
from . import element as elem from . import element as elem
import sys
import math
from datetime import date
from collections import defaultdict
try: try:
import numpy import numpy
except: except:
...@@ -92,9 +93,10 @@ class PDBxFile(object): ...@@ -92,9 +93,10 @@ class PDBxFile(object):
resNameCol = atomData.getAttributeIndex('auth_comp_id') resNameCol = atomData.getAttributeIndex('auth_comp_id')
if resNameCol == -1: if resNameCol == -1:
resNameCol = atomData.getAttributeIndex('label_comp_id') resNameCol = atomData.getAttributeIndex('label_comp_id')
resIdCol = atomData.getAttributeIndex('label_seq_id')
resNumCol = atomData.getAttributeIndex('auth_seq_id') resNumCol = atomData.getAttributeIndex('auth_seq_id')
if resNumCol == -1: if resNumCol == -1:
resNumCol = atomData.getAttributeIndex('label_seq_id') resNumCol = resIdCol
resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code') resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
chainIdCol = atomData.getAttributeIndex('auth_asym_id') chainIdCol = atomData.getAttributeIndex('auth_asym_id')
if chainIdCol == -1: if chainIdCol == -1:
...@@ -123,7 +125,7 @@ class PDBxFile(object): ...@@ -123,7 +125,7 @@ class PDBxFile(object):
atomsInResidue = set() atomsInResidue = set()
models = [] models = []
for row in atomData.getRowList(): for row in atomData.getRowList():
atomKey = ((row[resNumCol], row[chainIdCol], row[atomNameCol])) atomKey = ((row[resIdCol], row[chainIdCol], row[atomNameCol]))
model = ('1' if modelCol == -1 else row[modelCol]) model = ('1' if modelCol == -1 else row[modelCol])
if model not in models: if model not in models:
models.append(model) models.append(model)
...@@ -189,7 +191,6 @@ class PDBxFile(object): ...@@ -189,7 +191,6 @@ class PDBxFile(object):
self._positions[i] = self._positions[i]*nanometers self._positions[i] = self._positions[i]*nanometers
## The atom positions read from the PDBx/mmCIF file. If the file contains multiple frames, these are the positions in the first frame. ## The atom positions read from the PDBx/mmCIF file. If the file contains multiple frames, these are the positions in the first frame.
self.positions = self._positions[0] self.positions = self._positions[0]
self.topology.createStandardBonds()
self._numpyPositions = None self._numpyPositions = None
# Record unit cell information, if present. # Record unit cell information, if present.
...@@ -228,6 +229,45 @@ class PDBxFile(object): ...@@ -228,6 +229,45 @@ class PDBxFile(object):
top.addBond(bond[0], bond[1]) top.addBond(bond[0], bond[1])
existingBonds.add(bond) existingBonds.add(bond)
# Add bonds based on chem_comp_bond records.
bondData = block.getObj('chem_comp_bond')
if bondData is not None:
# Load the bond definitions for residues.
resNameCol = bondData.getAttributeIndex('comp_id')
atom1Col = bondData.getAttributeIndex('atom_id_1')
atom2Col = bondData.getAttributeIndex('atom_id_2')
bondOrderCol = bondData.getAttributeIndex('value_order')
resBonds = defaultdict(list)
for row in bondData.getRowList():
bondOrder = None if bondOrderCol == -1 else row[bondOrderCol]
resBonds[row[resNameCol]].append((row[atom1Col], row[atom2Col], bondOrder))
# Create the bonds.
bondTypes = defaultdict(lambda: None, {
'sing': topology.Single,
'doub': topology.Double,
'trip': topology.Triple,
'arom': topology.Aromatic
})
bondOrders = defaultdict(lambda: None, {
'sing': 1,
'doub': 2,
'trip': 3
})
for res in self.topology.residues():
if res.name in resBonds:
atoms = {atom.name: atom for atom in res.atoms()}
for atom1, atom2, bondOrder in resBonds[res.name]:
if atom1 in atoms and atom2 in atoms:
self.topology.addBond(atoms[atom1], atoms[atom2], bondTypes[bondOrder], bondOrders[bondOrder])
# Add bonds for standard residues.
self.topology.createStandardBonds()
def getTopology(self): def getTopology(self):
"""Get the Topology of the model.""" """Get the Topology of the model."""
return self.topology return self.topology
......
...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of ...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org. Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012-2018 Stanford University and the Authors. Portions copyright (c) 2012-2025 Stanford University and the Authors.
Authors: Peter Eastman Authors: Peter Eastman
Contributors: Contributors:
...@@ -307,6 +307,13 @@ class Topology(object): ...@@ -307,6 +307,13 @@ class Topology(object):
Topology.loadBondDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'residues.xml')) Topology.loadBondDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'residues.xml'))
Topology._hasLoadedStandardBonds = True Topology._hasLoadedStandardBonds = True
# Record the existing bonds to avoid adding duplicate ones.
existingBonds = set([(bond[0], bond[1]) for bond in self._bonds])
# Add the new bonds.
for chain in self._chains: for chain in self._chains:
# First build a map of atom names to atoms. # First build a map of atom names to atoms.
...@@ -342,7 +349,10 @@ class Topology(object): ...@@ -342,7 +349,10 @@ class Topology(object):
toResidue = i toResidue = i
toAtom = bond[1] toAtom = bond[1]
if fromAtom in atomMaps[fromResidue] and toAtom in atomMaps[toResidue]: if fromAtom in atomMaps[fromResidue] and toAtom in atomMaps[toResidue]:
self.addBond(atomMaps[fromResidue][fromAtom], atomMaps[toResidue][toAtom]) atom1 = atomMaps[fromResidue][fromAtom]
atom2 = atomMaps[toResidue][toAtom]
if (atom1, atom2) not in existingBonds and (atom2, atom1) not in existingBonds:
self.addBond(atom1, atom2)
def createDisulfideBonds(self, positions): def createDisulfideBonds(self, positions):
"""Identify disulfide bonds based on proximity and add them to the """Identify disulfide bonds based on proximity and add them to the
......
...@@ -280,13 +280,13 @@ class TestPDBxReporter(unittest.TestCase): ...@@ -280,13 +280,13 @@ class TestPDBxReporter(unittest.TestCase):
validpdb = pdb validpdb = pdb
testpdb = app.PDBxFile(filename) testpdb = app.PDBxFile(filename)
validBonds = list(validpdb.topology.bonds()) validBonds = set(tuple(sorted((bond[0].index, bond[1].index))) for bond in validpdb.topology.bonds())
testBonds = list(testpdb.topology.bonds()) testBonds = set(tuple(sorted((bond[0].index, bond[1].index))) for bond in testpdb.topology.bonds())
self.assertEqual(len(validBonds), len(testBonds)) self.assertEqual(len(validBonds), len(testBonds))
for validBond, testBond in zip(validBonds, testBonds): for bond in validBonds:
self.assertEqual(str(validBond), str(testBond)) self.assertTrue(bond in testBonds)
......
...@@ -144,6 +144,43 @@ class TestPdbxFile(unittest.TestCase): ...@@ -144,6 +144,43 @@ class TestPdbxFile(unittest.TestCase):
self.assertEqual(bond1[0].name, bond2[0].name) self.assertEqual(bond1[0].name, bond2[0].name)
self.assertEqual(bond1[1].name, bond2[1].name) self.assertEqual(bond1[1].name, bond2[1].name)
def testChemCompBonds(self):
"""Test creating bonds based on chem_comp_bond records."""
pdb = PDBxFile('systems/6mvz.cif')
def bondCount(res1, atom1, res2, atom2):
count = 0
for bond in pdb.topology.bonds():
if (res1 == bond[0].residue.index and atom1 == bond[0].name) or (res2 == bond[0].residue.index and atom2 == bond[0].name):
if (res1 == bond[1].residue.index and atom1 == bond[1].name) or (res2 == bond[1].residue.index and atom2 == bond[1].name):
count += 1
return count
# Check some bonds that ought to be present.
self.assertEqual(1, bondCount(0, 'N', 0, 'CA'))
self.assertEqual(1, bondCount(0, 'C', 1, 'N'))
self.assertEqual(1, bondCount(1, 'CA', 1, 'CB'))
self.assertEqual(1, bondCount(1, 'C', 2, 'N'))
self.assertEqual(1, bondCount(2, 'CB', 2, 'CG'))
self.assertEqual(1, bondCount(2, 'C', 3, 'N'))
self.assertEqual(1, bondCount(3, 'C', 3, 'OXT'))
self.assertEqual(1, bondCount(4, 'O', 4, 'C1'))
# Check the types and orders of a few bonds.
for bond in pdb.topology.bonds():
if (bond[0].name == 'C' and bond[1].name == 'O') or (bond[1].name == 'C' and bond[0].name == 'O'):
self.assertEqual(topology.Double, bond.type)
self.assertEqual(2, bond.order)
if (bond[0].name == 'N' or bond[1].name == 'N'):
if bond[0].residue == bond[1].residue:
self.assertEqual(topology.Single, bond.type)
self.assertEqual(1, bond.order)
else:
self.assertEqual(None, bond.type)
self.assertEqual(None, bond.order)
def testMultiChain(self): def testMultiChain(self):
"""Test reading and writing a file that includes multiple chains""" """Test reading and writing a file that includes multiple chains"""
cif_ori = PDBxFile('systems/multichain.pdbx') cif_ori = PDBxFile('systems/multichain.pdbx')
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment