Unverified Commit fe0550bb authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Create bonds based on chem_comp_bond records (#4904)

* Create bonds based on chem_comp_bond records

* Fixed a test that assumed bonds would be in a particular order
parent cfcf0dcd
......@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2015-2023 Stanford University and the Authors.
Portions copyright (c) 2015-2025 Stanford University and the Authors.
Authors: Peter Eastman
Contributors: Jason Swails
......@@ -33,15 +33,16 @@ from __future__ import division, absolute_import, print_function
__author__ = "Peter Eastman"
__version__ = "2.0"
import sys
import math
from openmm import Vec3, Platform
from datetime import date
from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
from openmm.app.internal.unitcell import computePeriodicBoxVectors, computeLengthsAndAngles
from openmm.app import Topology, PDBFile
from openmm.unit import nanometers, angstroms, is_quantity, norm, Quantity
from openmm.app import topology, Topology, PDBFile
from openmm.unit import nanometers, angstroms, is_quantity, Quantity
from . import element as elem
import sys
import math
from datetime import date
from collections import defaultdict
try:
import numpy
except:
......@@ -92,9 +93,10 @@ class PDBxFile(object):
resNameCol = atomData.getAttributeIndex('auth_comp_id')
if resNameCol == -1:
resNameCol = atomData.getAttributeIndex('label_comp_id')
resIdCol = atomData.getAttributeIndex('label_seq_id')
resNumCol = atomData.getAttributeIndex('auth_seq_id')
if resNumCol == -1:
resNumCol = atomData.getAttributeIndex('label_seq_id')
resNumCol = resIdCol
resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
chainIdCol = atomData.getAttributeIndex('auth_asym_id')
if chainIdCol == -1:
......@@ -123,7 +125,7 @@ class PDBxFile(object):
atomsInResidue = set()
models = []
for row in atomData.getRowList():
atomKey = ((row[resNumCol], row[chainIdCol], row[atomNameCol]))
atomKey = ((row[resIdCol], row[chainIdCol], row[atomNameCol]))
model = ('1' if modelCol == -1 else row[modelCol])
if model not in models:
models.append(model)
......@@ -189,7 +191,6 @@ class PDBxFile(object):
self._positions[i] = self._positions[i]*nanometers
## The atom positions read from the PDBx/mmCIF file. If the file contains multiple frames, these are the positions in the first frame.
self.positions = self._positions[0]
self.topology.createStandardBonds()
self._numpyPositions = None
# Record unit cell information, if present.
......@@ -228,6 +229,45 @@ class PDBxFile(object):
top.addBond(bond[0], bond[1])
existingBonds.add(bond)
# Add bonds based on chem_comp_bond records.
bondData = block.getObj('chem_comp_bond')
if bondData is not None:
# Load the bond definitions for residues.
resNameCol = bondData.getAttributeIndex('comp_id')
atom1Col = bondData.getAttributeIndex('atom_id_1')
atom2Col = bondData.getAttributeIndex('atom_id_2')
bondOrderCol = bondData.getAttributeIndex('value_order')
resBonds = defaultdict(list)
for row in bondData.getRowList():
bondOrder = None if bondOrderCol == -1 else row[bondOrderCol]
resBonds[row[resNameCol]].append((row[atom1Col], row[atom2Col], bondOrder))
# Create the bonds.
bondTypes = defaultdict(lambda: None, {
'sing': topology.Single,
'doub': topology.Double,
'trip': topology.Triple,
'arom': topology.Aromatic
})
bondOrders = defaultdict(lambda: None, {
'sing': 1,
'doub': 2,
'trip': 3
})
for res in self.topology.residues():
if res.name in resBonds:
atoms = {atom.name: atom for atom in res.atoms()}
for atom1, atom2, bondOrder in resBonds[res.name]:
if atom1 in atoms and atom2 in atoms:
self.topology.addBond(atoms[atom1], atoms[atom2], bondTypes[bondOrder], bondOrders[bondOrder])
# Add bonds for standard residues.
self.topology.createStandardBonds()
def getTopology(self):
"""Get the Topology of the model."""
return self.topology
......
......@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012-2018 Stanford University and the Authors.
Portions copyright (c) 2012-2025 Stanford University and the Authors.
Authors: Peter Eastman
Contributors:
......@@ -307,6 +307,13 @@ class Topology(object):
Topology.loadBondDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'residues.xml'))
Topology._hasLoadedStandardBonds = True
# Record the existing bonds to avoid adding duplicate ones.
existingBonds = set([(bond[0], bond[1]) for bond in self._bonds])
# Add the new bonds.
for chain in self._chains:
# First build a map of atom names to atoms.
......@@ -342,7 +349,10 @@ class Topology(object):
toResidue = i
toAtom = bond[1]
if fromAtom in atomMaps[fromResidue] and toAtom in atomMaps[toResidue]:
self.addBond(atomMaps[fromResidue][fromAtom], atomMaps[toResidue][toAtom])
atom1 = atomMaps[fromResidue][fromAtom]
atom2 = atomMaps[toResidue][toAtom]
if (atom1, atom2) not in existingBonds and (atom2, atom1) not in existingBonds:
self.addBond(atom1, atom2)
def createDisulfideBonds(self, positions):
"""Identify disulfide bonds based on proximity and add them to the
......
......@@ -280,13 +280,13 @@ class TestPDBxReporter(unittest.TestCase):
validpdb = pdb
testpdb = app.PDBxFile(filename)
validBonds = list(validpdb.topology.bonds())
testBonds = list(testpdb.topology.bonds())
validBonds = set(tuple(sorted((bond[0].index, bond[1].index))) for bond in validpdb.topology.bonds())
testBonds = set(tuple(sorted((bond[0].index, bond[1].index))) for bond in testpdb.topology.bonds())
self.assertEqual(len(validBonds), len(testBonds))
for validBond, testBond in zip(validBonds, testBonds):
self.assertEqual(str(validBond), str(testBond))
for bond in validBonds:
self.assertTrue(bond in testBonds)
......
......@@ -144,6 +144,43 @@ class TestPdbxFile(unittest.TestCase):
self.assertEqual(bond1[0].name, bond2[0].name)
self.assertEqual(bond1[1].name, bond2[1].name)
def testChemCompBonds(self):
"""Test creating bonds based on chem_comp_bond records."""
pdb = PDBxFile('systems/6mvz.cif')
def bondCount(res1, atom1, res2, atom2):
count = 0
for bond in pdb.topology.bonds():
if (res1 == bond[0].residue.index and atom1 == bond[0].name) or (res2 == bond[0].residue.index and atom2 == bond[0].name):
if (res1 == bond[1].residue.index and atom1 == bond[1].name) or (res2 == bond[1].residue.index and atom2 == bond[1].name):
count += 1
return count
# Check some bonds that ought to be present.
self.assertEqual(1, bondCount(0, 'N', 0, 'CA'))
self.assertEqual(1, bondCount(0, 'C', 1, 'N'))
self.assertEqual(1, bondCount(1, 'CA', 1, 'CB'))
self.assertEqual(1, bondCount(1, 'C', 2, 'N'))
self.assertEqual(1, bondCount(2, 'CB', 2, 'CG'))
self.assertEqual(1, bondCount(2, 'C', 3, 'N'))
self.assertEqual(1, bondCount(3, 'C', 3, 'OXT'))
self.assertEqual(1, bondCount(4, 'O', 4, 'C1'))
# Check the types and orders of a few bonds.
for bond in pdb.topology.bonds():
if (bond[0].name == 'C' and bond[1].name == 'O') or (bond[1].name == 'C' and bond[0].name == 'O'):
self.assertEqual(topology.Double, bond.type)
self.assertEqual(2, bond.order)
if (bond[0].name == 'N' or bond[1].name == 'N'):
if bond[0].residue == bond[1].residue:
self.assertEqual(topology.Single, bond.type)
self.assertEqual(1, bond.order)
else:
self.assertEqual(None, bond.type)
self.assertEqual(None, bond.order)
def testMultiChain(self):
"""Test reading and writing a file that includes multiple chains"""
cif_ori = PDBxFile('systems/multichain.pdbx')
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment