Unverified commit 5cfa489f, authored by Peter Eastman and committed by GitHub
Browse files

Improved handling of atoms with alternate locations (#3270)

* Improved handling of atoms with alternate locations

* Fixed a test failure caused by an invalid PDB file
parent 78a31f3b
...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of ...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org. Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012-2018 Stanford University and the Authors. Portions copyright (c) 2012-2021 Stanford University and the Authors.
Authors: Christopher M. Bruns Authors: Christopher M. Bruns
Contributors: Peter Eastman Contributors: Peter Eastman
...@@ -41,6 +41,7 @@ from .unitcell import computePeriodicBoxVectors ...@@ -41,6 +41,7 @@ from .unitcell import computePeriodicBoxVectors
import warnings import warnings
import sys import sys
import math import math
from collections import OrderedDict
class PdbStructure(object): class PdbStructure(object):
""" """
...@@ -516,7 +517,7 @@ class Residue(object): ...@@ -516,7 +517,7 @@ class Residue(object):
self.number = number self.number = number
self.insertion_code = insertion_code self.insertion_code = insertion_code
self.atoms = [] self.atoms = []
self.atoms_by_name = {} self.atoms_by_name = OrderedDict()
self.is_first_in_chain = False self.is_first_in_chain = False
self.is_final_in_chain = False self.is_final_in_chain = False
self._current_atom = None self._current_atom = None
......
...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of ...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org. Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012-2020 Stanford University and the Authors. Portions copyright (c) 2012-2021 Stanford University and the Authors.
Authors: Peter Eastman Authors: Peter Eastman
Contributors: Contributors:
...@@ -111,8 +111,12 @@ class PDBFile(object): ...@@ -111,8 +111,12 @@ class PDBFile(object):
atomReplacements = PDBFile._atomNameReplacements[resName] atomReplacements = PDBFile._atomNameReplacements[resName]
else: else:
atomReplacements = {} atomReplacements = {}
for atom in residue.iter_atoms(): processedAtomNames = set()
for atom in residue.atoms_by_name.values():
atomName = atom.get_name() atomName = atom.get_name()
if atomName in processedAtomNames or atom.residue_name != residue.get_name():
continue
processedAtomNames.add(atomName)
if atomName in atomReplacements: if atomName in atomReplacements:
atomName = atomReplacements[atomName] atomName = atomReplacements[atomName]
atomName = atomName.strip() atomName = atomName.strip()
...@@ -155,7 +159,11 @@ class PDBFile(object): ...@@ -155,7 +159,11 @@ class PDBFile(object):
coords = [] coords = []
for chain in model.iter_chains(): for chain in model.iter_chains():
for residue in chain.iter_residues(): for residue in chain.iter_residues():
for atom in residue.iter_atoms(): processedAtomNames = set()
for atom in residue.atoms_by_name.values():
if atom.get_name() in processedAtomNames or atom.residue_name != residue.get_name():
continue
processedAtomNames.add(atom.get_name())
pos = atom.get_position().value_in_unit(nanometers) pos = atom.get_position().value_in_unit(nanometers)
coords.append(Vec3(pos[0], pos[1], pos[2])) coords.append(Vec3(pos[0], pos[1], pos[2]))
self._positions.append(coords*nanometers) self._positions.append(coords*nanometers)
......
...@@ -87,7 +87,8 @@ class TestPdbFile(unittest.TestCase): ...@@ -87,7 +87,8 @@ class TestPdbFile(unittest.TestCase):
def test_AltLocs(self): def test_AltLocs(self):
"""Test reading a file that includes AltLocs""" """Test reading a file that includes AltLocs"""
pdb = PDBFile('systems/altlocs.pdb') for filename in ['altlocs.pdb', 'altlocs2.pdb']:
pdb = PDBFile(f'systems/{filename}')
self.assertEqual(1, pdb.topology.getNumResidues()) self.assertEqual(1, pdb.topology.getNumResidues())
self.assertEqual(19, pdb.topology.getNumAtoms()) self.assertEqual(19, pdb.topology.getNumAtoms())
self.assertEqual(19, len(pdb.positions)) self.assertEqual(19, len(pdb.positions))
......
ATOM 415 N ILE A 25 4.377 15.265 1.447 0.20 2.82 N
ATOM 417 CA ILE A 25 3.033 14.786 1.800 0.20 2.71 C
ATOM 419 C AILE A 25 3.077 13.514 2.641 0.20 2.38 C
ATOM 420 C BILE A 25 3.077 13.514 2.641 0.20 2.38 C
ATOM 421 O AILE A 25 2.310 13.367 3.581 0.20 2.92 O
ATOM 422 O BILE A 25 2.310 13.367 3.581 0.20 2.92 O
ATOM 423 CB ILE A 25 2.186 14.577 0.518 0.20 2.90 C
ATOM 425 CG1 ILE A 25 1.658 15.855 -0.086 0.20 2.48 C
ATOM 427 CG2 ILE A 25 0.962 13.694 0.784 0.20 4.13 C
ATOM 429 CD1 ILE A 25 0.623 16.572 0.761 0.20 4.11 C
ATOM 431 H ILE A 25 4.728 15.070 0.644 0.20 1.70 H
ATOM 433 HA ILE A 25 2.573 15.480 2.312 0.20 2.56 H
ATOM 435 HB ILE A 25 2.769 14.106 -0.118 0.20 3.37 H
ATOM 437 HG12 ILE A 25 2.432 16.460 -0.225 0.20 2.73 H
ATOM 439 HG13AILE A 25 1.243 15.658 -0.968 0.20 2.96 H
ATOM 440 HG13BILE A 25 2.432 16.460 -0.225 0.20 2.73 H
ATOM 441 HG21 ILE A 25 0.423 14.095 1.506 0.20 3.33 H
ATOM 443 HG22 ILE A 25 0.451 13.622 -0.059 0.20 3.57 H
ATOM 445 HG23 ILE A 25 1.278 12.796 1.059 0.20 3.60 H
ATOM 447 HD11 ILE A 25 0.989 16.789 1.668 0.20 3.14 H
ATOM 449 HD12 ILE A 25 0.344 17.402 0.282 0.20 3.79 H
ATOM 451 HD13 ILE A 25 -0.170 15.974 0.843 0.20 2.83 H
REMARK 1 CREATED WITH OPENMM 7.4, 2020-01-04 REMARK 1 CREATED WITH OPENMM 7.4, 2020-01-04
HETATM 1 A 1 0.095 0.011 0.000 1.00 0.00 C HETATM 1 C A 1 0.095 0.011 0.000 1.00 0.00 C
HETATM 2 A 1 0.513 -1.098 -0.004 1.00 0.00 O HETATM 2 O A 1 0.513 -1.098 -0.004 1.00 0.00 O
HETATM 3 A 1 -1.098 0.149 0.015 1.00 0.00 H HETATM 3 H1 A 1 -1.098 0.149 0.015 1.00 0.00 H
HETATM 4 A 1 0.590 0.938 -0.011 1.00 0.00 H HETATM 4 H2 A 1 0.590 0.938 -0.011 1.00 0.00 H
TER 5 A 1 TER 5 A 1
CONECT 1 2 3 4 CONECT 1 2 3 4
CONECT 2 1 CONECT 2 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment