Unverified Commit fb50d376 authored by Alex Izvorski's avatar Alex Izvorski Committed by GitHub
Browse files

Fix formal charge reading from pdb files; store formal charges in topology (#4630)



* fix formal charge reading and writing in pdb files; store formal charge in topology

* use formalCharge in public api

* permissive reading

* fix writing

* fix

* add test for formal charges

---------
Co-authored-by: default avatarAlex Izvorski <alex@genesistherapeutics.ai>
parent 8defca2d
...@@ -785,8 +785,15 @@ class Atom(object): ...@@ -785,8 +785,15 @@ class Atom(object):
# segment id, element_symbol, and formal_charge are not always present # segment id, element_symbol, and formal_charge are not always present
self.segment_id = pdb_line[72:76].strip() self.segment_id = pdb_line[72:76].strip()
self.element_symbol = pdb_line[76:78].strip() self.element_symbol = pdb_line[76:78].strip()
try: self.formal_charge = int(pdb_line[78:80]) try:
except ValueError: self.formal_charge = None # formal_charge should always be a one digit followed by a sign, eg "2-", "3+"
# this is a bit more permissive, it will also read "-2", "+3", or "3 "
formal_charge = pdb_line[78:80]
if formal_charge.endswith('+') or formal_charge.endswith('-'):
formal_charge = formal_charge[::-1]
self.formal_charge = int(formal_charge)
except ValueError:
self.formal_charge = None
# figure out atom element # figure out atom element
if self.element_symbol == extraParticleIdentifier: if self.element_symbol == extraParticleIdentifier:
self.element = 'EP' self.element = 'EP'
...@@ -904,7 +911,7 @@ class Atom(object): ...@@ -904,7 +911,7 @@ class Atom(object):
end = "%-4s%2s" % (\ end = "%-4s%2s" % (\
self.segment_id, self.element_symbol) self.segment_id, self.element_symbol)
formal_charge = " " formal_charge = " "
if (self.formal_charge != None): formal_charge = "%+2d" % self.formal_charge if (self.formal_charge is not None): formal_charge = ("%+2d" % self.formal_charge)[::-1]
return names+numbers+end+formal_charge return names+numbers+end+formal_charge
def __str__(self): def __str__(self):
......
...@@ -152,7 +152,7 @@ class PDBFile(object): ...@@ -152,7 +152,7 @@ class PDBFile(object):
element = elem.get_by_symbol(upper[0]) element = elem.get_by_symbol(upper[0])
except KeyError: except KeyError:
pass pass
newAtom = top.addAtom(atomName, element, r, str(atom.serial_number)) newAtom = top.addAtom(atomName, element, r, str(atom.serial_number), formalCharge=atom.formal_charge)
atomByNumber[atom.serial_number] = newAtom atomByNumber[atom.serial_number] = newAtom
self._positions = [] self._positions = []
for model in pdb.iter_models(True): for model in pdb.iter_models(True):
...@@ -389,9 +389,13 @@ class PDBFile(object): ...@@ -389,9 +389,13 @@ class PDBFile(object):
else: else:
atomName = atom.name atomName = atom.name
coords = positions[posIndex] coords = positions[posIndex]
line = "%s%5s %-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s " % ( if atom.formalCharge is not None:
formalCharge = ("%+2d" % atom.formalCharge)[::-1]
else:
formalCharge = ' '
line = "%s%5s %-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s%2s" % (
recordName, _formatIndex(atomIndex, 5), atomName, resName, chainName, resId, resIC, _format_83(coords[0]), recordName, _formatIndex(atomIndex, 5), atomName, resName, chainName, resId, resIC, _format_83(coords[0]),
_format_83(coords[1]), _format_83(coords[2]), symbol) _format_83(coords[1]), _format_83(coords[2]), symbol, formalCharge)
if len(line) != 80: if len(line) != 80:
raise ValueError('Fixed width overflow detected') raise ValueError('Fixed width overflow detected')
print(line, file=file) print(line, file=file)
......
...@@ -166,7 +166,7 @@ class Topology(object): ...@@ -166,7 +166,7 @@ class Topology(object):
chain._residues.append(residue) chain._residues.append(residue)
return residue return residue
def addAtom(self, name, element, residue, id=None): def addAtom(self, name, element, residue, id=None, formalCharge=None):
"""Create a new Atom and add it to the Topology. """Create a new Atom and add it to the Topology.
Parameters Parameters
...@@ -180,7 +180,8 @@ class Topology(object): ...@@ -180,7 +180,8 @@ class Topology(object):
id : string=None id : string=None
An optional identifier for the atom. If this is omitted, an id is An optional identifier for the atom. If this is omitted, an id is
generated based on the atom index. generated based on the atom index.
formalCharge : int=None
An optional formal charge for the atom.
Returns Returns
------- -------
Atom Atom
...@@ -190,7 +191,7 @@ class Topology(object): ...@@ -190,7 +191,7 @@ class Topology(object):
raise ValueError('All atoms within a residue must be contiguous') raise ValueError('All atoms within a residue must be contiguous')
if id is None: if id is None:
id = str(self._numAtoms+1) id = str(self._numAtoms+1)
atom = Atom(name, element, self._numAtoms, residue, id) atom = Atom(name, element, self._numAtoms, residue, id, formalCharge=formalCharge)
self._numAtoms += 1 self._numAtoms += 1
residue._atoms.append(atom) residue._atoms.append(atom)
return atom return atom
...@@ -452,7 +453,7 @@ class Residue(object): ...@@ -452,7 +453,7 @@ class Residue(object):
class Atom(object): class Atom(object):
"""An Atom object represents an atom within a Topology.""" """An Atom object represents an atom within a Topology."""
def __init__(self, name, element, index, residue, id): def __init__(self, name, element, index, residue, id, formalCharge=None):
"""Construct a new Atom. You should call addAtom() on the Topology instead of calling this directly.""" """Construct a new Atom. You should call addAtom() on the Topology instead of calling this directly."""
## The name of the Atom ## The name of the Atom
self.name = name self.name = name
...@@ -464,6 +465,8 @@ class Atom(object): ...@@ -464,6 +465,8 @@ class Atom(object):
self.residue = residue self.residue = residue
## A user defined identifier for this Atom ## A user defined identifier for this Atom
self.id = id self.id = id
## An optional formal charge for this Atom
self.formalCharge = formalCharge
def __repr__(self): def __repr__(self):
return "<Atom %d (%s) of chain %d residue %d (%s)>" % (self.index, self.name, self.residue.chain.index, self.residue.index, self.residue.name) return "<Atom %d (%s) of chain %d residue %d (%s)>" % (self.index, self.name, self.residue.chain.index, self.residue.index, self.residue.name)
......
...@@ -105,6 +105,28 @@ class TestPdbFile(unittest.TestCase): ...@@ -105,6 +105,28 @@ class TestPdbFile(unittest.TestCase):
self.assertEqual(19, len(pdb.positions)) self.assertEqual(19, len(pdb.positions))
self.assertEqual('ILE', list(pdb.topology.residues())[0].name) self.assertEqual('ILE', list(pdb.topology.residues())[0].name)
def test_FormalCharges(self):
"""Test reading, and writing and re-reading of a file containing formal charges."""
pdb = PDBFile('systems/formal-charges.pdb')
for atom in pdb.topology.atoms():
if atom.index == 8:
self.assertEqual(+1, atom.formalCharge)
elif atom.index == 29:
self.assertEqual(-1, atom.formalCharge)
else:
self.assertEqual(None, atom.formalCharge)
output = StringIO()
PDBFile.writeFile(pdb.topology, pdb.positions, output)
input = StringIO(output.getvalue())
pdb = PDBFile(input)
for atom in pdb.topology.atoms():
if atom.index == 8:
self.assertEqual(+1, atom.formalCharge)
elif atom.index == 29:
self.assertEqual(-1, atom.formalCharge)
else:
self.assertEqual(None, atom.formalCharge)
def test_LargeFile(self): def test_LargeFile(self):
"""Write and read a file with more than 100,000 atoms""" """Write and read a file with more than 100,000 atoms"""
topology = Topology() topology = Topology()
......
ATOM 302 N LYS A 39 -21.317 5.001 -3.640 1.00 11.03 N
ATOM 303 CA LYS A 39 -22.118 5.903 -2.826 1.00 11.13 C
ATOM 304 C LYS A 39 -21.425 6.300 -1.528 1.00 11.27 C
ATOM 305 O LYS A 39 -21.979 7.114 -0.782 1.00 12.94 O
ATOM 306 CB LYS A 39 -23.466 5.249 -2.510 1.00 11.64 C
ATOM 307 CG LYS A 39 -24.271 4.857 -3.735 1.00 13.20 C
ATOM 308 CD LYS A 39 -25.586 4.211 -3.325 1.00 14.57 C
ATOM 309 CE LYS A 39 -26.389 3.755 -4.533 1.00 17.30 C
ATOM 310 NZ LYS A 39 -26.668 4.891 -5.456 1.00 19.28 N1+
ATOM 311 H LYS A 39 -21.485 4.006 -3.565 1.00 11.03 H
ATOM 312 HA LYS A 39 -22.322 6.821 -3.381 1.00 11.13 H
ATOM 313 HB3 LYS A 39 -24.066 5.953 -1.933 1.00 11.64 H
ATOM 314 HB2 LYS A 39 -23.325 4.382 -1.864 1.00 11.64 H
ATOM 315 HG3 LYS A 39 -23.706 4.154 -4.346 1.00 13.20 H
ATOM 316 HG2 LYS A 39 -24.439 5.744 -4.347 1.00 13.20 H
ATOM 317 HD3 LYS A 39 -26.177 4.910 -2.731 1.00 14.57 H
ATOM 318 HD2 LYS A 39 -25.393 3.350 -2.682 1.00 14.57 H
ATOM 319 HE3 LYS A 39 -27.335 3.320 -4.210 1.00 17.30 H
ATOM 320 HE2 LYS A 39 -25.852 2.977 -5.078 1.00 17.30 H
ATOM 321 HZ1 LYS A 39 -25.793 5.240 -5.824 1.00 19.28 H
ATOM 322 HZ2 LYS A 39 -27.244 4.571 -6.222 1.00 19.28 H
ATOM 323 HZ3 LYS A 39 -27.147 5.629 -4.959 1.00 19.28 H
ATOM 324 N ASP A 40 -20.235 5.756 -1.257 1.00 10.36 N
ATOM 325 CA ASP A 40 -19.478 5.992 -0.027 1.00 10.11 C
ATOM 326 C ASP A 40 -18.075 6.451 -0.422 1.00 10.88 C
ATOM 327 O ASP A 40 -17.085 5.743 -0.201 1.00 11.09 O
ATOM 328 CB ASP A 40 -19.441 4.709 0.809 1.00 9.45 C
ATOM 329 CG ASP A 40 -18.542 4.809 2.035 1.00 9.80 C
ATOM 330 OD1 ASP A 40 -18.434 5.917 2.620 1.00 10.02 O
ATOM 331 OD2 ASP A 40 -17.966 3.752 2.401 1.00 9.81 O1-
ATOM 332 H ASP A 40 -19.836 5.109 -1.925 1.00 10.36 H
ATOM 333 HA ASP A 40 -19.938 6.774 0.577 1.00 10.11 H
ATOM 334 HB3 ASP A 40 -19.104 3.871 0.196 1.00 9.45 H
ATOM 335 HB2 ASP A 40 -20.447 4.458 1.147 1.00 9.45 H
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment