"examples/benchmarks/benchmark.py" did not exist on "e53bdc5eab600b4137abd80e544cb100d1acac68"
Unverified Commit fb50d376 authored by Alex Izvorski's avatar Alex Izvorski Committed by GitHub
Browse files

Fix formal charge reading from pdb files; store formal charges in topology (#4630)



* fix formal charge reading and writing in pdb files; store formal charge in topology

* use formalCharge in public api

* permissive reading

* fix writing

* fix

* add test for formal charges

---------
Co-authored-by: default avatarAlex Izvorski <alex@genesistherapeutics.ai>
parent 8defca2d
......@@ -785,8 +785,15 @@ class Atom(object):
# segment id, element_symbol, and formal_charge are not always present
self.segment_id = pdb_line[72:76].strip()
self.element_symbol = pdb_line[76:78].strip()
try: self.formal_charge = int(pdb_line[78:80])
except ValueError: self.formal_charge = None
try:
# formal_charge should always be a one digit followed by a sign, eg "2-", "3+"
# this is a bit more permissive, it will also read "-2", "+3", or "3 "
formal_charge = pdb_line[78:80]
if formal_charge.endswith('+') or formal_charge.endswith('-'):
formal_charge = formal_charge[::-1]
self.formal_charge = int(formal_charge)
except ValueError:
self.formal_charge = None
# figure out atom element
if self.element_symbol == extraParticleIdentifier:
self.element = 'EP'
......@@ -904,7 +911,7 @@ class Atom(object):
end = "%-4s%2s" % (\
self.segment_id, self.element_symbol)
formal_charge = " "
if (self.formal_charge != None): formal_charge = "%+2d" % self.formal_charge
if (self.formal_charge is not None): formal_charge = ("%+2d" % self.formal_charge)[::-1]
return names+numbers+end+formal_charge
def __str__(self):
......
......@@ -152,7 +152,7 @@ class PDBFile(object):
element = elem.get_by_symbol(upper[0])
except KeyError:
pass
newAtom = top.addAtom(atomName, element, r, str(atom.serial_number))
newAtom = top.addAtom(atomName, element, r, str(atom.serial_number), formalCharge=atom.formal_charge)
atomByNumber[atom.serial_number] = newAtom
self._positions = []
for model in pdb.iter_models(True):
......@@ -389,9 +389,13 @@ class PDBFile(object):
else:
atomName = atom.name
coords = positions[posIndex]
line = "%s%5s %-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s " % (
if atom.formalCharge is not None:
formalCharge = ("%+2d" % atom.formalCharge)[::-1]
else:
formalCharge = ' '
line = "%s%5s %-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s%2s" % (
recordName, _formatIndex(atomIndex, 5), atomName, resName, chainName, resId, resIC, _format_83(coords[0]),
_format_83(coords[1]), _format_83(coords[2]), symbol)
_format_83(coords[1]), _format_83(coords[2]), symbol, formalCharge)
if len(line) != 80:
raise ValueError('Fixed width overflow detected')
print(line, file=file)
......
......@@ -166,7 +166,7 @@ class Topology(object):
chain._residues.append(residue)
return residue
def addAtom(self, name, element, residue, id=None):
def addAtom(self, name, element, residue, id=None, formalCharge=None):
"""Create a new Atom and add it to the Topology.
Parameters
......@@ -180,7 +180,8 @@ class Topology(object):
id : string=None
An optional identifier for the atom. If this is omitted, an id is
generated based on the atom index.
formalCharge : int=None
An optional formal charge for the atom.
Returns
-------
Atom
......@@ -190,7 +191,7 @@ class Topology(object):
raise ValueError('All atoms within a residue must be contiguous')
if id is None:
id = str(self._numAtoms+1)
atom = Atom(name, element, self._numAtoms, residue, id)
atom = Atom(name, element, self._numAtoms, residue, id, formalCharge=formalCharge)
self._numAtoms += 1
residue._atoms.append(atom)
return atom
......@@ -452,7 +453,7 @@ class Residue(object):
class Atom(object):
"""An Atom object represents an atom within a Topology."""
def __init__(self, name, element, index, residue, id):
def __init__(self, name, element, index, residue, id, formalCharge=None):
"""Construct a new Atom. You should call addAtom() on the Topology instead of calling this directly."""
## The name of the Atom
self.name = name
......@@ -464,6 +465,8 @@ class Atom(object):
self.residue = residue
## A user defined identifier for this Atom
self.id = id
## An optional formal charge for this Atom
self.formalCharge = formalCharge
def __repr__(self):
return "<Atom %d (%s) of chain %d residue %d (%s)>" % (self.index, self.name, self.residue.chain.index, self.residue.index, self.residue.name)
......
......@@ -105,6 +105,28 @@ class TestPdbFile(unittest.TestCase):
self.assertEqual(19, len(pdb.positions))
self.assertEqual('ILE', list(pdb.topology.residues())[0].name)
def test_FormalCharges(self):
"""Test reading, and writing and re-reading of a file containing formal charges."""
pdb = PDBFile('systems/formal-charges.pdb')
for atom in pdb.topology.atoms():
if atom.index == 8:
self.assertEqual(+1, atom.formalCharge)
elif atom.index == 29:
self.assertEqual(-1, atom.formalCharge)
else:
self.assertEqual(None, atom.formalCharge)
output = StringIO()
PDBFile.writeFile(pdb.topology, pdb.positions, output)
input = StringIO(output.getvalue())
pdb = PDBFile(input)
for atom in pdb.topology.atoms():
if atom.index == 8:
self.assertEqual(+1, atom.formalCharge)
elif atom.index == 29:
self.assertEqual(-1, atom.formalCharge)
else:
self.assertEqual(None, atom.formalCharge)
def test_LargeFile(self):
"""Write and read a file with more than 100,000 atoms"""
topology = Topology()
......
ATOM 302 N LYS A 39 -21.317 5.001 -3.640 1.00 11.03 N
ATOM 303 CA LYS A 39 -22.118 5.903 -2.826 1.00 11.13 C
ATOM 304 C LYS A 39 -21.425 6.300 -1.528 1.00 11.27 C
ATOM 305 O LYS A 39 -21.979 7.114 -0.782 1.00 12.94 O
ATOM 306 CB LYS A 39 -23.466 5.249 -2.510 1.00 11.64 C
ATOM 307 CG LYS A 39 -24.271 4.857 -3.735 1.00 13.20 C
ATOM 308 CD LYS A 39 -25.586 4.211 -3.325 1.00 14.57 C
ATOM 309 CE LYS A 39 -26.389 3.755 -4.533 1.00 17.30 C
ATOM 310 NZ LYS A 39 -26.668 4.891 -5.456 1.00 19.28 N1+
ATOM 311 H LYS A 39 -21.485 4.006 -3.565 1.00 11.03 H
ATOM 312 HA LYS A 39 -22.322 6.821 -3.381 1.00 11.13 H
ATOM 313 HB3 LYS A 39 -24.066 5.953 -1.933 1.00 11.64 H
ATOM 314 HB2 LYS A 39 -23.325 4.382 -1.864 1.00 11.64 H
ATOM 315 HG3 LYS A 39 -23.706 4.154 -4.346 1.00 13.20 H
ATOM 316 HG2 LYS A 39 -24.439 5.744 -4.347 1.00 13.20 H
ATOM 317 HD3 LYS A 39 -26.177 4.910 -2.731 1.00 14.57 H
ATOM 318 HD2 LYS A 39 -25.393 3.350 -2.682 1.00 14.57 H
ATOM 319 HE3 LYS A 39 -27.335 3.320 -4.210 1.00 17.30 H
ATOM 320 HE2 LYS A 39 -25.852 2.977 -5.078 1.00 17.30 H
ATOM 321 HZ1 LYS A 39 -25.793 5.240 -5.824 1.00 19.28 H
ATOM 322 HZ2 LYS A 39 -27.244 4.571 -6.222 1.00 19.28 H
ATOM 323 HZ3 LYS A 39 -27.147 5.629 -4.959 1.00 19.28 H
ATOM 324 N ASP A 40 -20.235 5.756 -1.257 1.00 10.36 N
ATOM 325 CA ASP A 40 -19.478 5.992 -0.027 1.00 10.11 C
ATOM 326 C ASP A 40 -18.075 6.451 -0.422 1.00 10.88 C
ATOM 327 O ASP A 40 -17.085 5.743 -0.201 1.00 11.09 O
ATOM 328 CB ASP A 40 -19.441 4.709 0.809 1.00 9.45 C
ATOM 329 CG ASP A 40 -18.542 4.809 2.035 1.00 9.80 C
ATOM 330 OD1 ASP A 40 -18.434 5.917 2.620 1.00 10.02 O
ATOM 331 OD2 ASP A 40 -17.966 3.752 2.401 1.00 9.81 O1-
ATOM 332 H ASP A 40 -19.836 5.109 -1.925 1.00 10.36 H
ATOM 333 HA ASP A 40 -19.938 6.774 0.577 1.00 10.11 H
ATOM 334 HB3 ASP A 40 -19.104 3.871 0.196 1.00 9.45 H
ATOM 335 HB2 ASP A 40 -20.447 4.458 1.147 1.00 9.45 H
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment