"wrappers/python/vscode:/vscode.git/clone" did not exist on "2f42ac0a06a47aa7a36accbc1c944a540865e521"
Unverified Commit 79740ece authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Use hex for large atom and residue IDs in PDB files (#3522)

parent 7c42ef5e
......@@ -164,10 +164,10 @@ class PdbStructure(object):
if command == "ATOM " or command == "HETATM":
self._add_atom(Atom(pdb_line, self, self.extraParticleIdentifier))
elif command == "CONECT":
atoms = [int(pdb_line[6:11])]
atoms = [_parse_atom_index(pdb_line[6:11])]
for pos in (11,16,21,26):
try:
atoms.append(int(pdb_line[pos:pos+5]))
atoms.append(_parse_atom_index(pdb_line[pos:pos+5]))
except:
pass
self._current_model.connects.append(atoms)
......@@ -208,11 +208,9 @@ class PdbStructure(object):
self._finalize()
def _reset_atom_numbers(self):
self._atom_numbers_are_hex = False
self._next_atom_number = 1
def _reset_residue_numbers(self):
self._residue_numbers_are_hex = False
self._next_residue_number = 1
def write(self, output_stream=sys.stdout):
......@@ -726,15 +724,8 @@ class Atom(object):
self.is_final_residue_in_chain = False
# Start parsing fields from pdb line
self.record_name = pdb_line[0:6].strip()
if pdbstructure is not None and pdbstructure._atom_numbers_are_hex:
self.serial_number = int(pdb_line[6:11], 16)
else:
try:
self.serial_number = int(pdb_line[6:11])
except:
try:
self.serial_number = int(pdb_line[6:11], 16)
pdbstructure._atom_numbers_are_hex = True
self.serial_number = _parse_atom_index(pdb_line[6:11])
except:
# Just give it the next number in sequence.
self.serial_number = pdbstructure._next_atom_number
......@@ -753,15 +744,11 @@ class Atom(object):
self.residue_name = self.residue_name_with_spaces.strip()
self.chain_id = pdb_line[21]
if pdbstructure is not None and pdbstructure._residue_numbers_are_hex:
self.residue_number = int(pdb_line[22:26], 16)
else:
try:
self.residue_number = int(pdb_line[22:26])
except:
try:
self.residue_number = int(pdb_line[22:26], 16)
pdbstructure._residue_numbers_are_hex = True
self.residue_number = int(pdb_line[22:26], 16) - 0xA000 + 10000
except:
# When VMD runs out of hex values it starts filling the residue ID field with ****.
# Look at the most recent atoms to figure out whether this is a new residue or not.
......@@ -986,6 +973,13 @@ class Atom(object):
return str(self.position)
def _parse_atom_index(index):
"""Parse the string containing an atom index, which might be either decimal or hex."""
try:
return int(index)
except:
return int(index, 16) - 0xA0000 + 100000
# run module directly for testing
if __name__=='__main__':
# Test the examples in the docstrings
......
......@@ -363,7 +363,7 @@ class PDBFile(object):
if keepIds and len(res.id) < 5:
resId = res.id
else:
resId = "%4d" % ((resIndex+1)%10000)
resId = _formatIndex(resIndex+1, 4)
if len(res.insertionCode) == 1:
resIC = res.insertionCode
else:
......@@ -384,8 +384,8 @@ class PDBFile(object):
else:
atomName = atom.name
coords = positions[posIndex]
line = "%s%5d %-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s " % (
recordName, atomIndex%100000, atomName, resName, chainName, resId, resIC, _format_83(coords[0]),
line = "%s%5s %-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s " % (
recordName, _formatIndex(atomIndex, 5), atomName, resName, chainName, resId, resIC, _format_83(coords[0]),
_format_83(coords[1]), _format_83(coords[2]), symbol)
if len(line) != 80:
raise ValueError('Fixed width overflow detected')
......@@ -393,7 +393,7 @@ class PDBFile(object):
posIndex += 1
atomIndex += 1
if resIndex == len(residues)-1:
print("TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId), file=file)
print("TER %5s %3s %s%4s" % (_formatIndex(atomIndex, 5), resName, chainName, resId), file=file)
atomIndex += 1
if modelIndex is not None:
print("ENDMDL", file=file)
......@@ -450,11 +450,11 @@ class PDBFile(object):
for index1 in sorted(atomBonds):
bonded = atomBonds[index1]
while len(bonded) > 4:
print("CONECT%5d%5d%5d%5d" % (index1, bonded[0], bonded[1], bonded[2]), file=file)
print("CONECT%5s%5s%5s%5s" % (_formatIndex(index1, 5), _formatIndex(bonded[0], 5), _formatIndex(bonded[1], 5), _formatIndex(bonded[2], 5)), file=file)
del bonded[:4]
line = "CONECT%5d" % index1
line = "CONECT%5s" % _formatIndex(index1, 5)
for index2 in bonded:
line = "%s%5d" % (line, index2)
line = "%s%5s" % (line, _formatIndex(index2, 5))
print(line, file=file)
print("END", file=file)
......@@ -470,3 +470,14 @@ def _format_83(f):
return ('%8.3f' % f)[:8]
raise ValueError('coordinate "%s" could not be represented '
'in a width-8 field' % f)
def _formatIndex(index, places):
"""Create a string representation of an atom or residue index. If the value is larger than can fit
in the available space, switch to hex.
"""
if index < 10**places:
format = f'%{places}d'
return format % index
format = f'%{places}X'
shiftedIndex = (index - 10**places + 10*16**(places-1)) % (16**places)
return format % shiftedIndex
\ No newline at end of file
......@@ -94,6 +94,45 @@ class TestPdbFile(unittest.TestCase):
self.assertEqual(19, len(pdb.positions))
self.assertEqual('ILE', list(pdb.topology.residues())[0].name)
def test_LargeFile(self):
"""Write and read a file with more than 100,000 atoms"""
topology = Topology()
chain = topology.addChain('A')
for i in range(20000):
res = topology.addResidue('MOL', chain)
atoms = []
for j in range(6):
atoms.append(topology.addAtom(f'AT{j}', elem.carbon, res))
for j in range(5):
topology.addBond(atoms[j], atoms[j+1])
positions = [Vec3(0, 0, 0)]*topology.getNumAtoms()
# The model has 20,000 residues and 120,000 atoms.
output = StringIO()
PDBFile.writeFile(topology, positions, output)
input = StringIO(output.getvalue())
pdb = PDBFile(input)
output.close()
input.close()
self.assertEqual(len(positions), len(pdb.positions))
self.assertEqual(topology.getNumAtoms(), pdb.topology.getNumAtoms())
self.assertEqual(topology.getNumResidues(), pdb.topology.getNumResidues())
self.assertEqual(topology.getNumChains(), pdb.topology.getNumChains())
self.assertEqual(topology.getNumBonds(), pdb.topology.getNumBonds())
for atom in pdb.topology.atoms():
self.assertEqual(str(atom.index+1), atom.id)
for res in pdb.topology.residues():
self.assertEqual(str(res.index+1), res.id)
# Make sure the CONECT records were interpreted correctly.
bonds = set()
for atom1, atom2 in topology.bonds():
bonds.add(tuple(sorted((atom1.index, atom2.index))))
for atom1, atom2 in pdb.topology.bonds():
assert tuple(sorted((atom1.index, atom2.index))) in bonds
def assertVecAlmostEqual(self, p1, p2, tol=1e-7):
unit = p1.unit
p1 = p1.value_in_unit(unit)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment