Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
79740ece
Unverified
Commit
79740ece
authored
Mar 22, 2022
by
Peter Eastman
Committed by
GitHub
Mar 22, 2022
Browse files
Use hex for large atom and residue IDs in PDB files (#3522)
parent
7c42ef5e
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
88 additions
and
44 deletions
+88
-44
wrappers/python/openmm/app/internal/pdbstructure.py
wrappers/python/openmm/app/internal/pdbstructure.py
+31
-37
wrappers/python/openmm/app/pdbfile.py
wrappers/python/openmm/app/pdbfile.py
+18
-7
wrappers/python/tests/TestPdbFile.py
wrappers/python/tests/TestPdbFile.py
+39
-0
No files found.
wrappers/python/openmm/app/internal/pdbstructure.py
View file @
79740ece
...
...
@@ -164,10 +164,10 @@ class PdbStructure(object):
if
command
==
"ATOM "
or
command
==
"HETATM"
:
self
.
_add_atom
(
Atom
(
pdb_line
,
self
,
self
.
extraParticleIdentifier
))
elif
command
==
"CONECT"
:
atoms
=
[
int
(
pdb_line
[
6
:
11
])]
atoms
=
[
_parse_atom_index
(
pdb_line
[
6
:
11
])]
for
pos
in
(
11
,
16
,
21
,
26
):
try
:
atoms
.
append
(
int
(
pdb_line
[
pos
:
pos
+
5
]))
atoms
.
append
(
_parse_atom_index
(
pdb_line
[
pos
:
pos
+
5
]))
except
:
pass
self
.
_current_model
.
connects
.
append
(
atoms
)
...
...
@@ -208,11 +208,9 @@ class PdbStructure(object):
self
.
_finalize
()
def
_reset_atom_numbers
(
self
):
self
.
_atom_numbers_are_hex
=
False
self
.
_next_atom_number
=
1
def
_reset_residue_numbers
(
self
):
self
.
_residue_numbers_are_hex
=
False
self
.
_next_residue_number
=
1
def
write
(
self
,
output_stream
=
sys
.
stdout
):
...
...
@@ -726,18 +724,11 @@ class Atom(object):
self
.
is_final_residue_in_chain
=
False
# Start parsing fields from pdb line
self
.
record_name
=
pdb_line
[
0
:
6
].
strip
()
if
pdbstructure
is
not
None
and
pdbstructure
.
_atom_numbers_are_hex
:
self
.
serial_number
=
int
(
pdb_line
[
6
:
11
],
16
)
else
:
try
:
self
.
serial_number
=
int
(
pdb_line
[
6
:
11
])
except
:
try
:
self
.
serial_number
=
int
(
pdb_line
[
6
:
11
],
16
)
pdbstructure
.
_atom_numbers_are_hex
=
True
except
:
# Just give it the next number in sequence.
self
.
serial_number
=
pdbstructure
.
_next_atom_number
try
:
self
.
serial_number
=
_parse_atom_index
(
pdb_line
[
6
:
11
])
except
:
# Just give it the next number in sequence.
self
.
serial_number
=
pdbstructure
.
_next_atom_number
self
.
name_with_spaces
=
pdb_line
[
12
:
16
]
alternate_location_indicator
=
pdb_line
[
16
]
...
...
@@ -753,31 +744,27 @@ class Atom(object):
self
.
residue_name
=
self
.
residue_name_with_spaces
.
strip
()
self
.
chain_id
=
pdb_line
[
21
]
if
pdbstructure
is
not
None
and
pdbstructure
.
_residue_numbers_are_hex
:
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
]
,
16
)
e
lse
:
try
:
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
])
e
xcept
:
try
:
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
]
)
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
]
,
16
)
-
0xA000
+
10000
except
:
try
:
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
],
16
)
pdbstructure
.
_residue_numbers_are_hex
=
True
except
:
# When VMD runs out of hex values it starts filling the residue ID field with ****.
# Look at the most recent atoms to figure out whether this is a new residue or not.
if
pdbstructure
.
_current_model
is
None
or
pdbstructure
.
_current_model
.
_current_chain
is
None
or
pdbstructure
.
_current_model
.
_current_chain
.
_current_residue
is
None
:
# This is the first residue in the model.
# When VMD runs out of hex values it starts filling the residue ID field with ****.
# Look at the most recent atoms to figure out whether this is a new residue or not.
if
pdbstructure
.
_current_model
is
None
or
pdbstructure
.
_current_model
.
_current_chain
is
None
or
pdbstructure
.
_current_model
.
_current_chain
.
_current_residue
is
None
:
# This is the first residue in the model.
self
.
residue_number
=
pdbstructure
.
_next_residue_number
else
:
currentRes
=
pdbstructure
.
_current_model
.
_current_chain
.
_current_residue
if
currentRes
.
name_with_spaces
!=
self
.
residue_name_with_spaces
:
# The residue name has changed.
self
.
residue_number
=
pdbstructure
.
_next_residue_number
elif
self
.
name_with_spaces
in
currentRes
.
atoms_by_name
:
# There is already an atom with this name.
self
.
residue_number
=
pdbstructure
.
_next_residue_number
else
:
currentRes
=
pdbstructure
.
_current_model
.
_current_chain
.
_current_residue
if
currentRes
.
name_with_spaces
!=
self
.
residue_name_with_spaces
:
# The residue name has changed.
self
.
residue_number
=
pdbstructure
.
_next_residue_number
elif
self
.
name_with_spaces
in
currentRes
.
atoms_by_name
:
# There is already an atom with this name.
self
.
residue_number
=
pdbstructure
.
_next_residue_number
else
:
self
.
residue_number
=
currentRes
.
number
self
.
residue_number
=
currentRes
.
number
self
.
insertion_code
=
pdb_line
[
26
]
# coordinates, occupancy, and temperature factor belong in Atom.Location object
x
=
float
(
pdb_line
[
30
:
38
])
...
...
@@ -986,6 +973,13 @@ class Atom(object):
return
str
(
self
.
position
)
def
_parse_atom_index
(
index
):
"""Parse the string containing an atom index, which might be either decimal or hex."""
try
:
return
int
(
index
)
except
:
return
int
(
index
,
16
)
-
0xA0000
+
100000
# run module directly for testing
if
__name__
==
'__main__'
:
# Test the examples in the docstrings
...
...
wrappers/python/openmm/app/pdbfile.py
View file @
79740ece
...
...
@@ -363,7 +363,7 @@ class PDBFile(object):
if
keepIds
and
len
(
res
.
id
)
<
5
:
resId
=
res
.
id
else
:
resId
=
"%4d"
%
(
(
resIndex
+
1
)
%
10000
)
resId
=
_formatIndex
(
resIndex
+
1
,
4
)
if
len
(
res
.
insertionCode
)
==
1
:
resIC
=
res
.
insertionCode
else
:
...
...
@@ -384,8 +384,8 @@ class PDBFile(object):
else
:
atomName
=
atom
.
name
coords
=
positions
[
posIndex
]
line
=
"%s%5
d
%-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s "
%
(
recordName
,
atomIndex
%
100000
,
atomName
,
resName
,
chainName
,
resId
,
resIC
,
_format_83
(
coords
[
0
]),
line
=
"%s%5
s
%-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s "
%
(
recordName
,
_formatIndex
(
atomIndex
,
5
)
,
atomName
,
resName
,
chainName
,
resId
,
resIC
,
_format_83
(
coords
[
0
]),
_format_83
(
coords
[
1
]),
_format_83
(
coords
[
2
]),
symbol
)
if
len
(
line
)
!=
80
:
raise
ValueError
(
'Fixed width overflow detected'
)
...
...
@@ -393,7 +393,7 @@ class PDBFile(object):
posIndex
+=
1
atomIndex
+=
1
if
resIndex
==
len
(
residues
)
-
1
:
print
(
"TER %5
d
%3s %s%4s"
%
(
atomIndex
,
resName
,
chainName
,
resId
),
file
=
file
)
print
(
"TER %5
s
%3s %s%4s"
%
(
_formatIndex
(
atomIndex
,
5
),
resName
,
chainName
,
resId
),
file
=
file
)
atomIndex
+=
1
if
modelIndex
is
not
None
:
print
(
"ENDMDL"
,
file
=
file
)
...
...
@@ -450,11 +450,11 @@ class PDBFile(object):
for
index1
in
sorted
(
atomBonds
):
bonded
=
atomBonds
[
index1
]
while
len
(
bonded
)
>
4
:
print
(
"CONECT%5
d
%5
d
%5
d
%5
d
"
%
(
index1
,
bonded
[
0
],
bonded
[
1
],
bonded
[
2
]),
file
=
file
)
print
(
"CONECT%5
s
%5
s
%5
s
%5
s
"
%
(
_formatIndex
(
index1
,
5
),
_formatIndex
(
bonded
[
0
],
5
),
_formatIndex
(
bonded
[
1
],
5
),
_formatIndex
(
bonded
[
2
]
,
5
)
),
file
=
file
)
del
bonded
[:
4
]
line
=
"CONECT%5
d
"
%
index1
line
=
"CONECT%5
s
"
%
_formatIndex
(
index1
,
5
)
for
index2
in
bonded
:
line
=
"%s%5
d
"
%
(
line
,
index2
)
line
=
"%s%5
s
"
%
(
line
,
_formatIndex
(
index2
,
5
)
)
print
(
line
,
file
=
file
)
print
(
"END"
,
file
=
file
)
...
...
@@ -470,3 +470,14 @@ def _format_83(f):
return
(
'%8.3f'
%
f
)[:
8
]
raise
ValueError
(
'coordinate "%s" could not be represented '
'in a width-8 field'
%
f
)
def
_formatIndex
(
index
,
places
):
"""Create a string representation of an atom or residue index. If the value is larger than can fit
in the available space, switch to hex.
"""
if
index
<
10
**
places
:
format
=
f
'%
{
places
}
d'
return
format
%
index
format
=
f
'%
{
places
}
X'
shiftedIndex
=
(
index
-
10
**
places
+
10
*
16
**
(
places
-
1
))
%
(
16
**
places
)
return
format
%
shiftedIndex
\ No newline at end of file
wrappers/python/tests/TestPdbFile.py
View file @
79740ece
...
...
@@ -94,6 +94,45 @@ class TestPdbFile(unittest.TestCase):
self
.
assertEqual
(
19
,
len
(
pdb
.
positions
))
self
.
assertEqual
(
'ILE'
,
list
(
pdb
.
topology
.
residues
())[
0
].
name
)
def
test_LargeFile
(
self
):
"""Write and read a file with more than 100,000 atoms"""
topology
=
Topology
()
chain
=
topology
.
addChain
(
'A'
)
for
i
in
range
(
20000
):
res
=
topology
.
addResidue
(
'MOL'
,
chain
)
atoms
=
[]
for
j
in
range
(
6
):
atoms
.
append
(
topology
.
addAtom
(
f
'AT
{
j
}
'
,
elem
.
carbon
,
res
))
for
j
in
range
(
5
):
topology
.
addBond
(
atoms
[
j
],
atoms
[
j
+
1
])
positions
=
[
Vec3
(
0
,
0
,
0
)]
*
topology
.
getNumAtoms
()
# The model has 20,000 residues and 120,000 atoms.
output
=
StringIO
()
PDBFile
.
writeFile
(
topology
,
positions
,
output
)
input
=
StringIO
(
output
.
getvalue
())
pdb
=
PDBFile
(
input
)
output
.
close
()
input
.
close
()
self
.
assertEqual
(
len
(
positions
),
len
(
pdb
.
positions
))
self
.
assertEqual
(
topology
.
getNumAtoms
(),
pdb
.
topology
.
getNumAtoms
())
self
.
assertEqual
(
topology
.
getNumResidues
(),
pdb
.
topology
.
getNumResidues
())
self
.
assertEqual
(
topology
.
getNumChains
(),
pdb
.
topology
.
getNumChains
())
self
.
assertEqual
(
topology
.
getNumBonds
(),
pdb
.
topology
.
getNumBonds
())
for
atom
in
pdb
.
topology
.
atoms
():
self
.
assertEqual
(
str
(
atom
.
index
+
1
),
atom
.
id
)
for
res
in
pdb
.
topology
.
residues
():
self
.
assertEqual
(
str
(
res
.
index
+
1
),
res
.
id
)
# Make sure the CONECT records were interpreted correctly.
bonds
=
set
()
for
atom1
,
atom2
in
topology
.
bonds
():
bonds
.
add
(
tuple
(
sorted
((
atom1
.
index
,
atom2
.
index
))))
for
atom1
,
atom2
in
pdb
.
topology
.
bonds
():
assert
tuple
(
sorted
((
atom1
.
index
,
atom2
.
index
)))
in
bonds
def
assertVecAlmostEqual
(
self
,
p1
,
p2
,
tol
=
1e-7
):
unit
=
p1
.
unit
p1
=
p1
.
value_in_unit
(
unit
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment