Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
79740ece
Unverified
Commit
79740ece
authored
Mar 22, 2022
by
Peter Eastman
Committed by
GitHub
Mar 22, 2022
Browse files
Use hex for large atom and residue IDs in PDB files (#3522)
parent
7c42ef5e
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
88 additions
and
44 deletions
+88
-44
wrappers/python/openmm/app/internal/pdbstructure.py
wrappers/python/openmm/app/internal/pdbstructure.py
+31
-37
wrappers/python/openmm/app/pdbfile.py
wrappers/python/openmm/app/pdbfile.py
+18
-7
wrappers/python/tests/TestPdbFile.py
wrappers/python/tests/TestPdbFile.py
+39
-0
No files found.
wrappers/python/openmm/app/internal/pdbstructure.py
View file @
79740ece
...
...
@@ -164,10 +164,10 @@ class PdbStructure(object):
if
command
==
"ATOM "
or
command
==
"HETATM"
:
self
.
_add_atom
(
Atom
(
pdb_line
,
self
,
self
.
extraParticleIdentifier
))
elif
command
==
"CONECT"
:
atoms
=
[
int
(
pdb_line
[
6
:
11
])]
atoms
=
[
_parse_atom_index
(
pdb_line
[
6
:
11
])]
for
pos
in
(
11
,
16
,
21
,
26
):
try
:
atoms
.
append
(
int
(
pdb_line
[
pos
:
pos
+
5
]))
atoms
.
append
(
_parse_atom_index
(
pdb_line
[
pos
:
pos
+
5
]))
except
:
pass
self
.
_current_model
.
connects
.
append
(
atoms
)
...
...
@@ -208,11 +208,9 @@ class PdbStructure(object):
self
.
_finalize
()
def _reset_atom_numbers(self):
    """Reset the atom-number bookkeeping to its initial state."""
    # Start out assuming atom serial numbers are written in decimal, not hex.
    self._atom_numbers_are_hex = False
    # Serial number handed to an atom whose own number field cannot be parsed.
    self._next_atom_number = 1
def _reset_residue_numbers(self):
    """Reset the residue-number bookkeeping to its initial state."""
    # Start out assuming residue numbers are written in decimal, not hex.
    self._residue_numbers_are_hex = False
    # NOTE(review): presumably the fallback residue number, mirroring
    # _next_atom_number -- its use is not visible here; confirm.
    self._next_residue_number = 1
def
write
(
self
,
output_stream
=
sys
.
stdout
):
...
...
@@ -726,15 +724,8 @@ class Atom(object):
self
.
is_final_residue_in_chain
=
False
# Start parsing fields from pdb line
self
.
record_name
=
pdb_line
[
0
:
6
].
strip
()
if
pdbstructure
is
not
None
and
pdbstructure
.
_atom_numbers_are_hex
:
self
.
serial_number
=
int
(
pdb_line
[
6
:
11
],
16
)
else
:
try
:
self
.
serial_number
=
int
(
pdb_line
[
6
:
11
])
except
:
try
:
self
.
serial_number
=
int
(
pdb_line
[
6
:
11
],
16
)
pdbstructure
.
_atom_numbers_are_hex
=
True
self
.
serial_number
=
_parse_atom_index
(
pdb_line
[
6
:
11
])
except
:
# Just give it the next number in sequence.
self
.
serial_number
=
pdbstructure
.
_next_atom_number
...
...
@@ -753,15 +744,11 @@ class Atom(object):
self
.
residue_name
=
self
.
residue_name_with_spaces
.
strip
()
self
.
chain_id
=
pdb_line
[
21
]
if
pdbstructure
is
not
None
and
pdbstructure
.
_residue_numbers_are_hex
:
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
],
16
)
else
:
try
:
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
])
except
:
try
:
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
],
16
)
pdbstructure
.
_residue_numbers_are_hex
=
True
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
],
16
)
-
0xA000
+
10000
except
:
# When VMD runs out of hex values it starts filling the residue ID field with ****.
# Look at the most recent atoms to figure out whether this is a new residue or not.
...
...
@@ -986,6 +973,13 @@ class Atom(object):
return
str
(
self
.
position
)
def
_parse_atom_index
(
index
):
"""Parse the string containing an atom index, which might be either decimal or hex."""
try
:
return
int
(
index
)
except
:
return
int
(
index
,
16
)
-
0xA0000
+
100000
# run module directly for testing
if
__name__
==
'__main__'
:
# Test the examples in the docstrings
...
...
wrappers/python/openmm/app/pdbfile.py
View file @
79740ece
...
...
@@ -363,7 +363,7 @@ class PDBFile(object):
if
keepIds
and
len
(
res
.
id
)
<
5
:
resId
=
res
.
id
else
:
resId
=
"%4d"
%
(
(
resIndex
+
1
)
%
10000
)
resId
=
_formatIndex
(
resIndex
+
1
,
4
)
if
len
(
res
.
insertionCode
)
==
1
:
resIC
=
res
.
insertionCode
else
:
...
...
@@ -384,8 +384,8 @@ class PDBFile(object):
else
:
atomName
=
atom
.
name
coords
=
positions
[
posIndex
]
line
=
"%s%5
d
%-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s "
%
(
recordName
,
atomIndex
%
100000
,
atomName
,
resName
,
chainName
,
resId
,
resIC
,
_format_83
(
coords
[
0
]),
line
=
"%s%5
s
%-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s "
%
(
recordName
,
_formatIndex
(
atomIndex
,
5
)
,
atomName
,
resName
,
chainName
,
resId
,
resIC
,
_format_83
(
coords
[
0
]),
_format_83
(
coords
[
1
]),
_format_83
(
coords
[
2
]),
symbol
)
if
len
(
line
)
!=
80
:
raise
ValueError
(
'Fixed width overflow detected'
)
...
...
@@ -393,7 +393,7 @@ class PDBFile(object):
posIndex
+=
1
atomIndex
+=
1
if
resIndex
==
len
(
residues
)
-
1
:
print
(
"TER %5
d
%3s %s%4s"
%
(
atomIndex
,
resName
,
chainName
,
resId
),
file
=
file
)
print
(
"TER %5
s
%3s %s%4s"
%
(
_formatIndex
(
atomIndex
,
5
),
resName
,
chainName
,
resId
),
file
=
file
)
atomIndex
+=
1
if
modelIndex
is
not
None
:
print
(
"ENDMDL"
,
file
=
file
)
...
...
@@ -450,11 +450,11 @@ class PDBFile(object):
for
index1
in
sorted
(
atomBonds
):
bonded
=
atomBonds
[
index1
]
while
len
(
bonded
)
>
4
:
print
(
"CONECT%5
d
%5
d
%5
d
%5
d
"
%
(
index1
,
bonded
[
0
],
bonded
[
1
],
bonded
[
2
]),
file
=
file
)
print
(
"CONECT%5
s
%5
s
%5
s
%5
s
"
%
(
_formatIndex
(
index1
,
5
),
_formatIndex
(
bonded
[
0
],
5
),
_formatIndex
(
bonded
[
1
],
5
),
_formatIndex
(
bonded
[
2
]
,
5
)
),
file
=
file
)
del
bonded
[:
4
]
line
=
"CONECT%5
d
"
%
index1
line
=
"CONECT%5
s
"
%
_formatIndex
(
index1
,
5
)
for
index2
in
bonded
:
line
=
"%s%5
d
"
%
(
line
,
index2
)
line
=
"%s%5
s
"
%
(
line
,
_formatIndex
(
index2
,
5
)
)
print
(
line
,
file
=
file
)
print
(
"END"
,
file
=
file
)
...
...
@@ -470,3 +470,14 @@ def _format_83(f):
return
(
'%8.3f'
%
f
)[:
8
]
raise
ValueError
(
'coordinate "%s" could not be represented '
'in a width-8 field'
%
f
)
def
_formatIndex
(
index
,
places
):
"""Create a string representation of an atom or residue index. If the value is larger than can fit
in the available space, switch to hex.
"""
if
index
<
10
**
places
:
format
=
f
'%
{
places
}
d'
return
format
%
index
format
=
f
'%
{
places
}
X'
shiftedIndex
=
(
index
-
10
**
places
+
10
*
16
**
(
places
-
1
))
%
(
16
**
places
)
return
format
%
shiftedIndex
\ No newline at end of file
wrappers/python/tests/TestPdbFile.py
View file @
79740ece
...
...
@@ -94,6 +94,45 @@ class TestPdbFile(unittest.TestCase):
self
.
assertEqual
(
19
,
len
(
pdb
.
positions
))
self
.
assertEqual
(
'ILE'
,
list
(
pdb
.
topology
.
residues
())[
0
].
name
)
def test_LargeFile(self):
    """Write and read a file with more than 100,000 atoms"""
    # Build one chain of 20,000 six-atom residues, each bonded in a line.
    topology = Topology()
    chain = topology.addChain('A')
    for _ in range(20000):
        res = topology.addResidue('MOL', chain)
        atoms = [topology.addAtom(f'AT{j}', elem.carbon, res) for j in range(6)]
        for first, second in zip(atoms, atoms[1:]):
            topology.addBond(first, second)
    positions = [Vec3(0, 0, 0)]*topology.getNumAtoms()

    # The model has 20,000 residues and 120,000 atoms.

    # Round-trip the model through an in-memory PDB file.
    with StringIO() as output:
        PDBFile.writeFile(topology, positions, output)
        contents = output.getvalue()
    with StringIO(contents) as pdbInput:
        pdb = PDBFile(pdbInput)

    self.assertEqual(len(positions), len(pdb.positions))
    self.assertEqual(topology.getNumAtoms(), pdb.topology.getNumAtoms())
    self.assertEqual(topology.getNumResidues(), pdb.topology.getNumResidues())
    self.assertEqual(topology.getNumChains(), pdb.topology.getNumChains())
    self.assertEqual(topology.getNumBonds(), pdb.topology.getNumBonds())

    # IDs read back from the file must match the 1-based position of each
    # atom and residue.
    for atom in pdb.topology.atoms():
        self.assertEqual(str(atom.index+1), atom.id)
    for res in pdb.topology.residues():
        self.assertEqual(str(res.index+1), res.id)

    # Make sure the CONECT records were interpreted correctly.

    bonds = {tuple(sorted((atom1.index, atom2.index))) for atom1, atom2 in topology.bonds()}
    for atom1, atom2 in pdb.topology.bonds():
        # assertIn is not stripped under -O and reports the offending pair.
        self.assertIn(tuple(sorted((atom1.index, atom2.index))), bonds)
def
assertVecAlmostEqual
(
self
,
p1
,
p2
,
tol
=
1e-7
):
unit
=
p1
.
unit
p1
=
p1
.
value_in_unit
(
unit
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment