Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
dc4d96fa
"wrappers/python/src/vscode:/vscode.git/clone" did not exist on "6281f23fc404f0ff01db05655dcc0479b796f454"
Commit
dc4d96fa
authored
Feb 26, 2011
by
Peter Eastman
Browse files
Cleaning up Python wrappers
parent
65b9d0b6
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
2 additions
and
1160 deletions
+2
-1160
wrappers/python/MANIFEST.in
wrappers/python/MANIFEST.in
+0
-6
wrappers/python/simtk/chem/element.py
wrappers/python/simtk/chem/element.py
+0
-147
wrappers/python/simtk/chem/openmm/__init__.py
wrappers/python/simtk/chem/openmm/__init__.py
+2
-29
wrappers/python/simtk/chem/pdbstructure.py
wrappers/python/simtk/chem/pdbstructure.py
+0
-978
No files found.
wrappers/python/MANIFEST.in
View file @
dc4d96fa
include setup.py
include setup.py
include *.py
include *.py
include *.txt
include *.txt
include doc/*.pdf
include images/*.bmp
include images/*.png
global-include README
global-include README
recursive-include simtk *.txt *.py
recursive-include simtk *.txt *.py
recursive-include OpenMM *.txt *.py *.so *.dll *.lib *.dylib *.h
recursive-include OpenMM *.txt *.py *.so *.dll *.lib *.dylib *.h
recursive-include examples *.txt *.py *.pdb *.top *.prmtop *.crd *.sh *.leap
recursive-include test *.txt *.py *.pdb *.top *.prmtop *.crd *.sh *.in *log
recursive-include scripts *.txt *.py
recursive-include src *.txt *.py *.i *.c *.cxx *.h *.sh Doxyfile
recursive-include src *.txt *.py *.i *.c *.cxx *.h *.sh Doxyfile
prune src/swig_doxygen/doxygen/html
prune src/swig_doxygen/doxygen/html
prune src/swig_doxygen/doxygen/xml
prune src/swig_doxygen/doxygen/xml
...
...
wrappers/python/simtk/chem/element.py
deleted
100644 → 0
View file @
65b9d0b6
#!/bin/env python
"""
element.py: Used for managing elements.
"""
__author__
=
"Christopher M. Bruns"
__version__
=
"1.0"
from
simtk.unit
import
daltons
class Element:
    """A chemical element that registers itself in a class-wide symbol table.

    Every instance created is stored in ``Element.elements_by_symbol`` under
    its symbol with surrounding whitespace removed and converted to upper
    case, so creating two elements whose normalized symbols collide trips
    an assertion.

    Attributes set by the constructor:
      - atomic_number: the ``number`` argument
      - name: the ``name`` argument
      - symbol: the ``symbol`` argument, stored exactly as given
      - mass: the ``mass`` argument
    """

    # Shared registry: normalized (stripped, upper-cased) symbol -> Element
    elements_by_symbol = {}

    def __init__(self, number, name, symbol, mass):
        """Store the element's properties and add it to the symbol registry."""
        self.atomic_number, self.name = number, name
        self.symbol, self.mass = symbol, mass

        # Index this element in the class-level table; each normalized
        # symbol may be registered only once.
        registry = Element.elements_by_symbol
        registry_key = symbol.strip().upper()
        assert registry_key not in registry
        registry[registry_key] = self
def get_by_symbol(symbol):
    """Return the registered Element for ``symbol``.

    The lookup ignores surrounding whitespace and letter case.  A symbol
    that was never registered raises KeyError.
    """
    return Element.elements_by_symbol[symbol.strip().upper()]
# Module-level element table.
#
# Each statement below constructs an Element(atomic_number, name, symbol, mass)
# and binds it to a module-level name.  Constructing the Element also records
# it in Element.elements_by_symbol, so every entry is reachable through
# get_by_symbol() as well as by its module attribute.
#
# NOTE(review): many of the mass literals look like they carry the trailing
# uncertainty digit(s) of a reference table appended to the value (e.g.
# 12.01078 for carbon, 1.007947 for hydrogen) -- confirm against the source
# the numbers were taken from before relying on the last digits.
hydrogen = Element(1, "hydrogen", "H", 1.007947*daltons)
# deuterium shares atomic number 1 with hydrogen but is registered under "D"
deuterium = Element(1, "deuterium", "D", 2.01355321270*daltons)
helium = Element(2, "helium", "He", 4.003*daltons)
lithium = Element(3, "lithium", "Li", 6.9412*daltons)
beryllium = Element(4, "beryllium", "Be", 9.0121823*daltons)
boron = Element(5, "boron", "B", 10.8117*daltons)
carbon = Element(6, "carbon", "C", 12.01078*daltons)
nitrogen = Element(7, "nitrogen", "N", 14.00672*daltons)
oxygen = Element(8, "oxygen", "O", 15.99943*daltons)
fluorine = Element(9, "fluorine", "F", 18.99840325*daltons)
neon = Element(10, "neon", "Ne", 20.17976*daltons)
sodium = Element(11, "sodium", "Na", 22.989769282*daltons)
magnesium = Element(12, "magnesium", "Mg", 24.30506*daltons)
aluminum = Element(13, "aluminum", "Al", 26.98153868*daltons)
silicon = Element(14, "silicon", "Si", 28.08553*daltons)
phosphorus = Element(15, "phosphorus", "P", 30.9737622*daltons)
sulfur = Element(16, "sulfur", "S", 32.0655*daltons)
chlorine = Element(17, "chlorine", "Cl", 35.4532*daltons)
argon = Element(18, "argon", "Ar", 39.9481*daltons)
potassium = Element(19, "potassium", "K", 39.09831*daltons)
calcium = Element(20, "calcium", "Ca", 40.0784*daltons)
scandium = Element(21, "scandium", "Sc", 44.9559126*daltons)
titanium = Element(22, "titanium", "Ti", 47.8671*daltons)
vanadium = Element(23, "vanadium", "V", 50.94151*daltons)
chromium = Element(24, "chromium", "Cr", 51.99616*daltons)
manganese = Element(25, "manganese", "Mn", 54.9380455*daltons)
iron = Element(26, "iron", "Fe", 55.8452*daltons)
cobalt = Element(27, "cobalt", "Co", 58.9331955*daltons)
nickel = Element(28, "nickel", "Ni", 58.69342*daltons)
copper = Element(29, "copper", "Cu", 63.5463*daltons)
zinc = Element(30, "zinc", "Zn", 65.4094*daltons)
gallium = Element(31, "gallium", "Ga", 69.7231*daltons)
germanium = Element(32, "germanium", "Ge", 72.641*daltons)
arsenic = Element(33, "arsenic", "As", 74.921602*daltons)
selenium = Element(34, "selenium", "Se", 78.963*daltons)
bromine = Element(35, "bromine", "Br", 79.9041*daltons)
krypton = Element(36, "krypton", "Kr", 83.7982*daltons)
rubidium = Element(37, "rubidium", "Rb", 85.46783*daltons)
strontium = Element(38, "strontium", "Sr", 87.621*daltons)
yttrium = Element(39, "yttrium", "Y", 88.905852*daltons)
zirconium = Element(40, "zirconium", "Zr", 91.2242*daltons)
niobium = Element(41, "niobium", "Nb", 92.906382*daltons)
molybdenum = Element(42, "molybdenum", "Mo", 95.942*daltons)
technetium = Element(43, "technetium", "Tc", 98*daltons)
ruthenium = Element(44, "ruthenium", "Ru", 101.072*daltons)
rhodium = Element(45, "rhodium", "Rh", 102.905502*daltons)
palladium = Element(46, "palladium", "Pd", 106.421*daltons)
silver = Element(47, "silver", "Ag", 107.86822*daltons)
cadmium = Element(48, "cadmium", "Cd", 112.4118*daltons)
indium = Element(49, "indium", "In", 114.8183*daltons)
tin = Element(50, "tin", "Sn", 118.7107*daltons)
antimony = Element(51, "antimony", "Sb", 121.7601*daltons)
tellurium = Element(52, "tellurium", "Te", 127.603*daltons)
iodine = Element(53, "iodine", "I", 126.904473*daltons)
xenon = Element(54, "xenon", "Xe", 131.2936*daltons)
cesium = Element(55, "cesium", "Cs", 132.90545192*daltons)
barium = Element(56, "barium", "Ba", 137.3277*daltons)
lanthanum = Element(57, "lanthanum", "La", 138.905477*daltons)
cerium = Element(58, "cerium", "Ce", 140.1161*daltons)
praseodymium = Element(59, "praseodymium", "Pr", 140.907652*daltons)
neodymium = Element(60, "neodymium", "Nd", 144.2423*daltons)
promethium = Element(61, "promethium", "Pm", 145*daltons)
samarium = Element(62, "samarium", "Sm", 150.362*daltons)
europium = Element(63, "europium", "Eu", 151.9641*daltons)
gadolinium = Element(64, "gadolinium", "Gd", 157.253*daltons)
terbium = Element(65, "terbium", "Tb", 158.925352*daltons)
dysprosium = Element(66, "dysprosium", "Dy", 162.5001*daltons)
holmium = Element(67, "holmium", "Ho", 164.930322*daltons)
erbium = Element(68, "erbium", "Er", 167.2593*daltons)
thulium = Element(69, "thulium", "Tm", 168.934212*daltons)
ytterbium = Element(70, "ytterbium", "Yb", 173.043*daltons)
lutetium = Element(71, "lutetium", "Lu", 174.9671*daltons)
hafnium = Element(72, "hafnium", "Hf", 178.492*daltons)
tantalum = Element(73, "tantalum", "Ta", 180.947882*daltons)
tungsten = Element(74, "tungsten", "W", 183.841*daltons)
rhenium = Element(75, "rhenium", "Re", 186.2071*daltons)
osmium = Element(76, "osmium", "Os", 190.233*daltons)
iridium = Element(77, "iridium", "Ir", 192.2173*daltons)
platinum = Element(78, "platinum", "Pt", 195.0849*daltons)
gold = Element(79, "gold", "Au", 196.9665694*daltons)
mercury = Element(80, "mercury", "Hg", 200.592*daltons)
thallium = Element(81, "thallium", "Tl", 204.38332*daltons)
lead = Element(82, "lead", "Pb", 207.21*daltons)
bismuth = Element(83, "bismuth", "Bi", 208.980401*daltons)
polonium = Element(84, "polonium", "Po", 209*daltons)
astatine = Element(85, "astatine", "At", 210*daltons)
radon = Element(86, "radon", "Rn", 222.018*daltons)
francium = Element(87, "francium", "Fr", 223*daltons)
radium = Element(88, "radium", "Ra", 226*daltons)
actinium = Element(89, "actinium", "Ac", 227*daltons)
thorium = Element(90, "thorium", "Th", 232.038062*daltons)
protactinium = Element(91, "protactinium", "Pa", 231.035882*daltons)
uranium = Element(92, "uranium", "U", 238.028913*daltons)
neptunium = Element(93, "neptunium", "Np", 237*daltons)
plutonium = Element(94, "plutonium", "Pu", 244*daltons)
americium = Element(95, "americium", "Am", 243*daltons)
curium = Element(96, "curium", "Cm", 247*daltons)
berkelium = Element(97, "berkelium", "Bk", 247*daltons)
californium = Element(98, "californium", "Cf", 251*daltons)
einsteinium = Element(99, "einsteinium", "Es", 252*daltons)
fermium = Element(100, "fermium", "Fm", 257*daltons)
mendelevium = Element(101, "mendelevium", "Md", 258*daltons)
nobelium = Element(102, "nobelium", "No", 259*daltons)
lawrencium = Element(103, "lawrencium", "Lr", 262*daltons)
rutherfordium = Element(104, "rutherfordium", "Rf", 261*daltons)
dubnium = Element(105, "dubnium", "Db", 262*daltons)
seaborgium = Element(106, "seaborgium", "Sg", 266*daltons)
bohrium = Element(107, "bohrium", "Bh", 264*daltons)
hassium = Element(108, "hassium", "Hs", 269*daltons)
meitnerium = Element(109, "meitnerium", "Mt", 268*daltons)
darmstadtium = Element(110, "darmstadtium", "Ds", 281*daltons)
roentgenium = Element(111, "roentgenium", "Rg", 272*daltons)
ununbium = Element(112, "ununbium", "Uub", 285*daltons)
ununtrium = Element(113, "ununtrium", "Uut", 284*daltons)
ununquadium = Element(114, "ununquadium", "Uuq", 289*daltons)
ununpentium = Element(115, "ununpentium", "Uup", 288*daltons)
ununhexium = Element(116, "ununhexium", "Uuh", 292*daltons)
wrappers/python/simtk/chem/openmm/__init__.py
View file @
dc4d96fa
...
@@ -32,33 +32,6 @@ else:
...
@@ -32,33 +32,6 @@ else:
flags
=
sys
.
getdlopenflags
()
flags
=
sys
.
getdlopenflags
()
sys
.
setdlopenflags
(
flags
|
ctypes
.
RTLD_GLOBAL
)
sys
.
setdlopenflags
(
flags
|
ctypes
.
RTLD_GLOBAL
)
import
simtk.chem
from
simtk.chem.openmm.openmm
import
*
skipPluginFilenames
=
[
"OpenMMFreeEnergy"
]
if
len
(
simtk
.
chem
.
__path__
)
>
1
:
raise
Exception
(
"more than one item in simtk.chem.openmm.__path__"
)
pluginPath
=
os
.
path
.
join
(
simtk
.
chem
.
__path__
[
0
],
'openmm'
,
'OpenMM'
,
'plugins'
)
pluginLoadingErrors
=
{}
pluginLoadedLibNames
=
[]
libFilenames
=
glob
.
glob
(
os
.
path
.
join
(
pluginPath
,
"*.%s"
%
(
libExt
)))
# Load plugins
for
filename
in
libFilenames
:
skipPlugin
=
False
for
pFilename
in
skipPluginFilenames
:
fullFilename
=
"%s%s.%s"
%
(
libPrefix
,
pFilename
,
libExt
)
if
os
.
path
.
split
(
filename
)[
-
1
]
==
fullFilename
:
skipPlugin
=
True
break
if
skipPlugin
:
continue
try
:
Platform
.
loadPluginLibrary
(
os
.
path
.
join
(
pluginPath
,
filename
))
pluginLoadedLibNames
.
append
(
filename
)
except
:
pluginLoadingErrors
[
filename
]
=
sys
.
exc_info
()
from
simtk.chem.openmm.openmm
import
Platform
pluginLoadedLibNames
=
Platform
.
loadPluginsFromDirectory
(
Platform
.
getDefaultPluginsDirectory
())
\ No newline at end of file
wrappers/python/simtk/chem/pdbstructure.py
deleted
100644 → 0
View file @
65b9d0b6
#!/bin/env python
"""
pdbstructure.py: Used for managing PDB formatted files.
"""
__author__
=
"Christopher M. Bruns"
__version__
=
"1.0"
from
simtk.vec3
import
Vec3
import
simtk.unit
as
unit
import
simtk.chem.element
as
element
import
warnings
import
sys
class PdbStructure(object):
    """
    PdbStructure object holds a parsed Protein Data Bank format file.

    Examples:

    Load a pdb structure from a file:
    > pdb = PdbStructure(open("1ARJ.pdb"))

    Fetch the first atom of the structure:
    > print pdb.iter_atoms().next()
    ATOM 1 O5' G N 17 13.768 -8.431 11.865 1.00 0.00 O

    Loop over all of the atoms of the structure
    > for atom in pdb.iter_atoms():
    >     print atom
    ATOM 1 O5' G N 17 13.768 -8.431 11.865 1.00 0.00 O
    ...

    Get a list of all atoms in the structure:
    > atoms = list(pdb.iter_atoms())

    also:
    residues = list(pdb.iter_residues())
    positions = list(pdb.iter_positions())
    chains = list(pdb.iter_chains())
    models = list(pdb.iter_models())

    Fetch atomic coordinates of first atom:
    > print pdb.iter_positions().next()
    [13.768, -8.431, 11.865] A

    or

    > print pdb.iter_atoms().next().position
    [13.768, -8.431, 11.865] A

    Strip the length units from an atomic position:
    > import simtk.unit
    > pos = pdb.iter_positions().next()
    > print pos
    [13.768, -8.431, 11.865] A
    > print pos / simtk.unit.angstroms
    [13.768, -8.431, 11.865]
    > print pos / simtk.unit.nanometers
    [1.3768, -0.8431, 1.1865]

    The hierarchical structure of the parsed PDB structure is as follows:
    PdbStructure
      Model
        Chain
          Residue
            Atom
              Location

    Model - A PDB structure consists of one or more Models.  Each model
    corresponds to one version of an NMR structure, or to one frame of a
    molecular dynamics trajectory.

    Chain - A Model contains one or more Chains.  Each chain corresponds to
    one molecule, although multiple water molecules are frequently included
    in the same chain.

    Residue - A Chain contains one or more Residues.  One Residue corresponds
    to one of the repeating units that constitute a polymer such as protein
    or DNA.  For non-polymeric molecules, one Residue represents one molecule.

    Atom - A Residue contains one or more Atoms.  Atoms are chemical atoms.

    Location - An atom can sometimes have more than one position, due to
    static disorder in X-ray crystal structures.  To see all of the atom
    positions, use the atom.iter_positions() method, or pass the parameter
    "include_alt_loc=True" to one of the other iter_positions() methods.

    > for pos in pdb.iter_positions(include_alt_loc=True):
    >     ...

    Will loop over all atom positions, including multiple alternate locations
    for atoms that have multiple positions.  The default value of
    include_alt_loc is False for the iter_positions() methods.
    """

    def __init__(self, input_stream, load_all_models=False):
        """Create a PDB model from a PDB file stream.

        Parameters:
         - self (PdbStructure) The new object that is created.
         - input_stream (stream) An input file stream, probably created with
             open().  Any iterable of text lines works.
         - load_all_models (bool) Whether to load every model of an NMR
             structure or trajectory, or just load the first model, to save
             memory.
        """
        # initialize models
        self.load_all_models = load_all_models
        self.models = []
        self._current_model = None
        self.default_model = None
        self.models_by_number = {}
        # read file
        self._load(input_stream)

    def _load(self, input_stream):
        """Parse ``input_stream`` line by line and populate the models.

        ATOM/HETATM lines become atoms, MODEL lines open a new model, TER
        lines close the current chain, and END/ENDMDL lines finalize the
        current model (and stop parsing unless load_all_models is set).
        Records that arrive before any atom or MODEL line are tolerated
        instead of failing on the not-yet-created current model.
        """
        # Read one line at a time
        for pdb_line in input_stream:
            # Look for atoms
            if pdb_line.startswith(("ATOM ", "HETATM")):
                self._add_atom(Atom(pdb_line))
            # Notice MODEL punctuation, for the next level of detail
            # in the structure->model->chain->residue->atom->position hierarchy
            elif pdb_line.startswith("MODEL"):
                model_number = int(pdb_line[10:14])
                self._add_model(Model(model_number))
            # "END" also matches "ENDMDL"; both records are handled the same way
            elif pdb_line.startswith("END"):
                if self._current_model is not None:
                    self._current_model._finalize()
                if not self.load_all_models:
                    break
            # Accept "TER" with or without trailing fields, but not longer
            # record names that merely begin with those three letters.
            elif pdb_line.startswith("TER") and pdb_line.split()[0] == "TER":
                model = self._current_model
                chain = None if model is None else model._current_chain
                if chain is not None:
                    chain._add_ter_record()
        self._finalize()

    def write(self, output_stream=sys.stdout):
        """Write out structure in PDB format"""
        # MODEL/ENDMDL punctuation is only emitted for multi-model structures
        multiple_models = len(self.models) > 1
        for model in self.models:
            if len(model.chains) == 0:
                continue
            if multiple_models:
                # Model number goes in columns 11-14, which is where _load
                # reads it back from (pdb_line[10:14]).
                output_stream.write("MODEL     %4d\n" % (model.number))
            model.write(output_stream)
            if multiple_models:
                output_stream.write("ENDMDL\n")
        output_stream.write("END\n")

    def _add_model(self, model):
        """Append ``model``, make it current, and index it by its number.

        The first model added becomes the default model; only the first
        model with a given number is stored in models_by_number.
        """
        if self.default_model is None:
            self.default_model = model
        self.models.append(model)
        self._current_model = model
        if model.number not in self.models_by_number:
            self.models_by_number[model.number] = model

    def get_model(self, model_number):
        """Return the model with the given number (KeyError if absent)."""
        return self.models_by_number[model_number]

    def model_numbers(self):
        """Return the model numbers present in this structure."""
        return self.models_by_number.keys()

    def __contains__(self, model_number):
        return model_number in self.models_by_number

    def __getitem__(self, model_number):
        return self.models_by_number[model_number]

    def __iter__(self):
        for model in self.models:
            yield model

    def iter_models(self, use_all_models=False):
        """Yield every model, or only the first one when use_all_models is False."""
        if use_all_models:
            for model in self:
                yield model
        elif len(self.models) > 0:
            yield self.models[0]

    def iter_chains(self, use_all_models=False):
        """Yield the chains of the selected model(s)."""
        for model in self.iter_models(use_all_models):
            for chain in model.iter_chains():
                yield chain

    def iter_residues(self, use_all_models=False):
        """Yield the residues of the selected model(s)."""
        for model in self.iter_models(use_all_models):
            for res in model.iter_residues():
                yield res

    def iter_atoms(self, use_all_models=False):
        """Yield the atoms of the selected model(s)."""
        for model in self.iter_models(use_all_models):
            for atom in model.iter_atoms():
                yield atom

    def iter_positions(self, use_all_models=False, include_alt_loc=False):
        """
        Iterate over atomic positions.

        Parameters
         - use_all_models (bool=False) Get positions from all models or just
             the first one.
         - include_alt_loc (bool=False) Get all positions for each atom, or
             just the first one.
        """
        for model in self.iter_models(use_all_models):
            for loc in model.iter_positions(include_alt_loc):
                yield loc

    def __len__(self):
        return len(self.models)

    def _add_atom(self, atom):
        """Add ``atom`` to the current model, creating model 0 if none exists yet."""
        if self._current_model is None:
            self._add_model(Model(0))
        atom.model_number = self._current_model.number
        # Atom might be alternate position for existing atom
        self._current_model._add_atom(atom)

    def _finalize(self):
        """Establish first and last residues, atoms, etc."""
        for model in self.models:
            model._finalize()
class Model(object):
    """One model (coordinate set) of a PDB structure.

    NMR structures usually contain several models; an instance of this
    class holds the chains that belong to one of them.
    """

    def __init__(self, model_number=1):
        self.number = model_number
        self.chains = []
        self._current_chain = None
        self.chains_by_id = {}

    def _add_atom(self, atom):
        """Hand ``atom`` to the chain it belongs to, opening a new chain if needed."""
        if not self.chains:
            self._add_chain(Chain(atom.chain_id))
        active = self._current_chain
        # Start a new chain when the chain id changes, or when the active
        # chain has already been closed by a TER record (even if the id is
        # the same).
        if active.chain_id != atom.chain_id or active.has_ter_record:
            self._add_chain(Chain(atom.chain_id))
        self._current_chain._add_atom(atom)

    def _add_chain(self, chain):
        """Append ``chain`` and make it current; index it unless its id is taken."""
        self.chains.append(chain)
        self._current_chain = chain
        # only the first chain with a given id is reachable by id
        self.chains_by_id.setdefault(chain.chain_id, chain)

    def get_chain(self, chain_id):
        """Return the first chain registered under ``chain_id`` (KeyError if absent)."""
        return self.chains_by_id[chain_id]

    def chain_ids(self):
        """Return the chain ids known to this model."""
        return self.chains_by_id.keys()

    def __contains__(self, chain_id):
        return chain_id in self.chains_by_id

    def __getitem__(self, chain_id):
        return self.chains_by_id[chain_id]

    def __iter__(self):
        return iter(self.chains)

    def iter_chains(self):
        """Yield each chain in file order."""
        for one_chain in self:
            yield one_chain

    def iter_residues(self):
        """Yield every residue of every chain."""
        for one_chain in self:
            for residue in one_chain.iter_residues():
                yield residue

    def iter_atoms(self):
        """Yield every atom of every chain."""
        for one_chain in self:
            for one_atom in one_chain.iter_atoms():
                yield one_atom

    def iter_positions(self, include_alt_loc=False):
        """Yield atom positions, forwarding ``include_alt_loc`` to each chain."""
        for one_chain in self:
            for position in one_chain.iter_positions(include_alt_loc):
                yield position

    def __len__(self):
        return len(self.chains)

    def write(self, output_stream=sys.stdout):
        """Write all chains to ``output_stream`` using one shared serial counter."""
        # Start atom serial numbers at 1
        serial = Model.AtomSerialNumber(1)
        for one_chain in self.chains:
            one_chain.write(serial, output_stream)

    def _finalize(self):
        """Finalize every chain of this model."""
        for one_chain in self.chains:
            one_chain._finalize()

    class AtomSerialNumber(object):
        """pdb.Model inner class for pass-by-reference incrementable serial number"""

        def __init__(self, val):
            self.val = val

        def increment(self):
            self.val = self.val + 1
class Chain(object):
    """One chain of a PDB model: an ordered list of residues.

    Residues are also indexed by residue number and by residue number plus
    insertion code; in both indexes only the first residue with a given key
    is kept.
    """

    def __init__(self, chain_id=' '):
        self.chain_id = chain_id
        self.residues = []
        self.has_ter_record = False
        self._current_residue = None
        self.residues_by_num_icode = {}
        self.residues_by_number = {}

    def _add_atom(self, atom):
        """Add ``atom`` to the current residue, starting a new residue if needed.

        A new residue is started when the chain is empty, or when the residue
        number or insertion code differs from the current residue.  If only
        the residue name differs, the atom stays in the current residue when
        it carries an alternate location indicator (a point mutation);
        otherwise a warning is issued and a new residue is started.
        """
        current = self._current_residue
        if len(self.residues) == 0:
            # Create a residue if none have been created
            start_new_residue = True
        elif (current.number != atom.residue_number
              or current.insertion_code != atom.insertion_code):
            # Create a residue if the residue information has changed
            start_new_residue = True
        elif current.name_with_spaces == atom.residue_name_with_spaces:
            # This is a normal case: number, name, and iCode have not changed
            start_new_residue = False
        elif atom.alternate_location_indicator != ' ':
            # OK - this is a point mutation, Residue._add_atom will know what to do
            start_new_residue = False
        else:
            # Only residue name does not match
            warnings.warn("WARNING: two consecutive residues with same number (%s, %s)" % (atom, current.atoms[-1]))
            start_new_residue = True
        if start_new_residue:
            self._add_residue(Residue(atom.residue_name_with_spaces,
                                      atom.residue_number,
                                      atom.insertion_code,
                                      atom.alternate_location_indicator))
        self._current_residue._add_atom(atom)

    def _add_residue(self, residue):
        """Append ``residue``, make it current, and register it in both indexes."""
        if len(self.residues) == 0:
            residue.is_first_in_chain = True
        self.residues.append(residue)
        self._current_residue = residue
        key = str(residue.number) + residue.insertion_code
        # only store the first residue with a particular key
        if key not in self.residues_by_num_icode:
            self.residues_by_num_icode[key] = residue
        if residue.number not in self.residues_by_number:
            self.residues_by_number[residue.number] = residue

    def write(self, next_serial_number, output_stream=sys.stdout):
        """Write every residue, then a TER record if this chain has one.

        ``next_serial_number`` is a shared counter object exposing ``val``
        and ``increment()``; the TER record consumes one serial number.
        """
        for residue in self.residues:
            residue.write(next_serial_number, output_stream)
        if self.has_ter_record:
            r = self.residues[-1]
            # PDB TER layout: serial in columns 7-11, residue name 18-20,
            # chain id 22, residue number 23-26, insertion code 27.
            output_stream.write("TER   %5d      %3s %1s%4d%1s\n" % (
                next_serial_number.val, r.name_with_spaces, self.chain_id,
                r.number, r.insertion_code))
            next_serial_number.increment()

    def _add_ter_record(self):
        """Mark the chain as terminated by a TER record and finalize it."""
        self.has_ter_record = True
        self._finalize()

    def get_residue(self, residue_number, insertion_code=' '):
        """Return the first residue with this number and insertion code.

        Raises KeyError when no such residue was added to the chain.
        """
        return self.residues_by_num_icode[str(residue_number) + insertion_code]

    def __contains__(self, residue_number):
        return residue_number in self.residues_by_number

    def __getitem__(self, residue_number):
        """Returns the FIRST residue in this chain with a particular residue number"""
        return self.residues_by_number[residue_number]

    def __iter__(self):
        for res in self.residues:
            yield res

    def iter_residues(self):
        """Yield each residue in file order."""
        for res in self:
            yield res

    def iter_atoms(self):
        """Yield every atom of every residue."""
        for res in self:
            for atom in res:
                yield atom

    def iter_positions(self, include_alt_loc=False):
        """Yield atom positions, forwarding ``include_alt_loc`` to each residue."""
        for res in self:
            for loc in res.iter_positions(include_alt_loc):
                yield loc

    def __len__(self):
        return len(self.residues)

    def _finalize(self):
        """Flag the first and last residues and finalize every residue.

        Does nothing for a chain that has no residues.
        """
        if not self.residues:
            return
        self.residues[0].is_first_in_chain = True
        self.residues[-1].is_final_in_chain = True
        for residue in self.residues:
            residue._finalize()
class Residue(object):
    """One residue of a chain: an ordered list of atoms.

    A residue can have several alternate locations, each of which may carry
    its own residue name; they are stored in ``locations`` keyed by the
    alternate location indicator.  Atoms are indexed in ``atoms_by_name``
    both by ``atom.name`` and by ``atom.name_with_spaces``.
    """

    def __init__(self, name, number, insertion_code=' ', primary_alternate_location_indicator=' '):
        alt_loc = primary_alternate_location_indicator
        self.primary_location_id = alt_loc
        self.locations = {}
        self.locations[alt_loc] = Residue.Location(alt_loc, name)
        # Goes through the name_with_spaces property, i.e. it updates the
        # primary Location created just above.
        self.name_with_spaces = name
        self.number = number
        self.insertion_code = insertion_code
        self.atoms = []
        self.atoms_by_name = {}
        self.is_first_in_chain = False
        self.is_final_in_chain = False
        self._current_atom = None

    def _add_atom(self, atom):
        """Add ``atom`` to this residue.

        If an atom with the same name already exists and the new atom carries
        an alternate location the existing atom does not have yet, the new
        positions are merged into the existing atom and no atom is added.
        A true duplicate (same name, same alternate location) produces a
        warning and is then added like a new atom.
        """
        alt_loc = atom.alternate_location_indicator
        if alt_loc not in self.locations:
            self.locations[alt_loc] = Residue.Location(alt_loc, atom.residue_name_with_spaces)
        assert atom.residue_number == self.number
        assert atom.insertion_code == self.insertion_code
        # Check whether this is an existing atom with another position
        if atom.name_with_spaces in self.atoms_by_name:
            old_atom = self.atoms_by_name[atom.name_with_spaces]
            # Unless this is a duplicated atom (warn about file error)
            if atom.alternate_location_indicator in old_atom.locations:
                warnings.warn("WARNING: duplicate atom (%s, %s)" % (atom, old_atom._pdb_string(old_atom.serial_number, atom.alternate_location_indicator)))
            else:
                for alt_loc, position in atom.locations.items():
                    old_atom.locations[alt_loc] = position
                return  # no new atom added
        # actually use new atom
        self.atoms_by_name[atom.name] = atom
        self.atoms_by_name[atom.name_with_spaces] = atom
        self.atoms.append(atom)
        self._current_atom = atom

    def write(self, next_serial_number, output_stream=sys.stdout, alt_loc="*"):
        """Write every atom, passing the serial counter and ``alt_loc`` through."""
        for atom in self.atoms:
            atom.write(next_serial_number, output_stream, alt_loc)

    def _finalize(self):
        """Propagate this residue's first/final-in-chain flags to its atoms."""
        if len(self.atoms) > 0:
            self.atoms[0].is_first_atom_in_chain = self.is_first_in_chain
            self.atoms[-1].is_final_atom_in_chain = self.is_final_in_chain
        for atom in self.atoms:
            atom.is_first_residue_in_chain = self.is_first_in_chain
            atom.is_final_residue_in_chain = self.is_final_in_chain

    def set_name_with_spaces(self, name, alt_loc=None):
        """Set the residue name for ``alt_loc`` (primary location by default)."""
        # Gromacs ffamber PDB files can have 4-character residue names
        # assert len(name) == 3
        if alt_loc is None:
            alt_loc = self.primary_location_id
        loc = self.locations[alt_loc]
        loc.name_with_spaces = name
        loc.name = name.strip()

    def get_name_with_spaces(self, alt_loc=None):
        """Return the unstripped residue name for ``alt_loc`` (primary by default)."""
        if alt_loc is None:
            alt_loc = self.primary_location_id
        loc = self.locations[alt_loc]
        return loc.name_with_spaces

    name_with_spaces = property(get_name_with_spaces, set_name_with_spaces, doc='four-character residue name including spaces')

    def get_name(self, alt_loc=None):
        """Return the stripped residue name for ``alt_loc`` (primary by default)."""
        if alt_loc is None:
            alt_loc = self.primary_location_id
        loc = self.locations[alt_loc]
        return loc.name

    name = property(get_name, doc='residue name')

    def get_atom(self, atom_name):
        """Return the atom registered under ``atom_name`` (KeyError if absent)."""
        return self.atoms_by_name[atom_name]

    def __contains__(self, atom_name):
        return atom_name in self.atoms_by_name

    def __getitem__(self, atom_name):
        """Returns the FIRST atom in this residue with a particular atom name"""
        return self.atoms_by_name[atom_name]

    def __iter__(self):
        """Iterate over the atoms selected by iter_atoms() with its default argument.

        Example (Python 2 interactive session; pdb_lines is a list of ATOM
        record lines for one residue):

        > res = Residue("CYS", 42)
        > for l in pdb_lines:
        >     res._add_atom(Atom(l))
        > for atom in res:
        >     print atom
        """
        for atom in self.iter_atoms():
            yield atom

    # Three possibilities: primary alt_loc, certain alt_loc, or all alt_locs
    def iter_atoms(self, alt_loc=None):
        """Yield atoms that have a position at one of the requested locations.

        ``alt_loc`` of None or "" selects the primary location, "*" selects
        every location, and any other string is read as a collection of
        one-character location indicators.
        """
        if alt_loc is None or alt_loc == "":
            locs = [self.primary_location_id]
        elif alt_loc == "*":
            locs = None  # means all locations
        else:
            locs = list(alt_loc)
        # If an atom has any location in alt_loc, emit the atom
        for atom in self.atoms:
            if any(locs is None or loc2 in locs for loc2 in atom.locations):
                yield atom

    def iter_positions(self, include_alt_loc=False):
        """
        Yield atom positions.

        By default one position per atom is returned (``atom.position``),
        even if an individual atom has multiple positions; with
        include_alt_loc=True every position of every atom is returned.

        Note that this is a method and must be called:

        > for c in res.iter_positions():
        >     print c
        """
        for atom in self:
            if include_alt_loc:
                for loc in atom.iter_positions():
                    yield loc
            else:
                yield atom.position

    def __len__(self):
        return len(self.atoms)

    # Residues can have multiple locations, based on alt_loc indicator
    class Location:
        """
        Inner class of residue to allow different residue names for different alternate_locations.
        """

        def __init__(self, alternate_location_indicator, residue_name_with_spaces):
            self.alternate_location_indicator = alternate_location_indicator
            self.residue_name_with_spaces = residue_name_with_spaces
            # These two attributes are what Residue.get_name_with_spaces()
            # and Residue.get_name() read; set them here so that locations
            # created for alternate positions answer those queries too.
            self.name_with_spaces = residue_name_with_spaces
            self.name = residue_name_with_spaces.strip()
class Atom(object):
    """Atom represents one atom in a PDB structure.

    The per-location data (position, occupancy, temperature factor) lives in
    Atom.Location objects stored in the ``locations`` dictionary, keyed by
    alternate location indicator.  The convenience properties (``position``,
    ``x``, ``y``, ``z``, ``occupancy``, ``temperature_factor``) all refer to
    the default location, whose key is ``default_location_id``.
    """

    def __init__(self, pdb_line):
        """Create a new pdb.Atom from an ATOM or HETATM line.

        Example line:
        ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
        00000000011111111112222222222333333333344444444445555555555666666666677777777778
        12345678901234567890123456789012345678901234567890123456789012345678901234567890

        ATOM line format description from
          http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

        COLUMNS      DATA TYPE        CONTENTS
        --------------------------------------------------------------------------------
         1 -  6      Record name      "ATOM  "
         7 - 11      Integer          Atom serial number.
        13 - 16      Atom             Atom name.
        17           Character        Alternate location indicator.
        18 - 20      Residue name     Residue name.
        22           Character        Chain identifier.
        23 - 26      Integer          Residue sequence number.
        27           AChar            Code for insertion of residues.
        31 - 38      Real(8.3)        Orthogonal coordinates for X in Angstroms.
        39 - 46      Real(8.3)        Orthogonal coordinates for Y in Angstroms.
        47 - 54      Real(8.3)        Orthogonal coordinates for Z in Angstroms.
        55 - 60      Real(6.2)        Occupancy.
        61 - 66      Real(6.2)        Temperature factor (Default = 0.0).
        73 - 76      LString(4)       Segment identifier, left-justified.
        77 - 78      LString(2)       Element symbol, right-justified.
        79 - 80      LString(2)       Charge on the atom.

        Raises ValueError when a numeric field cannot be converted or when a
        character is found in column 21 although the three-column residue name
        is not full ("Misaligned residue name").  A line too short to contain
        columns 17, 22 or 27 raises IndexError.
        """
        # We might modify first/final status during _finalize() methods
        self.is_first_atom_in_chain = False
        self.is_final_atom_in_chain = False
        self.is_first_residue_in_chain = False
        self.is_final_residue_in_chain = False
        # Start parsing fields from pdb line
        self.record_name = pdb_line[0:6].strip()
        self.serial_number = int(pdb_line[6:11])
        # Goes through the property setter, which also sets the stripped name
        self.name_with_spaces = pdb_line[12:16]
        alternate_location_indicator = pdb_line[16]

        self.residue_name_with_spaces = pdb_line[17:20]
        # In some MD codes, notably ffamber in gromacs, residue name has a fourth character in
        # column 21
        possible_fourth_character = pdb_line[20:21]
        if possible_fourth_character != " ":
            # Fourth character should only be there if official 3 are already full
            if len(self.residue_name_with_spaces.strip()) != 3:
                raise ValueError('Misaligned residue name: %s' % pdb_line)
            self.residue_name_with_spaces += possible_fourth_character
        self.residue_name = self.residue_name_with_spaces.strip()

        self.chain_id = pdb_line[21]
        self.residue_number = int(pdb_line[22:26])
        self.insertion_code = pdb_line[26]
        # coordinates, occupancy, and temperature factor belong in Atom.Location object
        x = float(pdb_line[30:38])
        y = float(pdb_line[38:46])
        z = float(pdb_line[46:54])
        occupancy = float(pdb_line[54:60])
        temperature_factor = float(pdb_line[60:66]) * unit.angstroms * unit.angstroms
        self.locations = {}
        loc = Atom.Location(
            alternate_location_indicator,
            Vec3([x, y, z]) * unit.angstroms,
            occupancy,
            temperature_factor,
            self.residue_name_with_spaces)
        self.locations[alternate_location_indicator] = loc
        self.default_location_id = alternate_location_indicator
        # segment id, element_symbol, and formal_charge are not always present
        self.segment_id = pdb_line[72:76].strip()
        self.element_symbol = pdb_line[76:78].strip()
        try:
            self.formal_charge = int(pdb_line[78:80])
        except ValueError:
            self.formal_charge = None
        # figure out atom element
        try:
            # First try to find a sensible element symbol from columns 76-77
            self.element = element.get_by_symbol(self.element_symbol)
        except KeyError:
            # otherwise, deduce element from first two characters of atom name
            # remove digits found in some hydrogen atom names
            symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
            try:
                # Some molecular dynamics PDB files, such as gromacs with ffamber force
                # field, include 4-character hydrogen atom names beginning with "H".
                # Hopefully elements like holmium (Ho) and mercury (Hg) will have fewer than four
                # characters in the atom name.
                if len(self.name) == 4 and self.name[0:1] == "H":
                    self.element = element.hydrogen
                else:
                    self.element = element.get_by_symbol(symbol)
            except KeyError:
                # OK, I give up
                self.element = None
                warnings.warn("WARNING: Unknown element '%s': %s" % (symbol, pdb_line))

    def iter_locations(self):
        """
        Iterate over Atom.Location objects for this atom, including primary location.

        >>> atom = Atom("ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C")
        >>> for c in atom.iter_locations():
        ...     print(c)
        ...
        [6.167, 22.607, 20.046] A
        """
        for alt_loc in self.locations:
            yield self.locations[alt_loc]

    def iter_positions(self):
        """
        Iterate over atomic positions.  Returns Quantity(Vec3(), unit) objects, unlike
        iter_locations, which returns Atom.Location objects.
        """
        for loc in self.iter_locations():
            yield loc.position

    def iter_coordinates(self):
        """
        Iterate over x, y, z values of primary atom position.

        >>> atom = Atom("ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C")
        >>> for c in atom.iter_coordinates():
        ...     print(c)
        ...
        6.167 A
        22.607 A
        20.046 A
        """
        for coord in self.position:
            yield coord

    # Hide existence of multiple alternate locations to avoid scaring casual users
    def get_location(self, location_id=None):
        """Return the Atom.Location stored under location_id.

        When location_id is None the default location is returned.  Raises
        KeyError if no location is stored under the requested id.
        """
        if location_id is None:
            location_id = self.default_location_id
        return self.locations[location_id]

    def set_location(self, new_location, location_id=None):
        """Store new_location under location_id (the default id when None)."""
        if location_id is None:
            location_id = self.default_location_id
        self.locations[location_id] = new_location

    location = property(get_location, set_location, doc='default Atom.Location object')

    def get_position(self):
        """Return the position of the default location."""
        return self.location.position

    def set_position(self, coords):
        """Replace the position of the default location."""
        self.location.position = coords

    position = property(get_position, set_position, doc='orthogonal coordinates')

    def get_alternate_location_indicator(self):
        """Return the alternate location indicator of the default location."""
        return self.location.alternate_location_indicator

    alternate_location_indicator = property(get_alternate_location_indicator)

    def get_occupancy(self):
        """Return the occupancy of the default location."""
        return self.location.occupancy

    occupancy = property(get_occupancy)

    def get_temperature_factor(self):
        """Return the temperature factor of the default location."""
        return self.location.temperature_factor

    temperature_factor = property(get_temperature_factor)

    def get_x(self):
        """Return the first coordinate of the default position."""
        return self.position[0]

    x = property(get_x)

    def get_y(self):
        """Return the second coordinate of the default position."""
        return self.position[1]

    y = property(get_y)

    def get_z(self):
        """Return the third coordinate of the default position."""
        return self.position[2]

    z = property(get_z)

    def _pdb_string(self, serial_number=None, alternate_location_indicator=None):
        """
        Produce a PDB line for this atom using a particular serial number and alternate location

        serial_number defaults to the atom's own serial number and
        alternate_location_indicator to the default location's indicator.
        Coordinates, occupancy and temperature factor are taken from the
        location stored under alternate_location_indicator; when no such
        location exists the default location's values are used instead.
        """
        if serial_number is None:
            serial_number = self.serial_number
        if alternate_location_indicator is None:
            alternate_location_indicator = self.alternate_location_indicator
        # Use the data of the location actually being written, so that each
        # alternate location gets its own coordinates rather than a copy of
        # the default location's.
        loc = self.locations.get(alternate_location_indicator)
        if loc is None:
            loc = self.location
        # produce PDB line in three parts: names, numbers, and end
        # Accomodate 4-character residue names that use column 21
        long_res_name = self.residue_name_with_spaces
        if len(long_res_name) == 3:
            long_res_name += " "
        assert len(long_res_name) == 4
        # Columns 1-27, followed by the three blank columns 28-30
        names = "%-6s%5d %4s%1s%4s%1s%4d%1s   " % (
            self.record_name, serial_number,
            self.name_with_spaces, alternate_location_indicator,
            long_res_name, self.chain_id,
            self.residue_number, self.insertion_code)
        # Columns 31-66, followed by the six blank columns 67-72
        numbers = "%8.3f%8.3f%8.3f%6.2f%6.2f      " % (
            loc.position[0].value_in_unit(unit.angstroms),
            loc.position[1].value_in_unit(unit.angstroms),
            loc.position[2].value_in_unit(unit.angstroms),
            loc.occupancy,
            loc.temperature_factor.value_in_unit(unit.angstroms * unit.angstroms))
        # Columns 73-78: segment id and element symbol
        end = "%-4s%2s" % (self.segment_id, self.element_symbol)
        # Columns 79-80: formal charge, blank when unknown
        formal_charge = "  "
        if self.formal_charge is not None:
            formal_charge = "%+2d" % self.formal_charge
        return names + numbers + end + formal_charge

    def __str__(self):
        """Return the PDB line for the default location with the atom's own serial number."""
        return self._pdb_string(self.serial_number, self.alternate_location_indicator)

    def write(self, next_serial_number, output_stream=sys.stdout, alt_loc="*"):
        """
        alt_loc = "*" means write all alternate locations
        alt_loc = None means write just the primary location
        alt_loc = "AB" means write locations "A" and "B"

        One line is written to output_stream per location.  next_serial_number
        must provide a ``val`` attribute, used as the serial number of the
        line, and an ``increment()`` method, called after each line.
        """
        if alt_loc is None or alt_loc == "":
            locs = [self.default_location_id]
        elif alt_loc == "*":
            locs = sorted(self.locations)
        else:
            locs = list(alt_loc)
        for loc_id in locs:
            # write() rather than the print statement, so that the method
            # behaves the same under Python 2 and Python 3
            output_stream.write(self._pdb_string(next_serial_number.val, loc_id) + "\n")
            next_serial_number.increment()

    def set_name_with_spaces(self, name):
        """Set the four-character atom name; also updates the stripped name."""
        assert len(name) == 4
        self._name_with_spaces = name
        self._name = name.strip()

    def get_name_with_spaces(self):
        """Return the four-character atom name including spaces."""
        return self._name_with_spaces

    name_with_spaces = property(get_name_with_spaces, set_name_with_spaces,
                                doc='four-character atom name including spaces')

    def get_name(self):
        """Return the atom name with surrounding spaces removed."""
        return self._name

    name = property(get_name, doc='atom name')

    class Location(object):
        """
        Inner class of Atom for holding alternate locations
        """

        def __init__(self, alt_loc, position, occupancy, temperature_factor, residue_name):
            """Store the data of one alternate location unchanged."""
            self.alternate_location_indicator = alt_loc
            self.position = position
            self.occupancy = occupancy
            self.temperature_factor = temperature_factor
            self.residue_name = residue_name

        def __iter__(self):
            """
            Iterate over the coordinates of this location's position.

            Examples

            >>> from simtk.vec3 import Vec3
            >>> import simtk.unit as unit
            >>> l = Atom.Location(' ', Vec3([1,2,3])*unit.angstroms, 1.0, 20.0*unit.angstroms**2, "XXX")
            >>> for c in l:
            ...     print(c)
            ...
            1 A
            2 A
            3 A
            """
            for coord in self.position:
                yield coord

        def __str__(self):
            """Return the string form of the position."""
            return str(self.position)
# run module directly for testing
if __name__ == '__main__':
    import doctest
    import gzip
    import os
    import re
    import sys
    import time

    # Test the examples in the docstrings
    doctest.testmod(sys.modules[__name__])

    # Test atom line parsing.  PDB records are fixed-column, so the exact
    # spacing inside these literals is significant.
    pdb_line = "ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C"
    a = Atom(pdb_line)
    assert a.record_name == "ATOM"
    assert a.serial_number == 2209
    assert a.name == "CB"
    assert a.name_with_spaces == " CB "
    assert a.residue_name == "TYR"
    assert a.residue_name_with_spaces == "TYR"
    assert a.chain_id == "A"
    assert a.residue_number == 299
    assert a.insertion_code == " "
    assert a.alternate_location_indicator == " "
    assert a.x == 6.167 * unit.angstroms
    assert a.y == 22.607 * unit.angstroms
    assert a.z == 20.046 * unit.angstroms
    assert a.occupancy == 1.00
    assert a.temperature_factor == 8.12 * unit.angstroms * unit.angstroms
    assert a.segment_id == ""
    assert a.element_symbol == "C"
    # The line written back must match the line that was parsed
    assert str(a).rstrip() == pdb_line.rstrip()

    a = Atom("ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C")
    # misaligned residue name - bad
    try:
        a = Atom("ATOM   2209  CB   TYRA 299       6.167  22.607  20.046  1.00  8.12           C")
        assert(False)
    except ValueError:
        pass
    # four character residue name -- not so bad
    a = Atom("ATOM   2209  CB  NTYRA 299       6.167  22.607  20.046  1.00  8.12           C")

    atom_count = 0
    residue_count = 0
    chain_count = 0
    model_count = 0
    structure_count = 0

    def parse_one_pdb(pdb_file_name):
        """Parse one (optionally gzipped) PDB file and add its contents to the global counters."""
        global atom_count, residue_count, chain_count, model_count, structure_count
        print(pdb_file_name)
        if pdb_file_name[-3:] == ".gz":
            fh = gzip.open(pdb_file_name)
        else:
            fh = open(pdb_file_name)
        try:
            pdb = PdbStructure(fh, load_all_models=True)
            atom_count += len(list(pdb.iter_atoms()))
            residue_count += len(list(pdb.iter_residues()))
            chain_count += len(list(pdb.iter_chains()))
            model_count += len(list(pdb.iter_models()))
            structure_count += 1
        finally:
            # Actually call close(); referencing the method without
            # parentheses leaves the file open.
            fh.close()
        return pdb

    def parse_pdb_division(full_subdir, subdir):
        """Parse every file in full_subdir whose name matches the pattern for division subdir."""
        for pdb_file in os.listdir(full_subdir):
            if not re.match(r"pdb.%2s.\.ent\.gz" % subdir, pdb_file):
                continue
            parse_one_pdb(os.path.join(full_subdir, pdb_file))

    # Parse one file
    pdb_file_name = "/home/Christopher Bruns/Desktop/1ARJ.pdb"
    if os.path.exists(pdb_file_name):
        parse_one_pdb(pdb_file_name)

    # try parsing the entire PDB
    pdb_dir = "/cygdrive/j/pdb/data/structures/divided/pdb"
    if os.path.exists(pdb_dir):
        # Switches selecting how much of the local PDB mirror to parse
        parse_entire_pdb = False
        parse_one_division = False
        parse_one_file = False
        start_time = time.time()
        if parse_one_file:
            pdb_id = "2aed"
            middle_two = pdb_id[1:3]
            full_pdb_file = os.path.join(pdb_dir, middle_two, "pdb%s.ent.gz" % pdb_id)
            parse_one_pdb(full_pdb_file)
        if parse_one_division:
            subdir = "ae"
            parse_pdb_division(os.path.join(pdb_dir, subdir), subdir)
        if parse_entire_pdb:
            for subdir in os.listdir(pdb_dir):
                if not len(subdir) == 2:
                    continue
                full_subdir = os.path.join(pdb_dir, subdir)
                if not os.path.isdir(full_subdir):
                    continue
                parse_pdb_division(full_subdir, subdir)

        end_time = time.time()
        elapsed = end_time - start_time
        minutes, seconds = divmod(elapsed, 60)
        hours, minutes = divmod(minutes, 60)
        # Each print() call takes a single string so the statement behaves
        # the same under Python 2 and Python 3.
        print("%dh:%02dm:%02ds elapsed" % (hours, minutes, seconds))

        print("%d atoms found" % atom_count)
        print("%d residues found" % residue_count)
        print("%d chains found" % chain_count)
        print("%d models found" % model_count)
        print("%d structures found" % structure_count)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment