Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
dc4d96fa
Commit
dc4d96fa
authored
Feb 26, 2011
by
Peter Eastman
Browse files
Cleaning up Python wrappers
parent
65b9d0b6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
2 additions
and
1160 deletions
+2
-1160
wrappers/python/MANIFEST.in
wrappers/python/MANIFEST.in
+0
-6
wrappers/python/simtk/chem/element.py
wrappers/python/simtk/chem/element.py
+0
-147
wrappers/python/simtk/chem/openmm/__init__.py
wrappers/python/simtk/chem/openmm/__init__.py
+2
-29
wrappers/python/simtk/chem/pdbstructure.py
wrappers/python/simtk/chem/pdbstructure.py
+0
-978
No files found.
wrappers/python/MANIFEST.in
View file @
dc4d96fa
include setup.py
include setup.py
include *.py
include *.py
include *.txt
include *.txt
include doc/*.pdf
include images/*.bmp
include images/*.png
global-include README
global-include README
recursive-include simtk *.txt *.py
recursive-include simtk *.txt *.py
recursive-include OpenMM *.txt *.py *.so *.dll *.lib *.dylib *.h
recursive-include OpenMM *.txt *.py *.so *.dll *.lib *.dylib *.h
recursive-include examples *.txt *.py *.pdb *.top *.prmtop *.crd *.sh *.leap
recursive-include test *.txt *.py *.pdb *.top *.prmtop *.crd *.sh *.in *log
recursive-include scripts *.txt *.py
recursive-include src *.txt *.py *.i *.c *.cxx *.h *.sh Doxyfile
recursive-include src *.txt *.py *.i *.c *.cxx *.h *.sh Doxyfile
prune src/swig_doxygen/doxygen/html
prune src/swig_doxygen/doxygen/html
prune src/swig_doxygen/doxygen/xml
prune src/swig_doxygen/doxygen/xml
...
...
wrappers/python/simtk/chem/element.py
deleted
100644 → 0
View file @
65b9d0b6
#!/bin/env python
"""
element.py: Used for managing elements.
"""
__author__
=
"Christopher M. Bruns"
__version__
=
"1.0"
from
simtk.unit
import
daltons
class
Element
:
elements_by_symbol
=
{}
def
__init__
(
self
,
number
,
name
,
symbol
,
mass
):
self
.
atomic_number
=
number
self
.
name
=
name
self
.
symbol
=
symbol
self
.
mass
=
mass
# Index this element in a global table
s
=
symbol
.
strip
().
upper
()
assert
s
not
in
Element
.
elements_by_symbol
Element
.
elements_by_symbol
[
s
]
=
self
def
get_by_symbol
(
symbol
):
s
=
symbol
.
strip
().
upper
()
return
Element
.
elements_by_symbol
[
s
]
hydrogen
=
Element
(
1
,
"hydrogen"
,
"H"
,
1.007947
*
daltons
)
deuterium
=
Element
(
1
,
"deuterium"
,
"D"
,
2.01355321270
*
daltons
)
helium
=
Element
(
2
,
"helium"
,
"He"
,
4.003
*
daltons
)
lithium
=
Element
(
3
,
"lithium"
,
"Li"
,
6.9412
*
daltons
)
beryllium
=
Element
(
4
,
"beryllium"
,
"Be"
,
9.0121823
*
daltons
)
boron
=
Element
(
5
,
"boron"
,
"B"
,
10.8117
*
daltons
)
carbon
=
Element
(
6
,
"carbon"
,
"C"
,
12.01078
*
daltons
)
nitrogen
=
Element
(
7
,
"nitrogen"
,
"N"
,
14.00672
*
daltons
)
oxygen
=
Element
(
8
,
"oxygen"
,
"O"
,
15.99943
*
daltons
)
fluorine
=
Element
(
9
,
"fluorine"
,
"F"
,
18.99840325
*
daltons
)
neon
=
Element
(
10
,
"neon"
,
"Ne"
,
20.17976
*
daltons
)
sodium
=
Element
(
11
,
"sodium"
,
"Na"
,
22.989769282
*
daltons
)
magnesium
=
Element
(
12
,
"magnesium"
,
"Mg"
,
24.30506
*
daltons
)
aluminum
=
Element
(
13
,
"aluminum"
,
"Al"
,
26.98153868
*
daltons
)
silicon
=
Element
(
14
,
"silicon"
,
"Si"
,
28.08553
*
daltons
)
phosphorus
=
Element
(
15
,
"phosphorus"
,
"P"
,
30.9737622
*
daltons
)
sulfur
=
Element
(
16
,
"sulfur"
,
"S"
,
32.0655
*
daltons
)
chlorine
=
Element
(
17
,
"chlorine"
,
"Cl"
,
35.4532
*
daltons
)
argon
=
Element
(
18
,
"argon"
,
"Ar"
,
39.9481
*
daltons
)
potassium
=
Element
(
19
,
"potassium"
,
"K"
,
39.09831
*
daltons
)
calcium
=
Element
(
20
,
"calcium"
,
"Ca"
,
40.0784
*
daltons
)
scandium
=
Element
(
21
,
"scandium"
,
"Sc"
,
44.9559126
*
daltons
)
titanium
=
Element
(
22
,
"titanium"
,
"Ti"
,
47.8671
*
daltons
)
vanadium
=
Element
(
23
,
"vanadium"
,
"V"
,
50.94151
*
daltons
)
chromium
=
Element
(
24
,
"chromium"
,
"Cr"
,
51.99616
*
daltons
)
manganese
=
Element
(
25
,
"manganese"
,
"Mn"
,
54.9380455
*
daltons
)
iron
=
Element
(
26
,
"iron"
,
"Fe"
,
55.8452
*
daltons
)
cobalt
=
Element
(
27
,
"cobalt"
,
"Co"
,
58.9331955
*
daltons
)
nickel
=
Element
(
28
,
"nickel"
,
"Ni"
,
58.69342
*
daltons
)
copper
=
Element
(
29
,
"copper"
,
"Cu"
,
63.5463
*
daltons
)
zinc
=
Element
(
30
,
"zinc"
,
"Zn"
,
65.4094
*
daltons
)
gallium
=
Element
(
31
,
"gallium"
,
"Ga"
,
69.7231
*
daltons
)
germanium
=
Element
(
32
,
"germanium"
,
"Ge"
,
72.641
*
daltons
)
arsenic
=
Element
(
33
,
"arsenic"
,
"As"
,
74.921602
*
daltons
)
selenium
=
Element
(
34
,
"selenium"
,
"Se"
,
78.963
*
daltons
)
bromine
=
Element
(
35
,
"bromine"
,
"Br"
,
79.9041
*
daltons
)
krypton
=
Element
(
36
,
"krypton"
,
"Kr"
,
83.7982
*
daltons
)
rubidium
=
Element
(
37
,
"rubidium"
,
"Rb"
,
85.46783
*
daltons
)
strontium
=
Element
(
38
,
"strontium"
,
"Sr"
,
87.621
*
daltons
)
yttrium
=
Element
(
39
,
"yttrium"
,
"Y"
,
88.905852
*
daltons
)
zirconium
=
Element
(
40
,
"zirconium"
,
"Zr"
,
91.2242
*
daltons
)
niobium
=
Element
(
41
,
"niobium"
,
"Nb"
,
92.906382
*
daltons
)
molybdenum
=
Element
(
42
,
"molybdenum"
,
"Mo"
,
95.942
*
daltons
)
technetium
=
Element
(
43
,
"technetium"
,
"Tc"
,
98
*
daltons
)
ruthenium
=
Element
(
44
,
"ruthenium"
,
"Ru"
,
101.072
*
daltons
)
rhodium
=
Element
(
45
,
"rhodium"
,
"Rh"
,
102.905502
*
daltons
)
palladium
=
Element
(
46
,
"palladium"
,
"Pd"
,
106.421
*
daltons
)
silver
=
Element
(
47
,
"silver"
,
"Ag"
,
107.86822
*
daltons
)
cadmium
=
Element
(
48
,
"cadmium"
,
"Cd"
,
112.4118
*
daltons
)
indium
=
Element
(
49
,
"indium"
,
"In"
,
114.8183
*
daltons
)
tin
=
Element
(
50
,
"tin"
,
"Sn"
,
118.7107
*
daltons
)
antimony
=
Element
(
51
,
"antimony"
,
"Sb"
,
121.7601
*
daltons
)
tellurium
=
Element
(
52
,
"tellurium"
,
"Te"
,
127.603
*
daltons
)
iodine
=
Element
(
53
,
"iodine"
,
"I"
,
126.904473
*
daltons
)
xenon
=
Element
(
54
,
"xenon"
,
"Xe"
,
131.2936
*
daltons
)
cesium
=
Element
(
55
,
"cesium"
,
"Cs"
,
132.90545192
*
daltons
)
barium
=
Element
(
56
,
"barium"
,
"Ba"
,
137.3277
*
daltons
)
lanthanum
=
Element
(
57
,
"lanthanum"
,
"La"
,
138.905477
*
daltons
)
cerium
=
Element
(
58
,
"cerium"
,
"Ce"
,
140.1161
*
daltons
)
praseodymium
=
Element
(
59
,
"praseodymium"
,
"Pr"
,
140.907652
*
daltons
)
neodymium
=
Element
(
60
,
"neodymium"
,
"Nd"
,
144.2423
*
daltons
)
promethium
=
Element
(
61
,
"promethium"
,
"Pm"
,
145
*
daltons
)
samarium
=
Element
(
62
,
"samarium"
,
"Sm"
,
150.362
*
daltons
)
europium
=
Element
(
63
,
"europium"
,
"Eu"
,
151.9641
*
daltons
)
gadolinium
=
Element
(
64
,
"gadolinium"
,
"Gd"
,
157.253
*
daltons
)
terbium
=
Element
(
65
,
"terbium"
,
"Tb"
,
158.925352
*
daltons
)
dysprosium
=
Element
(
66
,
"dysprosium"
,
"Dy"
,
162.5001
*
daltons
)
holmium
=
Element
(
67
,
"holmium"
,
"Ho"
,
164.930322
*
daltons
)
erbium
=
Element
(
68
,
"erbium"
,
"Er"
,
167.2593
*
daltons
)
thulium
=
Element
(
69
,
"thulium"
,
"Tm"
,
168.934212
*
daltons
)
ytterbium
=
Element
(
70
,
"ytterbium"
,
"Yb"
,
173.043
*
daltons
)
lutetium
=
Element
(
71
,
"lutetium"
,
"Lu"
,
174.9671
*
daltons
)
hafnium
=
Element
(
72
,
"hafnium"
,
"Hf"
,
178.492
*
daltons
)
tantalum
=
Element
(
73
,
"tantalum"
,
"Ta"
,
180.947882
*
daltons
)
tungsten
=
Element
(
74
,
"tungsten"
,
"W"
,
183.841
*
daltons
)
rhenium
=
Element
(
75
,
"rhenium"
,
"Re"
,
186.2071
*
daltons
)
osmium
=
Element
(
76
,
"osmium"
,
"Os"
,
190.233
*
daltons
)
iridium
=
Element
(
77
,
"iridium"
,
"Ir"
,
192.2173
*
daltons
)
platinum
=
Element
(
78
,
"platinum"
,
"Pt"
,
195.0849
*
daltons
)
gold
=
Element
(
79
,
"gold"
,
"Au"
,
196.9665694
*
daltons
)
mercury
=
Element
(
80
,
"mercury"
,
"Hg"
,
200.592
*
daltons
)
thallium
=
Element
(
81
,
"thallium"
,
"Tl"
,
204.38332
*
daltons
)
lead
=
Element
(
82
,
"lead"
,
"Pb"
,
207.21
*
daltons
)
bismuth
=
Element
(
83
,
"bismuth"
,
"Bi"
,
208.980401
*
daltons
)
polonium
=
Element
(
84
,
"polonium"
,
"Po"
,
209
*
daltons
)
astatine
=
Element
(
85
,
"astatine"
,
"At"
,
210
*
daltons
)
radon
=
Element
(
86
,
"radon"
,
"Rn"
,
222.018
*
daltons
)
francium
=
Element
(
87
,
"francium"
,
"Fr"
,
223
*
daltons
)
radium
=
Element
(
88
,
"radium"
,
"Ra"
,
226
*
daltons
)
actinium
=
Element
(
89
,
"actinium"
,
"Ac"
,
227
*
daltons
)
thorium
=
Element
(
90
,
"thorium"
,
"Th"
,
232.038062
*
daltons
)
protactinium
=
Element
(
91
,
"protactinium"
,
"Pa"
,
231.035882
*
daltons
)
uranium
=
Element
(
92
,
"uranium"
,
"U"
,
238.028913
*
daltons
)
neptunium
=
Element
(
93
,
"neptunium"
,
"Np"
,
237
*
daltons
)
plutonium
=
Element
(
94
,
"plutonium"
,
"Pu"
,
244
*
daltons
)
americium
=
Element
(
95
,
"americium"
,
"Am"
,
243
*
daltons
)
curium
=
Element
(
96
,
"curium"
,
"Cm"
,
247
*
daltons
)
berkelium
=
Element
(
97
,
"berkelium"
,
"Bk"
,
247
*
daltons
)
californium
=
Element
(
98
,
"californium"
,
"Cf"
,
251
*
daltons
)
einsteinium
=
Element
(
99
,
"einsteinium"
,
"Es"
,
252
*
daltons
)
fermium
=
Element
(
100
,
"fermium"
,
"Fm"
,
257
*
daltons
)
mendelevium
=
Element
(
101
,
"mendelevium"
,
"Md"
,
258
*
daltons
)
nobelium
=
Element
(
102
,
"nobelium"
,
"No"
,
259
*
daltons
)
lawrencium
=
Element
(
103
,
"lawrencium"
,
"Lr"
,
262
*
daltons
)
rutherfordium
=
Element
(
104
,
"rutherfordium"
,
"Rf"
,
261
*
daltons
)
dubnium
=
Element
(
105
,
"dubnium"
,
"Db"
,
262
*
daltons
)
seaborgium
=
Element
(
106
,
"seaborgium"
,
"Sg"
,
266
*
daltons
)
bohrium
=
Element
(
107
,
"bohrium"
,
"Bh"
,
264
*
daltons
)
hassium
=
Element
(
108
,
"hassium"
,
"Hs"
,
269
*
daltons
)
meitnerium
=
Element
(
109
,
"meitnerium"
,
"Mt"
,
268
*
daltons
)
darmstadtium
=
Element
(
110
,
"darmstadtium"
,
"Ds"
,
281
*
daltons
)
roentgenium
=
Element
(
111
,
"roentgenium"
,
"Rg"
,
272
*
daltons
)
ununbium
=
Element
(
112
,
"ununbium"
,
"Uub"
,
285
*
daltons
)
ununtrium
=
Element
(
113
,
"ununtrium"
,
"Uut"
,
284
*
daltons
)
ununquadium
=
Element
(
114
,
"ununquadium"
,
"Uuq"
,
289
*
daltons
)
ununpentium
=
Element
(
115
,
"ununpentium"
,
"Uup"
,
288
*
daltons
)
ununhexium
=
Element
(
116
,
"ununhexium"
,
"Uuh"
,
292
*
daltons
)
wrappers/python/simtk/chem/openmm/__init__.py
View file @
dc4d96fa
...
@@ -32,33 +32,6 @@ else:
...
@@ -32,33 +32,6 @@ else:
flags
=
sys
.
getdlopenflags
()
flags
=
sys
.
getdlopenflags
()
sys
.
setdlopenflags
(
flags
|
ctypes
.
RTLD_GLOBAL
)
sys
.
setdlopenflags
(
flags
|
ctypes
.
RTLD_GLOBAL
)
import
simtk.chem
from
simtk.chem.openmm.openmm
import
*
skipPluginFilenames
=
[
"OpenMMFreeEnergy"
]
if
len
(
simtk
.
chem
.
__path__
)
>
1
:
raise
Exception
(
"more than one item in simtk.chem.openmm.__path__"
)
pluginPath
=
os
.
path
.
join
(
simtk
.
chem
.
__path__
[
0
],
'openmm'
,
'OpenMM'
,
'plugins'
)
pluginLoadingErrors
=
{}
pluginLoadedLibNames
=
[]
libFilenames
=
glob
.
glob
(
os
.
path
.
join
(
pluginPath
,
"*.%s"
%
(
libExt
)))
# Load plugins
for
filename
in
libFilenames
:
skipPlugin
=
False
for
pFilename
in
skipPluginFilenames
:
fullFilename
=
"%s%s.%s"
%
(
libPrefix
,
pFilename
,
libExt
)
if
os
.
path
.
split
(
filename
)[
-
1
]
==
fullFilename
:
skipPlugin
=
True
break
if
skipPlugin
:
continue
try
:
Platform
.
loadPluginLibrary
(
os
.
path
.
join
(
pluginPath
,
filename
))
pluginLoadedLibNames
.
append
(
filename
)
except
:
pluginLoadingErrors
[
filename
]
=
sys
.
exc_info
()
from
simtk.chem.openmm.openmm
import
Platform
pluginLoadedLibNames
=
Platform
.
loadPluginsFromDirectory
(
Platform
.
getDefaultPluginsDirectory
())
\ No newline at end of file
wrappers/python/simtk/chem/pdbstructure.py
deleted
100644 → 0
View file @
65b9d0b6
#!/bin/env python
"""
pdbstructure.py: Used for managing PDB formated files.
"""
__author__
=
"Christopher M. Bruns"
__version__
=
"1.0"
from
simtk.vec3
import
Vec3
import
simtk.unit
as
unit
import
simtk.chem.element
as
element
import
warnings
import
sys
class
PdbStructure
(
object
):
"""
PdbStructure object holds a parsed Protein Data Bank format file.
Examples:
Load a pdb structure from a file:
> pdb = PdbStructure(open("1ARJ.pdb"))
Fetch the first atom of the structure:
> print pdb.iter_atoms().next()
ATOM 1 O5' G N 17 13.768 -8.431 11.865 1.00 0.00 O
Loop over all of the atoms of the structure
> for atom in pdb.iter_atoms():
> print atom
ATOM 1 O5' G N 17 13.768 -8.431 11.865 1.00 0.00 O
...
Get a list of all atoms in the structure:
> atoms = list(pdb.iter_atoms())
also:
residues = list(pdb.iter_residues())
positions = list(pdb.iter_positions())
chains = list(pdb.iter_chains())
models = list(pdb.iter_models())
Fetch atomic coordinates of first atom:
> print pdb.iter_positions().next()
[13.768, -8.431, 11.865] A
or
> print pdb.iter_atoms().next().position
[13.768, -8.431, 11.865] A
Strip the length units from an atomic position:
> import simtk.unit
> pos = pdb.iter_positions().next()
> print pos
[13.768, -8.431, 11.865] A
> print pos / simtk.unit.angstroms
[13.768, -8.431, 11.865]
> print pos / simtk.unit.nanometers
[1.3768, -0.8431, 1.1865]
The hierarchical structure of the parsed PDB structure is as follows:
PdbStructure
Model
Chain
Residue
Atom
Location
Model - A PDB structure consists of one or more Models. Each model corresponds to one version of
an NMR structure, or to one frame of a molecular dynamics trajectory.
Chain - A Model contains one or more Chains. Each chain corresponds to one molecule, although multiple
water molecules are frequently included in the same chain.
Residue - A Chain contains one or more Residues. One Residue corresponds to one of the repeating
unit that constitutes a polymer such as protein or DNA. For non-polymeric molecules, one Residue
represents one molecule.
Atom - A Residue contains one or more Atoms. Atoms are chemical atoms.
Location - An atom can sometimes have more that one position, due to static disorder in X-ray
crystal structures. To see all of the atom positions, use the atom.iter_positions() method,
or pass the parameter "include_alt_loc=True" to one of the other iter_positions() methods.
> for pos in pdb.iter_positions(include_alt_loc=True):
> ...
Will loop over all atom positions, including multiple alternate locations for atoms that have
multiple positions. The default value of include_alt_loc is False for the iter_positions()
methods.
"""
def
__init__
(
self
,
input_stream
,
load_all_models
=
False
):
"""Create a PDB model from a PDB file stream.
Parameters:
- self (PdbStructure) The new object that is created.
- input_stream (stream) An input file stream, probably created with
open().
- load_all_models (bool) Whether to load every model of an NMR
structure or trajectory, or just load the first model, to save memory.
"""
# initialize models
self
.
load_all_models
=
load_all_models
self
.
models
=
[]
self
.
_current_model
=
None
self
.
default_model
=
None
self
.
models_by_number
=
{}
# read file
self
.
_load
(
input_stream
)
def
_load
(
self
,
input_stream
):
# Read one line at a time
for
pdb_line
in
input_stream
:
# Look for atoms
if
(
pdb_line
.
find
(
"ATOM "
)
==
0
)
or
(
pdb_line
.
find
(
"HETATM"
)
==
0
):
self
.
_add_atom
(
Atom
(
pdb_line
))
# Notice MODEL punctuation, for the next level of detail
# in the structure->model->chain->residue->atom->position hierarchy
elif
(
pdb_line
.
find
(
"MODEL"
)
==
0
):
model_number
=
int
(
pdb_line
[
10
:
14
])
self
.
_add_model
(
Model
(
model_number
))
elif
(
pdb_line
.
find
(
"ENDMDL"
)
==
0
):
self
.
_current_model
.
_finalize
()
if
not
self
.
load_all_models
:
break
elif
(
pdb_line
.
find
(
"END"
)
==
0
):
self
.
_current_model
.
_finalize
()
if
not
self
.
load_all_models
:
break
elif
(
pdb_line
.
find
(
"TER "
)
==
0
):
self
.
_current_model
.
_current_chain
.
_add_ter_record
()
self
.
_finalize
()
def
write
(
self
,
output_stream
=
sys
.
stdout
):
"""Write out structure in PDB format"""
for
model
in
self
.
models
:
if
len
(
model
.
chains
)
==
0
:
continue
if
len
(
self
.
models
)
>
1
:
print
>>
output_stream
,
"MODEL %4d"
%
(
model
.
number
)
model
.
write
(
output_stream
)
if
len
(
self
.
models
)
>
1
:
print
>>
output_stream
,
"ENDMDL"
print
>>
output_stream
,
"END"
def
_add_model
(
self
,
model
):
if
self
.
default_model
==
None
:
self
.
default_model
=
model
self
.
models
.
append
(
model
)
self
.
_current_model
=
model
if
model
.
number
not
in
self
.
models_by_number
:
self
.
models_by_number
[
model
.
number
]
=
model
def
get_model
(
self
,
model_number
):
return
self
.
models_by_number
[
model_number
]
def
model_numbers
(
self
):
return
self
.
models_by_number
.
keys
()
def
__contains__
(
self
,
model_number
):
return
self
.
models_by_number
.
__contains__
(
model_number
)
def
__getitem__
(
self
,
model_number
):
return
self
.
models_by_number
[
model_number
]
def
__iter__
(
self
):
for
model
in
self
.
models
:
yield
model
def
iter_models
(
self
,
use_all_models
=
False
):
if
use_all_models
:
for
model
in
self
:
yield
model
elif
len
(
self
.
models
)
>
0
:
yield
self
.
models
[
0
]
def
iter_chains
(
self
,
use_all_models
=
False
):
for
model
in
self
.
iter_models
(
use_all_models
):
for
chain
in
model
.
iter_chains
():
yield
chain
def
iter_residues
(
self
,
use_all_models
=
False
):
for
model
in
self
.
iter_models
(
use_all_models
):
for
res
in
model
.
iter_residues
():
yield
res
def
iter_atoms
(
self
,
use_all_models
=
False
):
for
model
in
self
.
iter_models
(
use_all_models
):
for
atom
in
model
.
iter_atoms
():
yield
atom
def
iter_positions
(
self
,
use_all_models
=
False
,
include_alt_loc
=
False
):
"""
Iterate over atomic positions.
Parameters
- use_all_models (bool=False) Get positions from all models or just the first one.
- include_alt_loc (bool=False) Get all positions for each atom, or just the first one.
"""
for
model
in
self
.
iter_models
(
use_all_models
):
for
loc
in
model
.
iter_positions
(
include_alt_loc
):
yield
loc
def
__len__
(
self
):
return
len
(
self
.
models
)
def
_add_atom
(
self
,
atom
):
"""
"""
if
self
.
_current_model
==
None
:
self
.
_add_model
(
Model
(
0
))
atom
.
model_number
=
self
.
_current_model
.
number
# Atom might be alternate position for existing atom
self
.
_current_model
.
_add_atom
(
atom
)
def
_finalize
(
self
):
"""Establish first and last residues, atoms, etc."""
for
model
in
self
.
models
:
model
.
_finalize
()
class
Model
(
object
):
"""Model holds one model of a PDB structure.
NMR structures usually have multiple models. This represents one
of them.
"""
def
__init__
(
self
,
model_number
=
1
):
self
.
number
=
model_number
self
.
chains
=
[]
self
.
_current_chain
=
None
self
.
chains_by_id
=
{}
def
_add_atom
(
self
,
atom
):
"""
"""
if
len
(
self
.
chains
)
==
0
:
self
.
_add_chain
(
Chain
(
atom
.
chain_id
))
# Create a new chain if the chain id has changed
if
self
.
_current_chain
.
chain_id
!=
atom
.
chain_id
:
self
.
_add_chain
(
Chain
(
atom
.
chain_id
))
# Create a new chain after TER record, even if ID is the same
elif
self
.
_current_chain
.
has_ter_record
:
self
.
_add_chain
(
Chain
(
atom
.
chain_id
))
self
.
_current_chain
.
_add_atom
(
atom
)
def
_add_chain
(
self
,
chain
):
self
.
chains
.
append
(
chain
)
self
.
_current_chain
=
chain
if
not
chain
.
chain_id
in
self
.
chains_by_id
:
self
.
chains_by_id
[
chain
.
chain_id
]
=
chain
def
get_chain
(
self
,
chain_id
):
return
self
.
chains_by_id
[
chain_id
]
def
chain_ids
(
self
):
return
self
.
chains_by_id
.
keys
()
def
__contains__
(
self
,
chain_id
):
return
self
.
chains_by_id
.
__contains__
(
chain_id
)
def
__getitem__
(
self
,
chain_id
):
return
self
.
chains_by_id
[
chain_id
]
def
__iter__
(
self
):
return
iter
(
self
.
chains
)
def
iter_chains
(
self
):
for
chain
in
self
:
yield
chain
def
iter_residues
(
self
):
for
chain
in
self
:
for
res
in
chain
.
iter_residues
():
yield
res
def
iter_atoms
(
self
):
for
chain
in
self
:
for
atom
in
chain
.
iter_atoms
():
yield
atom
def
iter_positions
(
self
,
include_alt_loc
=
False
):
for
chain
in
self
:
for
loc
in
chain
.
iter_positions
(
include_alt_loc
):
yield
loc
def
__len__
(
self
):
return
len
(
self
.
chains
)
def
write
(
self
,
output_stream
=
sys
.
stdout
):
# Start atom serial numbers at 1
sn
=
Model
.
AtomSerialNumber
(
1
)
for
chain
in
self
.
chains
:
chain
.
write
(
sn
,
output_stream
)
def
_finalize
(
self
):
for
chain
in
self
.
chains
:
chain
.
_finalize
()
class
AtomSerialNumber
(
object
):
"""pdb.Model inner class for pass-by-reference incrementable serial number"""
def
__init__
(
self
,
val
):
self
.
val
=
val
def
increment
(
self
):
self
.
val
+=
1
class
Chain
(
object
):
def
__init__
(
self
,
chain_id
=
' '
):
self
.
chain_id
=
chain_id
self
.
residues
=
[]
self
.
has_ter_record
=
False
self
.
_current_residue
=
None
self
.
residues_by_num_icode
=
{}
self
.
residues_by_number
=
{}
def
_add_atom
(
self
,
atom
):
"""
"""
# Create a residue if none have been created
if
len
(
self
.
residues
)
==
0
:
self
.
_add_residue
(
Residue
(
atom
.
residue_name_with_spaces
,
atom
.
residue_number
,
atom
.
insertion_code
,
atom
.
alternate_location_indicator
))
# Create a residue if the residue information has changed
elif
self
.
_current_residue
.
number
!=
atom
.
residue_number
:
self
.
_add_residue
(
Residue
(
atom
.
residue_name_with_spaces
,
atom
.
residue_number
,
atom
.
insertion_code
,
atom
.
alternate_location_indicator
))
elif
self
.
_current_residue
.
insertion_code
!=
atom
.
insertion_code
:
self
.
_add_residue
(
Residue
(
atom
.
residue_name_with_spaces
,
atom
.
residue_number
,
atom
.
insertion_code
,
atom
.
alternate_location_indicator
))
elif
self
.
_current_residue
.
name_with_spaces
==
atom
.
residue_name_with_spaces
:
# This is a normal case: number, name, and iCode have not changed
pass
elif
atom
.
alternate_location_indicator
!=
' '
:
# OK - this is a point mutation, Residue._add_atom will know what to do
pass
else
:
# Residue name does not match
# Only residue name does not match
warnings
.
warn
(
"WARNING: two consecutive residues with same number (%s, %s)"
%
(
atom
,
self
.
_current_residue
.
atoms
[
-
1
]))
self
.
_add_residue
(
Residue
(
atom
.
residue_name_with_spaces
,
atom
.
residue_number
,
atom
.
insertion_code
,
atom
.
alternate_location_indicator
))
self
.
_current_residue
.
_add_atom
(
atom
)
def
_add_residue
(
self
,
residue
):
if
len
(
self
.
residues
)
==
0
:
residue
.
is_first_in_chain
=
True
self
.
residues
.
append
(
residue
)
self
.
_current_residue
=
residue
key
=
str
(
residue
.
number
)
+
residue
.
insertion_code
# only store the first residue with a particular key
if
key
not
in
self
.
residues_by_num_icode
:
self
.
residues_by_num_icode
[
key
]
=
residue
if
residue
.
number
not
in
self
.
residues_by_number
:
self
.
residues_by_number
[
residue
.
number
]
=
residue
def
write
(
self
,
next_serial_number
,
output_stream
=
sys
.
stdout
):
for
residue
in
self
.
residues
:
residue
.
write
(
next_serial_number
,
output_stream
)
if
self
.
has_ter_record
:
r
=
self
.
residues
[
-
1
]
print
>>
output_stream
,
"TER %5d %3s %1s%4d%1s"
%
(
next_serial_number
.
val
,
r
.
name_with_spaces
,
self
.
chain_id
,
r
.
number
,
r
.
insertion_code
)
next_serial_number
.
increment
()
def
_add_ter_record
(
self
):
self
.
has_ter_record
=
True
self
.
_finalize
()
def
get_residue
(
self
,
residue_number
,
insertion_code
=
' '
):
return
residues_by_num_icode
[
str
(
residue_number
)
+
insertion_code
]
def
__contains__
(
self
,
residue_number
):
return
self
.
residues_by_number
.
__contains__
(
residue_number
)
def
__getitem__
(
self
,
residue_number
):
"""Returns the FIRST residue in this chain with a particular residue number"""
return
self
.
residues_by_number
[
residue_number
]
def
__iter__
(
self
):
for
res
in
self
.
residues
:
yield
res
def
iter_residues
(
self
):
for
res
in
self
:
yield
res
def
iter_atoms
(
self
):
for
res
in
self
:
for
atom
in
res
:
yield
atom
;
def
iter_positions
(
self
,
include_alt_loc
=
False
):
for
res
in
self
:
for
loc
in
res
.
iter_positions
(
include_alt_loc
):
yield
loc
def
__len__
(
self
):
return
len
(
self
.
residues
)
def
_finalize
(
self
):
self
.
residues
[
0
].
is_first_in_chain
=
True
self
.
residues
[
-
1
].
is_final_in_chain
=
True
for
residue
in
self
.
residues
:
residue
.
_finalize
()
class
Residue
(
object
):
def
__init__
(
self
,
name
,
number
,
insertion_code
=
' '
,
primary_alternate_location_indicator
=
' '
):
alt_loc
=
primary_alternate_location_indicator
self
.
primary_location_id
=
alt_loc
self
.
locations
=
{}
self
.
locations
[
alt_loc
]
=
Residue
.
Location
(
alt_loc
,
name
)
self
.
name_with_spaces
=
name
self
.
number
=
number
self
.
insertion_code
=
insertion_code
self
.
atoms
=
[]
self
.
atoms_by_name
=
{}
self
.
is_first_in_chain
=
False
self
.
is_final_in_chain
=
False
self
.
_current_atom
=
None
def
_add_atom
(
self
,
atom
):
"""
"""
alt_loc
=
atom
.
alternate_location_indicator
if
not
self
.
locations
.
has_key
(
alt_loc
):
self
.
locations
[
alt_loc
]
=
Residue
.
Location
(
alt_loc
,
atom
.
residue_name_with_spaces
)
assert
atom
.
residue_number
==
self
.
number
assert
atom
.
insertion_code
==
self
.
insertion_code
# Check whether this is an existing atom with another position
if
(
atom
.
name_with_spaces
in
self
.
atoms_by_name
):
old_atom
=
self
.
atoms_by_name
[
atom
.
name_with_spaces
]
# Unless this is a duplicated atom (warn about file error)
if
atom
.
alternate_location_indicator
in
old_atom
.
locations
:
warnings
.
warn
(
"WARNING: duplicate atom (%s, %s)"
%
(
atom
,
old_atom
.
_pdb_string
(
old_atom
.
serial_number
,
atom
.
alternate_location_indicator
)))
else
:
for
alt_loc
,
position
in
atom
.
locations
.
items
():
old_atom
.
locations
[
alt_loc
]
=
position
return
# no new atom added
# actually use new atom
self
.
atoms_by_name
[
atom
.
name
]
=
atom
self
.
atoms_by_name
[
atom
.
name_with_spaces
]
=
atom
self
.
atoms
.
append
(
atom
)
self
.
_current_atom
=
atom
def
write
(
self
,
next_serial_number
,
output_stream
=
sys
.
stdout
,
alt_loc
=
"*"
):
for
atom
in
self
.
atoms
:
atom
.
write
(
next_serial_number
,
output_stream
,
alt_loc
)
def
_finalize
(
self
):
if
len
(
self
.
atoms
)
>
0
:
self
.
atoms
[
0
].
is_first_atom_in_chain
=
self
.
is_first_in_chain
self
.
atoms
[
-
1
].
is_final_atom_in_chain
=
self
.
is_final_in_chain
for
atom
in
self
.
atoms
:
atom
.
is_first_residue_in_chain
=
self
.
is_first_in_chain
atom
.
is_final_residue_in_chain
=
self
.
is_final_in_chain
def
set_name_with_spaces
(
self
,
name
,
alt_loc
=
None
):
# Gromacs ffamber PDB files can have 4-character residue names
# assert len(name) == 3
if
alt_loc
==
None
:
alt_loc
=
self
.
primary_location_id
loc
=
self
.
locations
[
alt_loc
]
loc
.
name_with_spaces
=
name
loc
.
name
=
name
.
strip
()
def
get_name_with_spaces
(
self
,
alt_loc
=
None
):
if
alt_loc
==
None
:
alt_loc
=
self
.
primary_location_id
loc
=
self
.
locations
[
alt_loc
]
return
loc
.
name_with_spaces
name_with_spaces
=
property
(
get_name_with_spaces
,
set_name_with_spaces
,
doc
=
'four-character residue name including spaces'
)
def
get_name
(
self
,
alt_loc
=
None
):
if
alt_loc
==
None
:
alt_loc
=
self
.
primary_location_id
loc
=
self
.
locations
[
alt_loc
]
return
loc
.
name
name
=
property
(
get_name
,
doc
=
'residue name'
)
def
get_atom
(
self
,
atom_name
):
return
self
.
atoms_by_name
[
atom_name
]
def
__contains__
(
self
,
atom_name
):
return
self
.
atoms_by_name
.
__contains__
(
atom_name
)
def
__getitem__
(
self
,
atom_name
):
"""Returns the FIRST atom in this residue with a particular atom name"""
return
self
.
atoms_by_name
[
atom_name
]
def
__iter__
(
self
):
"""
>>> pdb_lines = [ \
"ATOM 188 N CYS A 42 40.714 -5.292 12.123 1.00 11.29 N",\
"ATOM 189 CA CYS A 42 39.736 -5.883 12.911 1.00 10.01 C",\
"ATOM 190 C CYS A 42 40.339 -6.654 14.087 1.00 22.28 C",\
"ATOM 191 O CYS A 42 41.181 -7.530 13.859 1.00 13.70 O",\
"ATOM 192 CB CYS A 42 38.949 -6.825 12.002 1.00 9.67 C",\
"ATOM 193 SG CYS A 42 37.557 -7.514 12.922 1.00 20.12 S"]
>>> res = Residue("CYS", 42)
>>> for l in pdb_lines:
... res._add_atom(Atom(l))
...
>>> for atom in res:
... print atom
ATOM 188 N CYS A 42 40.714 -5.292 12.123 1.00 11.29 N
ATOM 189 CA CYS A 42 39.736 -5.883 12.911 1.00 10.01 C
ATOM 190 C CYS A 42 40.339 -6.654 14.087 1.00 22.28 C
ATOM 191 O CYS A 42 41.181 -7.530 13.859 1.00 13.70 O
ATOM 192 CB CYS A 42 38.949 -6.825 12.002 1.00 9.67 C
ATOM 193 SG CYS A 42 37.557 -7.514 12.922 1.00 20.12 S
"""
for
atom
in
self
.
iter_atoms
():
yield
atom
# Three possibilities: primary alt_loc, certain alt_loc, or all alt_locs
def
iter_atoms
(
self
,
alt_loc
=
None
):
if
alt_loc
==
None
:
locs
=
[
self
.
primary_location_id
]
elif
alt_loc
==
""
:
locs
=
[
self
.
primary_location_id
]
elif
alt_loc
==
"*"
:
locs
=
None
else
:
locs
=
list
(
alt_loc
)
# If an atom has any location in alt_loc, emit the atom
for
atom
in
self
.
atoms
:
use_atom
=
False
# start pessimistic
for
loc2
in
atom
.
locations
.
keys
():
# print "#%s#%s" % (loc2,locs)
if
locs
==
None
:
# means all locations
use_atom
=
True
elif
loc2
in
locs
:
use_atom
=
True
if
use_atom
:
yield
atom
def
iter_positions
(
self
,
include_alt_loc
=
False
):
"""
Returns one position per atom, even if an individual atom has multiple positions.
>>> pdb_lines = [ \
"ATOM 188 N CYS A 42 40.714 -5.292 12.123 1.00 11.29 N",\
"ATOM 189 CA CYS A 42 39.736 -5.883 12.911 1.00 10.01 C",\
"ATOM 190 C CYS A 42 40.339 -6.654 14.087 1.00 22.28 C",\
"ATOM 191 O CYS A 42 41.181 -7.530 13.859 1.00 13.70 O",\
"ATOM 192 CB CYS A 42 38.949 -6.825 12.002 1.00 9.67 C",\
"ATOM 193 SG CYS A 42 37.557 -7.514 12.922 1.00 20.12 S"]
>>> res = Residue("CYS", 42)
>>> for l in pdb_lines: res._add_atom(Atom(l))
>>> for c in res.iter_positions:
... print c
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'instancemethod' object is not iterable
>>> for c in res.iter_positions():
... print c
[40.714, -5.292, 12.123] A
[39.736, -5.883, 12.911] A
[40.339, -6.654, 14.087] A
[41.181, -7.53, 13.859] A
[38.949, -6.825, 12.002] A
[37.557, -7.514, 12.922] A
"""
for
atom
in
self
:
if
include_alt_loc
:
for
loc
in
atom
.
iter_positions
():
yield
loc
else
:
yield
atom
.
position
def
__len__
(
self
):
return
len
(
self
.
atoms
)
# Residues can have multiple locations, based on alt_loc indicator
class
Location
:
"""
Inner class of residue to allow different residue names for different alternate_locations.
"""
def
__init__
(
self
,
alternate_location_indicator
,
residue_name_with_spaces
):
self
.
alternate_location_indicator
=
alternate_location_indicator
self
.
residue_name_with_spaces
=
residue_name_with_spaces
class
Atom
(
object
):
"""Atom represents one atom in a PDB structure.
"""
def
__init__
(
self
,
pdb_line
):
"""Create a new pdb.Atom from an ATOM or HETATM line.
Example line:
ATOM 2209 CB TYR A 299 6.167 22.607 20.046 1.00 8.12 C
00000000011111111112222222222333333333344444444445555555555666666666677777777778
12345678901234567890123456789012345678901234567890123456789012345678901234567890
ATOM line format description from
http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:
COLUMNS DATA TYPE CONTENTS
--------------------------------------------------------------------------------
1 - 6 Record name "ATOM "
7 - 11 Integer Atom serial number.
13 - 16 Atom Atom name.
17 Character Alternate location indicator.
18 - 20 Residue name Residue name.
22 Character Chain identifier.
23 - 26 Integer Residue sequence number.
27 AChar Code for insertion of residues.
31 - 38 Real(8.3) Orthogonal coordinates for X in Angstroms.
39 - 46 Real(8.3) Orthogonal coordinates for Y in Angstroms.
47 - 54 Real(8.3) Orthogonal coordinates for Z in Angstroms.
55 - 60 Real(6.2) Occupancy.
61 - 66 Real(6.2) Temperature factor (Default = 0.0).
73 - 76 LString(4) Segment identifier, left-justified.
77 - 78 LString(2) Element symbol, right-justified.
79 - 80 LString(2) Charge on the atom.
"""
# We might modify first/final status during _finalize() methods
self
.
is_first_atom_in_chain
=
False
self
.
is_final_atom_in_chain
=
False
self
.
is_first_residue_in_chain
=
False
self
.
is_final_residue_in_chain
=
False
# Start parsing fields from pdb line
self
.
record_name
=
pdb_line
[
0
:
6
].
strip
()
self
.
serial_number
=
int
(
pdb_line
[
6
:
11
])
self
.
name_with_spaces
=
pdb_line
[
12
:
16
]
alternate_location_indicator
=
pdb_line
[
16
]
self
.
residue_name_with_spaces
=
pdb_line
[
17
:
20
]
# In some MD codes, notably ffamber in gromacs, residue name has a fourth character in
# column 21
possible_fourth_character
=
pdb_line
[
20
:
21
]
if
possible_fourth_character
!=
" "
:
# Fourth character should only be there if official 3 are already full
if
len
(
self
.
residue_name_with_spaces
.
strip
())
!=
3
:
raise
ValueError
(
'Misaligned residue name: %s'
%
pdb_line
)
self
.
residue_name_with_spaces
+=
possible_fourth_character
self
.
residue_name
=
self
.
residue_name_with_spaces
.
strip
()
self
.
chain_id
=
pdb_line
[
21
]
self
.
residue_number
=
int
(
pdb_line
[
22
:
26
])
self
.
insertion_code
=
pdb_line
[
26
]
# coordinates, occupancy, and temperature factor belong in Atom.Location object
x
=
float
(
pdb_line
[
30
:
38
])
y
=
float
(
pdb_line
[
38
:
46
])
z
=
float
(
pdb_line
[
46
:
54
])
occupancy
=
float
(
pdb_line
[
54
:
60
])
temperature_factor
=
float
(
pdb_line
[
60
:
66
])
*
unit
.
angstroms
*
unit
.
angstroms
self
.
locations
=
{}
loc
=
Atom
.
Location
(
alternate_location_indicator
,
Vec3
([
x
,
y
,
z
])
*
unit
.
angstroms
,
occupancy
,
temperature_factor
,
self
.
residue_name_with_spaces
)
self
.
locations
[
alternate_location_indicator
]
=
loc
self
.
default_location_id
=
alternate_location_indicator
# segment id, element_symbol, and formal_charge are not always present
self
.
segment_id
=
pdb_line
[
72
:
76
].
strip
()
self
.
element_symbol
=
pdb_line
[
76
:
78
].
strip
()
try
:
self
.
formal_charge
=
int
(
pdb_line
[
78
:
80
])
except
ValueError
:
self
.
formal_charge
=
None
# figure out atom element
try
:
# First try to find a sensible element symbol from columns 76-77
self
.
element
=
element
.
get_by_symbol
(
self
.
element_symbol
)
except
KeyError
:
# otherwise, deduce element from first two characters of atom name
# remove digits found in some hydrogen atom names
symbol
=
self
.
name_with_spaces
[
0
:
2
].
strip
().
lstrip
(
"0123456789"
)
try
:
# Some molecular dynamics PDB files, such as gromacs with ffamber force
# field, include 4-character hydrogen atom names beginning with "H".
# Hopefully elements like holmium (Ho) and mercury (Hg) will have fewer than four
# characters in the atom name. This problem is the fault of molecular
# dynamics code authors who feel the need to make up their own atom
# nomenclature because it is too tedious to read that provided by the PDB.
# These are the same folks who invent their own meanings for biochemical terms
# like "dipeptide". Clowntards.
if
len
(
self
.
name
)
==
4
and
self
.
name
[
0
:
1
]
==
"H"
:
self
.
element
=
element
.
hydrogen
else
:
self
.
element
=
element
.
get_by_symbol
(
symbol
)
except
KeyError
:
# OK, I give up
self
.
element
=
None
warnings
.
warn
(
"WARNING: Unknown element '%s': %s"
%
(
symbol
,
pdb_line
))
def
iter_locations
(
self
):
"""
Iterate over Atom.Location objects for this atom, including primary location.
>>> atom = Atom("ATOM 2209 CB TYR A 299 6.167 22.607 20.046 1.00 8.12 C")
>>> for c in atom.iter_locations():
... print c
...
[6.167, 22.607, 20.046] A
"""
for
alt_loc
in
self
.
locations
:
yield
self
.
locations
[
alt_loc
]
def
iter_positions
(
self
):
"""
Iterate over atomic positions. Returns Quantity(Vec3(), unit) objects, unlike
iter_locations, which returns Atom.Location objects.
"""
for
loc
in
self
.
iter_locations
():
yield
loc
.
position
def
iter_coordinates
(
self
):
"""
Iterate over x, y, z values of primary atom position.
>>> atom = Atom("ATOM 2209 CB TYR A 299 6.167 22.607 20.046 1.00 8.12 C")
>>> for c in atom.iter_coordinates():
... print c
...
6.167 A
22.607 A
20.046 A
"""
for
coord
in
self
.
position
:
yield
coord
# Hide existence of multiple alternate locations to avoid scaring casual users
def
get_location
(
self
,
location_id
=
None
):
id
=
location_id
if
(
id
==
None
):
id
=
self
.
default_location_id
return
self
.
locations
[
id
]
def
set_location
(
self
,
new_location
,
location_id
=
None
):
id
=
location_id
if
(
id
==
None
):
id
=
self
.
default_location_id
self
.
locations
[
id
]
=
new_location
location
=
property
(
get_location
,
set_location
,
doc
=
'default Atom.Location object'
)
def
get_position
(
self
):
return
self
.
location
.
position
def
set_position
(
self
,
coords
):
self
.
location
.
position
=
coords
position
=
property
(
get_position
,
set_position
,
doc
=
'orthogonal coordinates'
)
def
get_alternate_location_indicator
(
self
):
return
self
.
location
.
alternate_location_indicator
alternate_location_indicator
=
property
(
get_alternate_location_indicator
)
def
get_occupancy
(
self
):
return
self
.
location
.
occupancy
occupancy
=
property
(
get_occupancy
)
def
get_temperature_factor
(
self
):
return
self
.
location
.
temperature_factor
temperature_factor
=
property
(
get_temperature_factor
)
def
get_x
(
self
):
return
self
.
position
[
0
]
x
=
property
(
get_x
)
def
get_y
(
self
):
return
self
.
position
[
1
]
y
=
property
(
get_y
)
def
get_z
(
self
):
return
self
.
position
[
2
]
z
=
property
(
get_z
)
def
_pdb_string
(
self
,
serial_number
=
None
,
alternate_location_indicator
=
None
):
"""
Produce a PDB line for this atom using a particular serial number and alternate location
"""
if
serial_number
==
None
:
serial_number
=
self
.
serial_number
if
alternate_location_indicator
==
None
:
alternate_location_indicator
=
self
.
alternate_location_indicator
# produce PDB line in three parts: names, numbers, and end
# Accomodate 4-character residue names that use column 21
long_res_name
=
self
.
residue_name_with_spaces
if
len
(
long_res_name
)
==
3
:
long_res_name
+=
" "
assert
len
(
long_res_name
)
==
4
names
=
"%-6s%5d %4s%1s%4s%1s%4d%1s "
%
(
self
.
record_name
,
serial_number
,
\
self
.
name_with_spaces
,
alternate_location_indicator
,
\
long_res_name
,
self
.
chain_id
,
\
self
.
residue_number
,
self
.
insertion_code
)
numbers
=
"%8.3f%8.3f%8.3f%6.2f%6.2f "
%
(
self
.
x
.
value_in_unit
(
unit
.
angstroms
),
\
self
.
y
.
value_in_unit
(
unit
.
angstroms
),
\
self
.
z
.
value_in_unit
(
unit
.
angstroms
),
\
self
.
occupancy
,
\
self
.
temperature_factor
.
value_in_unit
(
unit
.
angstroms
*
unit
.
angstroms
))
end
=
"%-4s%2s"
%
(
\
self
.
segment_id
,
self
.
element_symbol
)
formal_charge
=
" "
if
(
self
.
formal_charge
!=
None
):
formal_charge
=
"%+2d"
%
self
.
formal_charge
return
names
+
numbers
+
end
+
formal_charge
def
__str__
(
self
):
return
self
.
_pdb_string
(
self
.
serial_number
,
self
.
alternate_location_indicator
)
def
write
(
self
,
next_serial_number
,
output_stream
=
sys
.
stdout
,
alt_loc
=
"*"
):
"""
alt_loc = "*" means write all alternate locations
alt_loc = None means write just the primary location
alt_loc = "AB" means write locations "A" and "B"
"""
if
alt_loc
==
None
:
locs
=
[
self
.
default_location_id
]
elif
alt_loc
==
""
:
locs
=
[
self
.
default_location_id
]
elif
alt_loc
==
"*"
:
locs
=
self
.
locations
.
keys
()
locs
.
sort
()
else
:
locs
=
list
(
alt_loc
)
for
loc_id
in
locs
:
print
>>
output_stream
,
self
.
_pdb_string
(
next_serial_number
.
val
,
loc_id
)
next_serial_number
.
increment
()
def
set_name_with_spaces
(
self
,
name
):
assert
len
(
name
)
==
4
self
.
_name_with_spaces
=
name
self
.
_name
=
name
.
strip
()
def
get_name_with_spaces
(
self
):
return
self
.
_name_with_spaces
name_with_spaces
=
property
(
get_name_with_spaces
,
set_name_with_spaces
,
doc
=
'four-character residue name including spaces'
)
def
get_name
(
self
):
return
self
.
_name
name
=
property
(
get_name
,
doc
=
'residue name'
)
class
Location
(
object
):
"""
Inner class of Atom for holding alternate locations
"""
def
__init__
(
self
,
alt_loc
,
position
,
occupancy
,
temperature_factor
,
residue_name
):
self
.
alternate_location_indicator
=
alt_loc
self
.
position
=
position
self
.
occupancy
=
occupancy
self
.
temperature_factor
=
temperature_factor
self
.
residue_name
=
residue_name
def
__iter__
(
self
):
"""
Examples
>>> from simtk.vec3 import Vec3
>>> import simtk.unit as unit
>>> l = Atom.Location(' ', Vec3([1,2,3])*unit.angstroms, 1.0, 20.0*unit.angstroms**2, "XXX")
>>> for c in l:
... print c
...
1 A
2 A
3 A
"""
for
coord
in
self
.
position
:
yield
coord
def
__str__
(
self
):
return
str
(
self
.
position
)
# run module directly for testing
if
__name__
==
'__main__'
:
# Test the examples in the docstrings
import
doctest
,
sys
doctest
.
testmod
(
sys
.
modules
[
__name__
])
import
sys
import
os
import
gzip
import
re
import
time
# Test atom line parsing
pdb_line
=
"ATOM 2209 CB TYR A 299 6.167 22.607 20.046 1.00 8.12 C"
a
=
Atom
(
pdb_line
)
assert
a
.
record_name
==
"ATOM"
assert
a
.
serial_number
==
2209
assert
a
.
name
==
"CB"
assert
a
.
name_with_spaces
==
" CB "
assert
a
.
residue_name
==
"TYR"
assert
a
.
residue_name_with_spaces
==
"TYR"
assert
a
.
chain_id
==
"A"
assert
a
.
residue_number
==
299
assert
a
.
insertion_code
==
" "
assert
a
.
alternate_location_indicator
==
" "
assert
a
.
x
==
6.167
*
unit
.
angstroms
assert
a
.
y
==
22.607
*
unit
.
angstroms
assert
a
.
z
==
20.046
*
unit
.
angstroms
assert
a
.
occupancy
==
1.00
assert
a
.
temperature_factor
==
8.12
*
unit
.
angstroms
*
unit
.
angstroms
assert
a
.
segment_id
==
""
assert
a
.
element_symbol
==
"C"
# print pdb_line
# print str(a)
assert
str
(
a
).
rstrip
()
==
pdb_line
.
rstrip
()
a
=
Atom
(
"ATOM 2209 CB TYR A 299 6.167 22.607 20.046 1.00 8.12 C"
)
# misaligned residue name - bad
try
:
a
=
Atom
(
"ATOM 2209 CB TYRA 299 6.167 22.607 20.046 1.00 8.12 C"
)
assert
(
False
)
except
ValueError
:
pass
# four character residue name -- not so bad
a
=
Atom
(
"ATOM 2209 CB NTYRA 299 6.167 22.607 20.046 1.00 8.12 C"
)
atom_count
=
0
residue_count
=
0
chain_count
=
0
model_count
=
0
structure_count
=
0
def
parse_one_pdb
(
pdb_file_name
):
global
atom_count
,
residue_count
,
chain_count
,
model_count
,
structure_count
print
pdb_file_name
if
pdb_file_name
[
-
3
:]
==
".gz"
:
fh
=
gzip
.
open
(
pdb_file_name
)
else
:
fh
=
open
(
pdb_file_name
)
pdb
=
PdbStructure
(
fh
,
load_all_models
=
True
)
# print " %d atoms found" % len(pdb.atoms)
atom_count
+=
len
(
list
(
pdb
.
iter_atoms
()))
residue_count
+=
len
(
list
(
pdb
.
iter_residues
()))
chain_count
+=
len
(
list
(
pdb
.
iter_chains
()))
model_count
+=
len
(
list
(
pdb
.
iter_models
()))
structure_count
+=
1
fh
.
close
return
pdb
# Parse one file
pdb_file_name
=
"/home/Christopher Bruns/Desktop/1ARJ.pdb"
if
os
.
path
.
exists
(
pdb_file_name
):
parse_one_pdb
(
pdb_file_name
)
# try parsing the entire PDB
pdb_dir
=
"/cygdrive/j/pdb/data/structures/divided/pdb"
if
os
.
path
.
exists
(
pdb_dir
):
parse_entire_pdb
=
False
parse_one_division
=
False
parse_one_file
=
False
start_time
=
time
.
time
()
if
parse_one_file
:
pdb_id
=
"2aed"
middle_two
=
pdb_id
[
1
:
3
]
full_pdb_file
=
os
.
path
.
join
(
pdb_dir
,
middle_two
,
"pdb%s.ent.gz"
%
pdb_id
)
parse_one_pdb
(
full_pdb_file
)
if
parse_one_division
:
subdir
=
"ae"
full_subdir
=
os
.
path
.
join
(
pdb_dir
,
subdir
)
for
pdb_file
in
os
.
listdir
(
full_subdir
):
if
not
re
.
match
(
"pdb.%2s.\.ent\.gz"
%
subdir
,
pdb_file
):
continue
full_pdb_file
=
os
.
path
.
join
(
full_subdir
,
pdb_file
)
parse_one_pdb
(
full_pdb_file
)
if
parse_entire_pdb
:
for
subdir
in
os
.
listdir
(
pdb_dir
):
if
not
len
(
subdir
)
==
2
:
continue
full_subdir
=
os
.
path
.
join
(
pdb_dir
,
subdir
)
if
not
os
.
path
.
isdir
(
full_subdir
):
continue
for
pdb_file
in
os
.
listdir
(
full_subdir
):
if
not
re
.
match
(
"pdb.%2s.\.ent\.gz"
%
subdir
,
pdb_file
):
continue
full_pdb_file
=
os
.
path
.
join
(
full_subdir
,
pdb_file
)
parse_one_pdb
(
full_pdb_file
)
end_time
=
time
.
time
()
elapsed
=
end_time
-
start_time
minutes
=
elapsed
/
60
seconds
=
elapsed
%
60
hours
=
minutes
/
60
minutes
=
minutes
%
60
print
"%dh:%02dm:%02ds elapsed"
%
(
hours
,
minutes
,
seconds
)
print
"%d atoms found"
%
atom_count
print
"%d residues found"
%
residue_count
print
"%d chains found"
%
chain_count
print
"%d models found"
%
model_count
print
"%d structures found"
%
structure_count
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment