Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
db72d0cb
Commit
db72d0cb
authored
Nov 06, 2014
by
peastman
Browse files
Merge pull request #702 from peastman/pdbx
Created reader for PDBx/mmCIF files
parents
4ab3b428
49722158
Changes
14
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
2844 additions
and
2 deletions
+2844
-2
docs-source/licenses/Licenses.txt
docs-source/licenses/Licenses.txt
+9
-1
wrappers/python/setup.py
wrappers/python/setup.py
+4
-1
wrappers/python/simtk/openmm/app/__init__.py
wrappers/python/simtk/openmm/app/__init__.py
+1
-0
wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
+0
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
...n/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
+819
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
...ython/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
+638
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
...mtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
+236
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
...ython/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
+461
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
.../simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
+126
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/__init__.py
.../python/simtk/openmm/app/internal/pdbx/reader/__init__.py
+0
-0
wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
...ython/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
+191
-0
wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
.../simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
+165
-0
wrappers/python/simtk/openmm/app/internal/pdbx/writer/__init__.py
.../python/simtk/openmm/app/internal/pdbx/writer/__init__.py
+0
-0
wrappers/python/simtk/openmm/app/pdbxfile.py
wrappers/python/simtk/openmm/app/pdbxfile.py
+194
-0
No files found.
docs-source/licenses/Licenses.txt
View file @
db72d0cb
...
@@ -2,7 +2,7 @@ OpenMM was developed by Simbios, the NIH National Center for Physics-Based
...
@@ -2,7 +2,7 @@ OpenMM was developed by Simbios, the NIH National Center for Physics-Based
Simulation of Biological Structures at Stanford, funded under the NIH Roadmap
Simulation of Biological Structures at Stanford, funded under the NIH Roadmap
for Medical Research, grant U54 GM072970. See https://simtk.org.
for Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright
©
2008-2014 Stanford University and the Authors.
Portions copyright © 2008-2014 Stanford University and the Authors.
There are several licenses which cover different parts of OpenMM as described
There are several licenses which cover different parts of OpenMM as described
below.
below.
...
@@ -119,3 +119,11 @@ freely, subject to the following restrictions:
...
@@ -119,3 +119,11 @@ freely, subject to the following restrictions:
2. Altered source versions must be plainly marked as such, and must not be
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
3. This notice may not be removed or altered from any source distribution.
6. PdbxReader
OpenMM uses the PDBx/mmCIF parser written by John Westbrook. It is distributed
under the Creative Commons Attribution 3.0 Unported license. For details, see
https://creativecommons.org/licenses/by/3.0. This library was modified to move
it inside the simtk.openmm.app.internal module.
\ No newline at end of file
wrappers/python/setup.py
View file @
db72d0cb
...
@@ -145,7 +145,10 @@ def buildKeywordDictionary(major_version_num=MAJOR_VERSION_NUM,
...
@@ -145,7 +145,10 @@ def buildKeywordDictionary(major_version_num=MAJOR_VERSION_NUM,
"simtk.openmm"
,
"simtk.openmm"
,
"simtk.openmm.app"
,
"simtk.openmm.app"
,
"simtk.openmm.app.internal"
,
"simtk.openmm.app.internal"
,
"simtk.openmm.app.internal.charmm"
]
"simtk.openmm.app.internal.charmm"
,
"simtk.openmm.app.internal.pdbx"
,
"simtk.openmm.app.internal.pdbx.reader"
,
"simtk.openmm.app.internal.pdbx.writer"
]
setupKeywords
[
"data_files"
]
=
[]
setupKeywords
[
"data_files"
]
=
[]
setupKeywords
[
"package_data"
]
=
{
"simtk"
:
[],
setupKeywords
[
"package_data"
]
=
{
"simtk"
:
[],
"simtk.unit"
:
[],
"simtk.unit"
:
[],
...
...
wrappers/python/simtk/openmm/app/__init__.py
View file @
db72d0cb
...
@@ -12,6 +12,7 @@ __email__ = "peastman@stanford.edu"
...
@@ -12,6 +12,7 @@ __email__ = "peastman@stanford.edu"
from
topology
import
Topology
,
Chain
,
Residue
,
Atom
from
topology
import
Topology
,
Chain
,
Residue
,
Atom
from
pdbfile
import
PDBFile
from
pdbfile
import
PDBFile
from
pdbxfile
import
PDBxFile
from
forcefield
import
ForceField
from
forcefield
import
ForceField
from
simulation
import
Simulation
from
simulation
import
Simulation
from
pdbreporter
import
PDBReporter
from
pdbreporter
import
PDBReporter
...
...
wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
0 → 100644
View file @
db72d0cb
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
0 → 100644
View file @
db72d0cb
This diff is collapsed.
Click to expand it.
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
0 → 100644
View file @
db72d0cb
This diff is collapsed.
Click to expand it.
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxReadWriteTests.py
# Author: jdw
# Date: 9-Oct-2011
# Version: 0.001
#
# Updated:
# 24-Oct-2012 jdw update path details and reorganize.
#
##
""" Various tests caess for PDBx/mmCIF data file and dictionary reader and writer.
"""
__docformat__
=
"restructuredtext en"
__author__
=
"John Westbrook"
__email__
=
"jwest@rcsb.rutgers.edu"
__license__
=
"Creative Commons Attribution 3.0 Unported"
__version__
=
"V0.01"
import
sys
,
unittest
,
traceback
import
sys
,
time
,
os
,
os
.
path
,
shutil
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app.internal.pdbx.writer.PdbxWriter
import
PdbxWriter
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxReadWriteTests(unittest.TestCase):
    """Round-trip tests for the PDBx/mmCIF reader and writer classes.

    Every test writes a start banner to ``self.lfh``.  Any exception raised
    while exercising the reader/writer is printed to ``self.lfh`` and then
    reported as a test failure.  Output files are created relative to the
    current working directory and are not removed afterwards.
    """

    # Attribute names of the category built by the write/update tests.
    _MAPPING_REF_ATTRIBUTES = (
        "ordinal",
        "entity_id",
        "auth_mon_id",
        "auth_mon_num",
        "pdb_chain_id",
        "ref_mon_id",
        "ref_mon_num",
    )

    def setUp(self):
        self.lfh = sys.stdout
        self.verbose = False
        self.pathPdbxDataFile = "../tests/1kip.cif"
        self.pathOutputFile = "testOutputDataFile.cif"

    def tearDown(self):
        pass

    # ------------------------------------------------------------------
    # Private helpers (leading underscore keeps unittest from collecting them)
    # ------------------------------------------------------------------
    def _logStart(self):
        """Write the 'Starting <class> <method>' banner for the calling test."""
        # Frame 1 is the test method that called this helper.
        callerName = sys._getframe(1).f_code.co_name
        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, callerName))

    def _failWithTraceback(self):
        """Print the exception being handled to the log and fail the test."""
        traceback.print_exc(file=self.lfh)
        self.fail()

    def _readFile(self, path):
        """Parse *path* with PdbxReader and return the list of containers."""
        containers = []
        # 'with' guarantees the handle is closed even when read() raises.
        with open(path, "r") as ifh:
            PdbxReader(ifh).read(containers)
        return containers

    def _writeFile(self, path, containers):
        """Write *containers* to *path* with PdbxWriter."""
        with open(path, "w") as ofh:
            PdbxWriter(ofh).write(containers)

    def _mappingRefCategory(self, rows):
        """Build a 'pdbx_seqtool_mapping_ref' category holding *rows*."""
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in self._MAPPING_REF_ATTRIBUTES:
            aCat.appendAttribute(attributeName)
        for row in rows:
            aCat.append(row)
        return aCat

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def testSimpleInitialization(self):
        """Test case - Simple initialization of a data category and data block
        """
        self._logStart()
        try:
            fn = "test-simple.cif"
            attributeNameList = ['aOne', 'aTwo', 'aThree', 'aFour', 'aFive',
                                 'aSix', 'aSeven', 'aEight', 'aNine', 'aTen']
            # Ten independent rows, each holding the integers 1..10.
            rowList = [list(range(1, 11)) for _ in range(10)]
            nameCat = 'myCategory'

            curContainer = DataContainer("myblock")
            aCat = DataCategory(nameCat, attributeNameList, rowList)
            aCat.printIt()
            curContainer.append(aCat)
            curContainer.printIt()

            self._writeFile(fn, [curContainer])

            # Read the file back and report what was recovered.
            for container in self._readFile(fn):
                for objName in container.getObjNameList():
                    name, aList, rList = container.getObj(objName).get()
                    self.lfh.write("Recovered data category %s\n" % name)
                    self.lfh.write("Attribute list %r\n" % repr(aList))
                    self.lfh.write("Row list %r\n" % repr(rList))
        except Exception:
            self._failWithTraceback()

    def testWriteDataFile(self):
        """Test case - write data file
        """
        self._logStart()
        try:
            curContainer = DataContainer("myblock")
            aCat = self._mappingRefCategory([
                [1, 2, 3, 4, 5, 6, 7],
                [1, 2, 3, 4, 5, 6, 7],
                [1, 2, 3, 4, 5, 6, 7],
                [1, 2, 3, 4, 5, 6, 7],
                [7, 6, 5, 4, 3, 2, 1],
            ])
            aCat.printIt()
            curContainer.append(aCat)
            curContainer.printIt()

            self._writeFile("test-output.cif", [curContainer])
        except Exception:
            self._failWithTraceback()

    def testUpdateDataFile(self):
        """Test case - update data file
        """
        self._logStart()
        try:
            # Create an initial data file --
            curContainer = DataContainer("myblock")
            curContainer.append(self._mappingRefCategory([
                [9, 2, 3, 4, 5, 6, 7],
                [10, 2, 3, 4, 5, 6, 7],
                [11, 2, 3, 4, 5, 6, 7],
                [12, 2, 3, 4, 5, 6, 7],
            ]))
            self._writeFile("test-output-1.cif", [curContainer])

            # Read and update the data -
            myDataList = self._readFile("test-output-1.cif")
            myBlock = myDataList[0]
            myBlock.printIt()
            myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
            myCat.printIt()
            # range() behaves the same as xrange() here and exists on Python 3.
            for iRow in range(myCat.getRowCount()):
                myCat.setValue('some value', 'ref_mon_id', iRow)
                myCat.setValue(100, 'ref_mon_num', iRow)

            self._writeFile("test-output-2.cif", myDataList)
        except Exception:
            self._failWithTraceback()

    def testReadDataFile(self):
        """Test case - read data file
        """
        self._logStart()
        try:
            self._readFile(self.pathPdbxDataFile)
        except Exception:
            self._failWithTraceback()

    def testReadWriteDataFile(self):
        """Test case - data file read write test
        """
        self._logStart()
        try:
            myDataList = self._readFile(self.pathPdbxDataFile)
            self._writeFile(self.pathOutputFile, myDataList)
        except Exception:
            self._failWithTraceback()
def simpleSuite():
    """Return a suite holding a hand-picked subset of PdbxReadWriteTests.

    The suite runs, in order, the simple-initialization, update and
    read/write tests.
    """
    selectedTests = (
        "testSimpleInitialization",
        "testUpdateDataFile",
        "testReadWriteDataFile",
    )
    suiteSelect = unittest.TestSuite()
    for testName in selectedTests:
        suiteSelect.addTest(PdbxReadWriteTests(testName))
    return suiteSelect


if __name__ == '__main__':
    # Run the selected tests with per-test reporting.
    mySuite = simpleSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
#
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxReader.py
# Date: 2012-01-09 Jdw Adapted from PdbxParser
#
# Updates:
#
# 2012-01-09 - (jdw) Separate reader and writer classes.
#
# 2012-09-02 - (jdw) Revise tokenizer to better handle embedded quoting.
#
##
"""
PDBx/mmCIF dictionary and data file parser.
Acknowledgements:
The tokenizer used in this module is modeled after the clever parser design
used in the PyMMLIB package.
PyMMLib Development Group
Authors: Ethan Merritt: merritt@u.washington.edu & Jay Painter: jay.painter@gmail.com
See: http://pymmlib.sourceforge.net/
"""
import
re
,
sys
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxError(Exception):
    """General-purpose error raised by the PDBx reader."""
class SyntaxError(Exception):
    """Syntax error found while parsing a PDBx/mmCIF file.

    NOTE: inside this module the name deliberately shadows the built-in
    ``SyntaxError``; the class name is kept because callers catch it.

    Attributes:
        lineNumber: input line number at which the problem was detected.
        text: human-readable description of the problem.
    """

    def __init__(self, lineNumber, text):
        # Forward both values to Exception so that ``args`` is populated;
        # with empty ``args`` the instance cannot be copied or unpickled,
        # because re-creation calls the constructor with ``args``.
        Exception.__init__(self, lineNumber, text)
        self.lineNumber = lineNumber
        self.text = text

    def __str__(self):
        return "%%ERROR - [at line: %d] %s" % (self.lineNumber, self.text)
class PdbxReader(object):
    """PDBx reader for data files and dictionaries.

    The input stream is split into tokens (item names, quoted strings,
    semi-colon delimited text blocks and bare words) and the token stream is
    assembled into DataContainer / DefinitionContainer objects holding
    DataCategory tables.
    """

    def __init__(self, ifh):
        """ifh - input file handle returned by open()
        """
        self.__curLineNumber = 0
        self.__ifh = ifh
        # Reserved-word prefix (the text before the first '_', lower-cased)
        # mapped to the parser state that handles it.
        self.__stateDict = {
            "data": "ST_DATA_CONTAINER",
            "loop": "ST_TABLE",
            "global": "ST_GLOBAL_CONTAINER",
            "save": "ST_DEFINITION",
            "stop": "ST_STOP",
        }

    def read(self, containerList):
        """Append the definition and data containers parsed from the input
        to containerList.

        Raises:
            SyntaxError: (the class defined in this module) on malformed input.
            PdbxError: when the parser returns before the input is exhausted,
                which happens only when it meets a stop_ reserved word.
        """
        self.__curLineNumber = 0
        try:
            self.__parser(self.__tokenizer(self.__ifh), containerList)
        except StopIteration:
            # Normal termination: the tokenizer ran out of input.
            pass
        else:
            raise PdbxError()

    def __syntaxError(self, errText):
        """Raise SyntaxError tagged with the current line number (never returns)."""
        raise SyntaxError(self.__curLineNumber, errText)

    def __getContainerName(self, inWord):
        """Return the name of the data_ or save_ container.

        Both prefixes are five characters long, hence the fixed slice.
        """
        return str(inWord[5:]).strip()

    def __getState(self, inWord):
        """Identify reserved syntax elements and assign an associated state.

        Returns: (reserved word, state)
        where -
            reserved word - one of the CIF syntax elements
                            data, loop, global, save, stop (lower-cased,
                            without the trailing underscore), or None.
            state - the parser state required to process the next section,
                    "ST_UNKNOWN" when inWord is not a reserved word.
        """
        i = inWord.find("_")
        if i == -1:
            return None, "ST_UNKNOWN"
        rWord = inWord[:i].lower()
        try:
            return rWord, self.__stateDict[rWord]
        except KeyError:
            return None, "ST_UNKNOWN"

    def __parser(self, tokenizer, containerList):
        """Parser for PDBx data files and dictionaries.

        Input - tokenizer      iterator of 4-tuples
                               (category name, attribute name, quoted string, word)
                               as produced by __tokenizer().
                containerList  list-type container that is appended with the
                               data and definition objects parsed from the input.

        The method is left through StopIteration when the tokenizer is
        exhausted; it returns normally only on a stop_ reserved word.
        Every call to __syntaxError() raises, so no code follows those calls.
        """
        # Working container - data or definition
        curContainer = None
        # Working category container
        categoryIndex = {}
        curCategory = None
        curRow = None
        state = None

        # Find the first reserved word and begin capturing data.
        while True:
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            if curWord is None:
                continue
            reservedWord, state = self.__getState(curWord)
            if reservedWord is not None:
                break

        while True:
            #
            # Set the current state -
            #
            # At this point in the processing cycle we are expecting a token
            # containing either a '_category.attribute' or a reserved word.
            #
            if curCatName is not None:
                state = "ST_KEY_VALUE_PAIR"
            elif curWord is not None:
                reservedWord, state = self.__getState(curWord)
            else:
                self.__syntaxError("Miscellaneous syntax error")

            #
            # Process _category.attribute value assignments
            #
            if state == "ST_KEY_VALUE_PAIR":
                try:
                    curCategory = categoryIndex[curCatName]
                except KeyError:
                    # A new category is encountered - create a container and add a row
                    curCategory = categoryIndex[curCatName] = DataCategory(curCatName)
                    try:
                        curContainer.append(curCategory)
                    except AttributeError:
                        # curContainer is still None: no data_/save_ seen yet.
                        self.__syntaxError("Category cannot be added to data_ block")
                    curRow = []
                    curCategory.append(curRow)
                else:
                    # Recover the existing row from the category
                    try:
                        curRow = curCategory[0]
                    except IndexError:
                        self.__syntaxError("Internal index error accessing category data")

                # Check for duplicate attributes and add attribute to table.
                if curAttName in curCategory.getAttributeList():
                    self.__syntaxError("Duplicate attribute encountered in category")
                else:
                    curCategory.appendAttribute(curAttName)

                # Get the data for this attribute from the next token
                tCat, tAtt, curQuotedString, curWord = next(tokenizer)

                if tCat is not None or (curQuotedString is None and curWord is None):
                    self.__syntaxError("Missing data for item _%s.%s" % (curCatName, curAttName))

                if curWord is not None:
                    # Validation check token for misplaced reserved words -
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        self.__syntaxError("Unexpected reserved word: %s" % (reservedWord))
                    curRow.append(curWord)
                elif curQuotedString is not None:
                    curRow.append(curQuotedString)
                else:
                    self.__syntaxError("Missing value in item-value pair")

                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
                continue

            #
            # Process a loop_ declaration and associated data -
            #
            elif state == "ST_TABLE":
                # The category name in the next curCatName,curAttName pair
                # defines the name of the category container.
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                if curCatName is None or curAttName is None:
                    self.__syntaxError("Unexpected token in loop_ declaration")

                # Check for a previous category declaration.
                # NOTE(review): loop_ categories are never added to
                # categoryIndex, so this only detects a clash with a category
                # created from key-value pairs - confirm that is intended.
                if curCatName in categoryIndex:
                    self.__syntaxError("Duplicate category declaration in loop_")

                curCategory = DataCategory(curCatName)
                try:
                    curContainer.append(curCategory)
                except AttributeError:
                    self.__syntaxError("loop_ declaration outside of data_ block or save_ frame")

                curCategory.appendAttribute(curAttName)

                # Read the rest of the loop_ declaration
                while True:
                    curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
                    if curCatName is None:
                        break
                    if curCatName != curCategory.getName():
                        self.__syntaxError("Changed category name in loop_ declaration")
                    curCategory.appendAttribute(curAttName)

                # If the next token is a 'word', check it for any reserved words -
                if curWord is not None:
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        if reservedWord == "stop":
                            return
                        self.__syntaxError(
                            "Unexpected reserved word after loop declaration: %s" % (reservedWord))

                # Read the table of data for this loop_ -
                while True:
                    # The row is registered first and filled in place afterwards.
                    curRow = []
                    curCategory.append(curRow)

                    for tAtt in curCategory.getAttributeList():
                        if curWord is not None:
                            curRow.append(curWord)
                        elif curQuotedString is not None:
                            curRow.append(curQuotedString)
                        curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                    # loop_ data processing ends if -
                    # A new _category.attribute is encountered
                    if curCatName is not None:
                        break
                    # A reserved word is encountered
                    if curWord is not None:
                        reservedWord, state = self.__getState(curWord)
                        if reservedWord is not None:
                            break
                continue

            elif state == "ST_DEFINITION":
                # Ignore trailing unnamed saveframe delimiters e.g. 'save_'
                sName = self.__getContainerName(curWord)
                if len(sName) > 0:
                    curContainer = DefinitionContainer(sName)
                    containerList.append(curContainer)
                    categoryIndex = {}
                    curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_DATA_CONTAINER":
                dName = self.__getContainerName(curWord)
                if len(dName) == 0:
                    dName = "unidentified"
                curContainer = DataContainer(dName)
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_STOP":
                return

            elif state == "ST_GLOBAL_CONTAINER":
                # Must match the state name assigned to "global" in __init__;
                # a mismatched name leaves the token unconsumed and the loop
                # spinning forever on the same word.
                curContainer = DataContainer("blank-global")
                curContainer.setGlobal()
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_UNKNOWN":
                self.__syntaxError("Unrecogized syntax element: " + str(curWord))

    def __logicalLines(self, ifh):
        """Yield (multiLineText, lineText) pairs for the tokenizers.

        Lines starting with '#' are dropped.  When a line opens a semi-colon
        delimited text block, the whole block is gathered: multiLineText is
        the block content and lineText is what follows the closing ';' on
        its line.  Otherwise multiLineText is None and lineText is the line.
        The current line number is advanced for every line consumed.
        """
        fileIter = iter(ifh)
        # 'for' loops (rather than explicit next() calls) end the generator
        # cleanly at end of input; a StopIteration escaping from inside a
        # generator body is turned into RuntimeError by PEP 479.
        for line in fileIter:
            self.__curLineNumber += 1

            # Dump comments
            if line.startswith("#"):
                continue

            mlText = None
            if line.startswith(";"):
                mlString = [line[1:]]
                closingLine = None
                for nextLine in fileIter:
                    self.__curLineNumber += 1
                    if nextLine.startswith(";"):
                        closingLine = nextLine
                        break
                    mlString.append(nextLine)
                if closingLine is None:
                    # Input ended inside the text block: nothing more to yield.
                    return
                # remove trailing new-line that is part of the \n; delimiter
                mlString[-1] = mlString[-1].rstrip()
                mlText = "".join(mlString)
                # The remainder of the closing line still has to be tokenized.
                line = closingLine[1:]

            yield mlText, line

    def __tokenizer(self, ifh):
        """Tokenizer method for the mmCIF syntax file -

        Each yield from this method returns information about the next
        token in the form of a tuple with the following structure.

        (category name, attribute name, quoted strings, words w/o quotes or white space)

        Single and double quoted strings are matched by separate regular
        expression groups to better handle embedded quotes; both are
        reported in the quoted-string slot of the tuple.
        """
        #
        # Regex definition for mmCIF syntax - semi-colon delimited strings are
        # handled outside of this regex.
        mmcifRe = re.compile(
            r"(?:"
            r"(?:_(.+?)[.](\S+))"          # _category.attribute
            r"|"
            r"(?:['](.*?)(?:[']\s|[']$))"  # single quoted strings
            r"|"
            r'(?:["](.*?)(?:["]\s|["]$))'  # double quoted strings
            r"|"
            r"(?:\s*#.*$)"                 # comments (dumped)
            r"|"
            r"(\S+)"                       # unquoted words
            r")"
        )
        noMatch = (None, None, None, None, None)

        for mlText, line in self.__logicalLines(ifh):
            if mlText is not None:
                yield (None, None, mlText, None)

            for it in mmcifRe.finditer(line):
                tgroups = it.groups()
                # Comment matches capture nothing and are dropped.
                if tgroups == noMatch:
                    continue
                # Consolidate single/double quoted text into one slot.
                if tgroups[2] is not None:
                    qs = tgroups[2]
                elif tgroups[3] is not None:
                    qs = tgroups[3]
                else:
                    qs = None
                yield (tgroups[0], tgroups[1], qs, tgroups[4])

    def __tokenizerOrg(self, ifh):
        """Earlier tokenizer that matches both quote styles with one group.

        Each yield from this method returns information about the next
        token in the form of a tuple with the following structure.

        (category name, attribute name, quoted strings, words w/o quotes or white space)

        No caller is visible in this class; read() uses __tokenizer().
        """
        #
        # Regex definition for mmCIF syntax - semi-colon delimited strings are
        # handled outside of this regex.
        mmcifRe = re.compile(
            r"(?:"
            r"(?:_(.+?)[.](\S+))"                # _category.attribute
            r"|"
            r"""(?:['"](.*?)(?:['"]\s|['"]$))"""  # quoted strings
            r"|"
            r"(?:\s*#.*$)"                       # comments (dumped)
            r"|"
            r"(\S+)"                             # unquoted words
            r")"
        )
        noMatch = (None, None, None, None)

        for mlText, line in self.__logicalLines(ifh):
            if mlText is not None:
                yield (None, None, mlText, None)

            # Apply regex to the current line
            for it in mmcifRe.finditer(line):
                groups = it.groups()
                if groups != noMatch:
                    yield groups
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxReaderTests.py
# Author: jdw
# Date: 9-Jan-2012
# Version: 0.001
#
# Update:
# 27-Sep-2012 jdw add test case for reading PDBx structure factor file
#
##
"""
Test cases for reading PDBx/mmCIF data files PdbxReader class -
"""
import
sys
,
unittest
,
traceback
import
sys
,
time
,
os
,
os
.
path
,
shutil
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxReaderTests(unittest.TestCase):
    """Tests that read PDBx/mmCIF data files with the PdbxReader class.

    Every test writes a start banner to ``self.lfh``.  Unexpected exceptions
    are printed to stderr and reported as test failures.
    """

    def setUp(self):
        self.lfh = sys.stderr
        self.verbose = False
        self.pathPdbxDataFile = "../tests/1kip.cif"
        self.pathBigPdbxDataFile = "../tests/1ffk.cif"
        self.pathSFDataFile = "../tests/1kip-sf.cif"

    def tearDown(self):
        pass

    # ------------------------------------------------------------------
    # Private helpers (leading underscore keeps unittest from collecting them)
    # ------------------------------------------------------------------
    def _logStart(self):
        """Write the 'Starting <class> <method>' banner for the calling test."""
        # Frame 1 is the test method that called this helper.
        callerName = sys._getframe(1).f_code.co_name
        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, callerName))

    def _readFile(self, path):
        """Parse *path* with PdbxReader and return the list of containers."""
        containers = []
        # 'with' guarantees the handle is closed even when read() raises.
        with open(path, "r") as ifh:
            PdbxReader(ifh).read(containers)
        return containers

    def _failWithTraceback(self):
        """Print the exception being handled to stderr and fail the test."""
        traceback.print_exc(file=sys.stderr)
        self.fail()

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def testReadSmallDataFile(self):
        """Test case - read data file
        """
        self._logStart()
        try:
            self._readFile(self.pathPdbxDataFile)
        except Exception:
            self._failWithTraceback()

    def testReadBigDataFile(self):
        """Test case - read data file
        """
        self._logStart()
        try:
            self._readFile(self.pathBigPdbxDataFile)
        except Exception:
            self._failWithTraceback()

    def testReadSFDataFile(self):
        """Test case - read PDB structure factor data file and compute statistics on f/sig(f).
        """
        self._logStart()
        try:
            myContainerList = self._readFile(self.pathSFDataFile)
            c0 = myContainerList[0]

            catObj = c0.getObj("refln")
            if catObj is None:
                # The statistics below cannot be computed without this category.
                self.fail("No 'refln' category found in %s" % self.pathSFDataFile)

            # Get column name index (lower-cased item name -> column position).
            itDict = {}
            for idxIt, itName in enumerate(catObj.getItemNameList()):
                itDict[str(itName).lower()] = idxIt

            idf = itDict['_refln.f_meas_au']
            idsigf = itDict['_refln.f_meas_sigma_au']

            ratios = []
            for row in catObj.getRowList():
                try:
                    f = float(row[idf])
                    sigf = float(row[idsigf])
                    # f/sig(f), as stated in the docstring and the log line.
                    ratios.append(f / sigf)
                except (ValueError, TypeError, IndexError, ZeroDivisionError):
                    # Skip rows with missing, non-numeric or zero-sigma values.
                    continue

            if not ratios:
                self.fail("No usable f/sig(f) rows in %s" % self.pathSFDataFile)

            icount = len(ratios)
            self.lfh.write("f/sig(f) min %f max %f avg %f count %d\n"
                           % (min(ratios), max(ratios), sum(ratios) / icount, icount))
        except self.failureException:
            # Explicit failures above already carry their own message.
            raise
        except Exception:
            self._failWithTraceback()
def simpleSuite():
    """Return a suite running the big-file, small-file and SF-file read tests.

    The tests are added in that order.
    """
    selectedTests = (
        "testReadBigDataFile",
        "testReadSmallDataFile",
        "testReadSFDataFile",
    )
    suiteSelect = unittest.TestSuite()
    for testName in selectedTests:
        suiteSelect.addTest(PdbxReaderTests(testName))
    return suiteSelect


if __name__ == '__main__':
    # Run the selected tests with per-test reporting.
    mySuite = simpleSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
#
wrappers/python/simtk/openmm/app/internal/pdbx/reader/__init__.py
0 → 100644
View file @
db72d0cb
wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxWriter.py
# Date: 2011-10-09 Jdw Adapted from PdbxParser.py
#
# Updates:
# 5-Apr-2011 jdw Using the double quote format preference
# 23-Oct-2012 jdw update path details and reorganize.
#
###
"""
Classes for writing data and dictionary containers in PDBx/mmCIF format.
"""
__docformat__
=
"restructuredtext en"
__author__
=
"John Westbrook"
__email__
=
"jwest@rcsb.rutgers.edu"
__license__
=
"Creative Commons Attribution 3.0 Unported"
__version__
=
"V0.01"
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxError(Exception):
    """General-purpose error raised by the PDBx writer."""
class PdbxWriter(object):
    """Write PDBx data files or dictionaries using the input container
    or container list.
    """

    def __init__(self, ofh=None):
        """ofh - output file handle; standard output is used when omitted.
        """
        if ofh is None:
            # Imported locally so the default does not depend on 'sys'
            # happening to be pulled in by this module's star import.
            import sys
            ofh = sys.stdout
        self.__ofh = ofh
        self.__containerList = []
        self.__MAXIMUM_LINE_LENGTH = 2048
        self.__SPACING = 2
        self.__INDENT_DEFINITION = 3
        self.__indentSpace = " " * self.__INDENT_DEFINITION
        self.__doDefinitionIndent = False
        # Maximum number of rows checked for value length and format
        self.__rowPartition = None

    def setRowPartition(self, numRows):
        """Maximum number of rows checked for value length and format
        """
        self.__rowPartition = numRows

    def write(self, containerList):
        """Write every container in containerList, in order."""
        self.__containerList = containerList
        for container in self.__containerList:
            self.writeContainer(container)

    def writeContainer(self, container):
        """Write one data or definition container.

        Categories without rows are skipped, single-row categories are
        written as item-value pairs and multi-row categories as loop_ tables.

        Raises:
            PdbxError: for a multi-row category that has no attributes.
        """
        indS = " " * self.__INDENT_DEFINITION

        # Opening reserved word; definitions (save frames) are indented.
        if isinstance(container, DefinitionContainer):
            self.__write("save_%s\n" % container.getName())
            self.__doDefinitionIndent = True
            self.__write(indS + "#\n")
        elif isinstance(container, DataContainer):
            if container.getGlobal():
                self.__write("global_\n")
                self.__doDefinitionIndent = False
                self.__write("\n")
            else:
                self.__write("data_%s\n" % container.getName())
                self.__doDefinitionIndent = False
                self.__write("#\n")

        for nm in container.getObjNameList():
            obj = container.getObj(nm)
            objL = obj.getRowList()

            # Skip empty objects
            if len(objL) == 0:
                continue
            # Item - value formatting
            elif len(objL) == 1:
                self.__writeItemValueFormat(obj)
            # Table formatting -
            elif len(objL) > 1 and len(obj.getAttributeList()) > 0:
                self.__writeTableFormat(obj)
            else:
                raise PdbxError()

            # Separator after each category that was written.
            if self.__doDefinitionIndent:
                self.__write(indS + "#")
            else:
                self.__write("#")

        # Add a trailing saveframe reserved word
        if isinstance(container, DefinitionContainer):
            self.__write("\nsave_\n")
        self.__write("#\n")

    def __write(self, st):
        """Send st to the output file handle."""
        self.__ofh.write(st)

    def __writeItemValueFormat(self, myCategory):
        """Write a single-row category as aligned '_category.attribute value' lines."""
        # Compute the maximum item name length within this category -
        attributeNameLengthMax = 0
        for attributeName in myCategory.getAttributeList():
            attributeNameLengthMax = max(attributeNameLengthMax, len(attributeName))
        # Width of the name column: '_' + category + '.' + attribute + spacing.
        itemNameLengthMax = (self.__SPACING + len(myCategory.getName())
                             + attributeNameLengthMax + 2)

        lineList = ["#\n"]
        for attributeName, iPos in myCategory.getAttributeListWithOrder():
            if self.__doDefinitionIndent:
                # - add indent --
                lineList.append(self.__indentSpace)
            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
            lineList.append(itemName.ljust(itemNameLengthMax))
            lineList.append(myCategory.getValueFormatted(attributeName, 0))
            lineList.append("\n")

        self.__write("".join(lineList))

    def __writeTableFormat(self, myCategory):
        """Write a multi-row category as a loop_ declaration plus data rows."""
        # Write the declaration of the loop_
        lineList = ['#\n']
        if self.__doDefinitionIndent:
            lineList.append(self.__indentSpace)
        lineList.append("loop_")
        for attributeName in myCategory.getAttributeList():
            lineList.append('\n')
            if self.__doDefinitionIndent:
                lineList.append(self.__indentSpace)
            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
            lineList.append(itemName)
        self.__write("".join(lineList))

        #
        # Write the data in tabular format -
        #
        # For speed make the following evaluation on a portion of the table
        if self.__rowPartition is not None:
            # Floor division keeps the step count an integer on every Python
            # version; plain '/' yields a float under true division.
            numSteps = max(1, myCategory.getRowCount() // self.__rowPartition)
        else:
            numSteps = 1

        formatTypeList, dataTypeList = myCategory.getFormatTypeList(steps=numSteps)
        maxLengthList = myCategory.getAttributeValueMaxLengthList(steps=numSteps)
        spacing = " " * self.__SPACING

        for iRow in range(myCategory.getRowCount()):
            # Every row starts with the newline that ends the previous line.
            lineList = ['\n']
            if self.__doDefinitionIndent:
                lineList.append(self.__indentSpace + " ")

            for iAt in range(myCategory.getAttributeCount()):
                formatType = formatTypeList[iAt]
                maxLength = maxLengthList[iAt]

                if formatType == 'FT_UNQUOTED_STRING' or formatType == 'FT_NULL_VALUE':
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val.ljust(maxLength))
                elif formatType == 'FT_NUMBER':
                    # Numbers are right-aligned within the column.
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val.rjust(maxLength))
                elif formatType == 'FT_QUOTED_STRING':
                    # Two extra columns for the surrounding quotes.
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val.ljust(maxLength + 2))
                elif formatType == "FT_MULTI_LINE_STRING":
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val)

                lineList.append(spacing)

            self.__write("".join(lineList))

        self.__write("\n")
wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxWriterTests.py
# Author: jdw
# Date: 3-November-2009
# Version: 0.001
#
# Update:
# 5-Apr-2011 jdw Using the double quote format preference
# 24-Oct-2012 jdw Update path and examples.
##
"""
Test implementing PDBx/mmCIF write and formatting operations.
"""

# Module metadata: docstring markup convention, author contact, license and version.
__docformat__ = "restructuredtext en"
__author__ = "John Westbrook"
__email__ = "jwest@rcsb.rutgers.edu"
__license__ = "Creative Commons Attribution 3.0 Unported"
__version__ = "V0.01"
import
sys
,
unittest
,
traceback
import
sys
,
time
,
os
,
os
.
path
,
shutil
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app.internal.pdbx.writer.PdbxWriter
import
PdbxWriter
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxWriterTests(unittest.TestCase):
    """Smoke tests for writing, updating and round-tripping PDBx/mmCIF data.

    A test passes when no exception is raised.  Output files are written to
    the current working directory; their contents are not inspected and the
    files are not removed afterwards.
    """

    def setUp(self):
        """Set the log stream and the input/output paths used by the tests."""
        self.lfh = sys.stderr
        self.verbose = False
        self.pathPdbxDataFile = "../tests/1kip.cif"
        self.pathOutputFile = "testOutputDataFile.cif"

    def tearDown(self):
        pass

    def _logStart(self):
        """Write a 'Starting <class> <test method>' banner to the log stream."""
        # Frame 1 is the test method that called this helper.
        callerName = sys._getframe(1).f_code.co_name
        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, callerName))

    def _makeSampleDataList(self):
        """Return a list holding one data block with a 7-attribute, 4-row category."""
        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                              "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
            aCat.appendAttribute(attributeName)
        for _ in range(4):
            aCat.append((1, 2, 3, 4, 5, 6, 7))
        curContainer.append(aCat)
        return [curContainer]

    def _writeDataFile(self, path, dataList):
        """Write ``dataList`` to ``path``; the file is closed even if writing fails."""
        with open(path, "w") as ofh:
            PdbxWriter(ofh).write(dataList)

    def _readDataFile(self, path):
        """Read ``path`` and return its containers; the file is closed even on error."""
        dataList = []
        with open(path, "r") as ifh:
            PdbxReader(ifh).read(dataList)
        return dataList

    def testWriteDataFile(self):
        """Test case -  write data file
        """
        self._logStart()
        try:
            self._writeDataFile("test-output.cif", self._makeSampleDataList())
        except Exception:
            # Report the real cause, then record an ordinary test failure.
            traceback.print_exc(file=sys.stderr)
            self.fail()

    def testUpdateDataFile(self):
        """Test case -  update data file: write, read back, modify values and write again
        """
        self._logStart()
        try:
            # Create an initial data file --
            self._writeDataFile("test-output-1.cif", self._makeSampleDataList())
            #
            # Read and update the data -
            #
            myDataList = self._readDataFile("test-output-1.cif")
            myBlock = myDataList[0]
            myBlock.printIt()
            myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
            myCat.printIt()
            for iRow in range(myCat.getRowCount()):
                myCat.setValue('some value', 'ref_mon_id', iRow)
                myCat.setValue(100, 'ref_mon_num', iRow)
            self._writeDataFile("test-output-2.cif", myDataList)
        except Exception:
            traceback.print_exc(file=sys.stderr)
            self.fail()

    def testReadDataFile(self):
        """Test case -  read data file
        """
        self._logStart()
        try:
            self._readDataFile(self.pathPdbxDataFile)
        except Exception:
            traceback.print_exc(file=sys.stderr)
            self.fail()

    def testReadWriteDataFile(self):
        """Test case -  data file read write test
        """
        self._logStart()
        try:
            myDataList = self._readDataFile(self.pathPdbxDataFile)
            self._writeDataFile(self.pathOutputFile, myDataList)
        except Exception:
            traceback.print_exc(file=sys.stderr)
            self.fail()
def suite():
    """Return a test suite containing every ``test*`` method of PdbxWriterTests.

    Uses a TestLoader rather than ``unittest.makeSuite``, which is deprecated
    and no longer available in recent Python releases.  The loader's default
    method prefix is 'test', matching the prefix previously passed explicitly.
    """
    return unittest.TestLoader().loadTestsFromTestCase(PdbxWriterTests)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
wrappers/python/simtk/openmm/app/internal/pdbx/writer/__init__.py
0 → 100644
View file @
db72d0cb
wrappers/python/simtk/openmm/app/pdbxfile.py
0 → 100644
View file @
db72d0cb
"""
pdbxfile.py: Used for loading PDBx/mmCIF files.
This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2014 Stanford University and the Authors.
Authors: Peter Eastman
Contributors:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
# Module metadata: author and version of this module.
__author__ = "Peter Eastman"
__version__ = "1.0"
import
os
import
sys
from
simtk.openmm
import
Vec3
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app
import
Topology
from
simtk.unit
import
nanometers
,
angstroms
,
is_quantity
,
norm
,
Quantity
import
element
as
elem
try
:
import
numpy
except
:
pass
class PDBxFile(object):
    """PDBxFile parses a PDBx/mmCIF file and constructs a Topology and a set of atom positions from it."""

    def __init__(self, file):
        """Load a PDBx/mmCIF file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Only the first data block of the file is used.  Rows of the first model
        encountered in the atom_site category define the atoms; rows of later
        models only supply additional coordinate frames and must list the same
        atoms in the same order.

        Parameters:
         - file (string) the name of the file to load.  Alternatively you can pass an open file object.

        Raises ValueError if a later model references an atom that the first
        model does not contain, or lists its atoms in a different order.
        """
        top = Topology()
        ## The Topology read from the PDBx/mmCIF file
        self.topology = top
        self._positions = []

        # Load the file.  A file opened here from a path is closed again as soon
        # as it has been read; a file object supplied by the caller is left open.

        openedHere = isinstance(file, str)
        inputFile = open(file) if openedHere else file
        try:
            reader = PdbxReader(inputFile)
            data = []
            reader.read(data)
        finally:
            if openedHere:
                inputFile.close()
        block = data[0]

        # Build the topology.

        atomData = block.getObj('atom_site')
        atomNameCol = atomData.getAttributeIndex('label_atom_id')
        atomIdCol = atomData.getAttributeIndex('id')
        resNameCol = atomData.getAttributeIndex('label_comp_id')
        resIdCol = atomData.getAttributeIndex('label_seq_id')
        asymIdCol = atomData.getAttributeIndex('label_asym_id')
        chainIdCol = atomData.getAttributeIndex('label_entity_id')
        elementCol = atomData.getAttributeIndex('type_symbol')
        modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
        xCol = atomData.getAttributeIndex('Cartn_x')
        yCol = atomData.getAttributeIndex('Cartn_y')
        zCol = atomData.getAttributeIndex('Cartn_z')
        lastChainId = None
        lastResId = None
        lastAsymId = None
        atomTable = {}      # (residue id, asym id, atom name) -> Atom of the first model
        models = []         # model identifiers in order of first appearance
        modelIndices = {}   # model identifier -> index into models / self._positions
        for row in atomData.getRowList():
            # An index of -1 marks a column that is absent; fall back to defaults.
            asymId = ('A' if asymIdCol == -1 else row[asymIdCol])
            atomKey = (row[resIdCol], asymId, row[atomNameCol])
            model = ('1' if modelCol == -1 else row[modelCol])
            modelIndex = modelIndices.get(model)
            if modelIndex is None:
                # First row of a new model: start a new coordinate frame.
                modelIndex = len(models)
                modelIndices[model] = modelIndex
                models.append(model)
                self._positions.append([])
            if modelIndex == 0:
                # This row defines a new atom.

                if lastChainId != row[chainIdCol]:
                    # The start of a new chain.
                    chain = top.addChain()
                    lastChainId = row[chainIdCol]
                    lastResId = None
                    lastAsymId = None
                if lastResId != row[resIdCol] or lastAsymId != asymId:
                    # The start of a new residue.
                    res = top.addResidue(row[resNameCol], chain)
                    lastResId = row[resIdCol]
                    lastAsymId = asymId
                element = None
                if elementCol != -1:
                    # Without this guard a missing column would silently read
                    # the last field of the row (row[-1]) as the element symbol.
                    try:
                        element = elem.get_by_symbol(row[elementCol])
                    except KeyError:
                        # Unrecognized symbol: leave the element unset.
                        pass
                atom = top.addAtom(row[atomNameCol], element, res)
                atomTable[atomKey] = atom
            else:
                # This row defines coordinates for an existing atom in one of the later models.

                try:
                    atom = atomTable[atomKey]
                except KeyError:
                    raise ValueError('Unknown atom %s in residue %s %s for model %s' % (row[atomNameCol], row[resNameCol], row[resIdCol], model))
                if atom.index != len(self._positions[modelIndex]):
                    raise ValueError('Atom %s for model %s does not match the order of atoms for model %s' % (row[atomIdCol], model, models[0]))
            # Coordinates are scaled by 0.1 here and tagged as nanometers below.
            self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1)
        for i in range(len(self._positions)):
            self._positions[i] = self._positions[i]*nanometers
        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        # Per-frame cache of numpy position arrays, filled lazily by getPositions().
        self._numpyPositions = None

        # Record unit cell information, if present.

        cell = block.getObj('cell')
        if cell is not None and cell.getRowCount() > 0:
            row = cell.getRow(0)
            cellSize = [float(row[cell.getAttributeIndex(attribute)]) for attribute in ('length_a', 'length_b', 'length_c')]*angstroms
            self.topology.setUnitCellDimensions(cellSize)

        # Add bonds based on struct_conn records.

        connectData = block.getObj('struct_conn')
        if connectData is not None:
            res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
            res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
            atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
            atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
            asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
            asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
            typeCol = connectData.getAttributeIndex('conn_type_id')
            connectBonds = []
            for row in connectData.getRowList():
                # Only the first six characters of the connection type are compared.
                connType = row[typeCol][:6]
                if connType in ('covale', 'disulf', 'modres'):
                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                    # Records that refer to atoms not present in the topology are ignored.
                    if key1 in atomTable and key2 in atomTable:
                        connectBonds.append((atomTable[key1], atomTable[key2]))
            if connectBonds:
                # Only add bonds that don't already exist.
                existingBonds = set(top.bonds())
                for bond in connectBonds:
                    if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                        top.addBond(bond[0], bond[1])
                        existingBonds.add(bond)

    def getTopology(self):
        """Get the Topology of the model."""
        return self.topology

    def getNumFrames(self):
        """Get the number of frames stored in the file."""
        return len(self._positions)

    def getPositions(self, asNumpy=False, frame=0):
        """Get the atomic positions.

        Parameters:
         - asNumpy (boolean=False) if true, the values are returned as a numpy array instead of a list of Vec3s
         - frame (int=0) the index of the frame for which to get positions
        """
        if asNumpy:
            # Numpy versions are built on first request and cached per frame.
            if self._numpyPositions is None:
                self._numpyPositions = [None]*len(self._positions)
            if self._numpyPositions[frame] is None:
                self._numpyPositions[frame] = Quantity(numpy.array(self._positions[frame].value_in_unit(nanometers)), nanometers)
            return self._numpyPositions[frame]
        return self._positions[frame]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment