Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
db72d0cb
Commit
db72d0cb
authored
Nov 06, 2014
by
peastman
Browse files
Merge pull request #702 from peastman/pdbx
Created reader for PDBx/mmCIF files
parents
4ab3b428
49722158
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
2844 additions
and
2 deletions
+2844
-2
docs-source/licenses/Licenses.txt
docs-source/licenses/Licenses.txt
+9
-1
wrappers/python/setup.py
wrappers/python/setup.py
+4
-1
wrappers/python/simtk/openmm/app/__init__.py
wrappers/python/simtk/openmm/app/__init__.py
+1
-0
wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
+0
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
...n/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
+819
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
...ython/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
+638
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
...mtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
+236
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
...ython/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
+461
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
.../simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
+126
-0
wrappers/python/simtk/openmm/app/internal/pdbx/reader/__init__.py
.../python/simtk/openmm/app/internal/pdbx/reader/__init__.py
+0
-0
wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
...ython/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
+191
-0
wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
.../simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
+165
-0
wrappers/python/simtk/openmm/app/internal/pdbx/writer/__init__.py
.../python/simtk/openmm/app/internal/pdbx/writer/__init__.py
+0
-0
wrappers/python/simtk/openmm/app/pdbxfile.py
wrappers/python/simtk/openmm/app/pdbxfile.py
+194
-0
No files found.
docs-source/licenses/Licenses.txt
View file @
db72d0cb
...
...
@@ -2,7 +2,7 @@ OpenMM was developed by Simbios, the NIH National Center for Physics-Based
Simulation of Biological Structures at Stanford, funded under the NIH Roadmap
for Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright
©
2008-2014 Stanford University and the Authors.
Portions copyright © 2008-2014 Stanford University and the Authors.
There are several licenses which cover different parts of OpenMM as described
below.
...
...
@@ -119,3 +119,11 @@ freely, subject to the following restrictions:
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
6. PdbxReader
OpenMM uses the PDBx/mmCIF parser written by John Westbrook. It is distributed
under the Creative Commons Attribution 3.0 Unported license. For details, see
https://creativecommons.org/licenses/by/3.0. This library was modified to move
it inside the simtk.openmm.app.internal module.
\ No newline at end of file
wrappers/python/setup.py
View file @
db72d0cb
...
...
@@ -145,7 +145,10 @@ def buildKeywordDictionary(major_version_num=MAJOR_VERSION_NUM,
"simtk.openmm"
,
"simtk.openmm.app"
,
"simtk.openmm.app.internal"
,
"simtk.openmm.app.internal.charmm"
]
"simtk.openmm.app.internal.charmm"
,
"simtk.openmm.app.internal.pdbx"
,
"simtk.openmm.app.internal.pdbx.reader"
,
"simtk.openmm.app.internal.pdbx.writer"
]
setupKeywords
[
"data_files"
]
=
[]
setupKeywords
[
"package_data"
]
=
{
"simtk"
:
[],
"simtk.unit"
:
[],
...
...
wrappers/python/simtk/openmm/app/__init__.py
View file @
db72d0cb
...
...
@@ -12,6 +12,7 @@ __email__ = "peastman@stanford.edu"
from
topology
import
Topology
,
Chain
,
Residue
,
Atom
from
pdbfile
import
PDBFile
from
pdbxfile
import
PDBxFile
from
forcefield
import
ForceField
from
simulation
import
Simulation
from
pdbreporter
import
PDBReporter
...
...
wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
0 → 100644
View file @
db72d0cb
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
0 → 100644
View file @
db72d0cb
##
#
# File: PdbxContainers.py
# Original: 02-Feb-2009 jdw
#
# Update:
# 23-Mar-2011 jdw Added method to rename attributes in category containers.
# 05-Apr-2011 jdw Change cif writer to select double quoting as preferred
# quoting style where possible.
# 16-Jan-2012 jdw Create base class for DataCategory class
# 22-Mar-2012 jdw when append attributes to existing categories update
# existing rows with placeholder null values.
# 2-Sep-2012 jdw add option to avoid embedded quoting that might
# confuse simple parsers.
# 28-Jun-2013 jdw export remove method
# 29-Jun-2013 jdw export remove row method
##
"""
A collection of container classes supporting the PDBx/mmCIF storage model.
A base container class is defined which supports common features of
data and definition containers. PDBx data files are organized in
sections called data blocks which are mapped to data containers.
PDBx dictionaries contain definition sections and data sections
which are mapped to definition and data containers respectively.
Data in both PDBx data files and dictionaries are organized in
data categories. In the PDBx syntax, individual items of data are
identified by labels of the form '_categoryName.attributeName'.
The terms category and attribute in PDBx jargon are analogous to
table and column in a relational data model, or class and attribute
in an object oriented data model.
The DataCategory class provides base storage container for instance
data and definition meta data.
"""
__docformat__
=
"restructuredtext en"
__author__
=
"John Westbrook"
__email__
=
"jwest@rcsb.rutgers.edu"
__license__
=
"Creative Commons Attribution 3.0 Unported"
__version__
=
"V0.01"
import
re
,
sys
,
traceback
class CifName(object):
    """Static helpers for splitting CIF-style data names.

    A CIF item name has the form '_category.attribute'; the helpers below
    return one half of such a name.
    """

    def __init__(self):
        pass

    @staticmethod
    def categoryPart(name):
        """Return the category portion of *name*.

        A single leading underscore is dropped, and everything from the
        first '.' onwards is discarded.  A name without a '.' is returned
        whole (minus the leading underscore).
        """
        stripped = name[1:] if name.startswith("_") else name
        dot = stripped.find(".")
        return stripped if dot < 0 else stripped[:dot]

    @staticmethod
    def attributePart(name):
        """Return the text after the first '.' in *name*, or None if there is no '.'."""
        dot = name.find(".")
        if dot < 0:
            return None
        return name[dot + 1:]
class ContainerBase(object):
    """Container base class for data and definition objects.

    Holds an ordered collection of named objects.  Stored objects must
    provide getName()/setName(); printIt() additionally calls printIt(fh)
    or dumpIt(fh) on each stored object.
    """

    def __init__(self, name):
        # The enclosing scope of the data container (e.g. data_/save_)
        self.__name = name
        # List of category names within this container (insertion order).
        self.__objNameList = []
        # Dictionary of stored objects keyed by category name.
        self.__objCatalog = {}
        self.__type = None

    def getType(self):
        """Return the container type set by setType() (None until set)."""
        return self.__type

    def setType(self, type):
        """Set the container type label."""
        self.__type = type

    def getName(self):
        """Return the container name."""
        return self.__name

    def setName(self, name):
        """Set the container name."""
        self.__name = name

    def exists(self, name):
        """Return True if an object called *name* is stored in this container."""
        return name in self.__objCatalog

    def getObj(self, name):
        """Return the stored object called *name*, or None if there is none."""
        return self.__objCatalog.get(name)

    def getObjNameList(self):
        """Return the (live) list of stored object names in insertion order."""
        return self.__objNameList

    def append(self, obj):
        """Add the input object to the current object catalog.

        An existing object of the same name will be overwritten (its position
        in the name list is kept).  Objects whose getName() is None are ignored.
        """
        name = obj.getName()
        if name is None:
            return
        if name not in self.__objCatalog:
            # self.__objNameList is keeping track of object order here --
            self.__objNameList.append(name)
        self.__objCatalog[name] = obj

    def replace(self, obj):
        """Replace an existing object with the input object.

        Nothing happens when no object of the same name is already stored.
        """
        name = obj.getName()
        if name is not None and name in self.__objCatalog:
            self.__objCatalog[name] = obj

    def printIt(self, fh=sys.stdout, type="brief"):
        """Write a summary of the container and of each stored object to *fh*.

        With type == 'brief' each object's printIt(fh) is used, otherwise its
        dumpIt(fh).
        """
        fh.write("+ %s container: %30s contains %4d categories\n" %
                 (self.getType(), self.getName(), len(self.__objNameList)))
        for nm in self.__objNameList:
            fh.write("--------------------------------------------\n")
            fh.write("Data category: %s\n" % nm)
            if type == 'brief':
                self.__objCatalog[nm].printIt(fh)
            else:
                self.__objCatalog[nm].dumpIt(fh)

    def rename(self, curName, newName):
        """Change the name of an object in place.

        Returns True on success, or False (leaving the container untouched)
        when *curName* is not stored or the object cannot be renamed.
        """
        try:
            # Resolve everything that can fail before mutating any state so a
            # failure never leaves the name list and the catalog out of sync.
            i = self.__objNameList.index(curName)
            obj = self.__objCatalog[curName]
            obj.setName(newName)
        except Exception:
            return False
        self.__objNameList[i] = newName
        self.__objCatalog[newName] = obj
        if newName != curName:
            # Drop the old key; otherwise exists(curName)/getObj(curName)
            # would keep answering for a name that is no longer listed.
            del self.__objCatalog[curName]
        # NOTE(review): renaming onto the name of a different stored object
        # overwrites that object in the catalog while its name stays listed.
        return True

    def remove(self, curName):
        """Remove object by name.  Return True on success or False otherwise."""
        try:
            if curName not in self.__objCatalog:
                return False
            del self.__objCatalog[curName]
            self.__objNameList.remove(curName)
            return True
        except (TypeError, ValueError):
            # Unhashable name, or the name was missing from the order list.
            return False
class DefinitionContainer(ContainerBase):
    """Container whose type is 'definition' (set in the constructor)."""

    def __init__(self, name):
        super(DefinitionContainer, self).__init__(name)
        self.setType('definition')

    def isCategory(self):
        """Return True if the container holds an object named 'category'."""
        return self.exists('category')

    def isAttribute(self):
        """Return True if the container holds an object named 'item'."""
        return self.exists('item')

    def printIt(self, fh=sys.stdout, type="brief"):
        """Write a summary of the definition and of each stored object to *fh*.

        With type == 'brief' each object's printIt(fh) is used, otherwise its
        dumpIt(fh).
        """
        fh.write("Definition container: %30s contains %4d categories\n" %
                 (self.getName(), len(self.getObjNameList())))
        if self.isCategory():
            fh.write("Definition type: category\n")
        elif self.isAttribute():
            fh.write("Definition type: item\n")
        else:
            fh.write("Definition type: undefined\n")

        for nm in self.getObjNameList():
            fh.write("--------------------------------------------\n")
            fh.write("Definition category: %s\n" % nm)
            if type == 'brief':
                self.getObj(nm).printIt(fh)
            else:
                # Was 'dumpId', which no stored object provides; the full
                # dump method is dumpIt (as used by ContainerBase.printIt).
                self.getObj(nm).dumpIt(fh)
class DataContainer(ContainerBase):
    """Container class for DataCategory objects.

    The container type is set to 'data'; a separate flag records whether
    the container has been marked as global.
    """

    def __init__(self, name):
        super(DataContainer, self).__init__(name)
        self.setType('data')
        self.__globalFlag = False

    def invokeDataBlockMethod(self, type, method, db):
        """Execute the inline code returned by method.getInline().

        The code runs with this method's local names (self, type, method, db)
        visible to it.
        """
        self.__currentRow = 1
        # SECURITY NOTE(review): this executes arbitrary Python source taken
        # from `method`; it must never be fed untrusted input.
        # The call form of exec is used because it behaves like the exec
        # statement on Python 2 and is the only valid form on Python 3.
        exec(method.getInline())

    def setGlobal(self):
        """Mark this container as global."""
        self.__globalFlag = True

    def getGlobal(self):
        """Return True if setGlobal() has been called on this container."""
        return self.__globalFlag
class DataCategoryBase(object):
    """Base storage for a data category: a name, attribute names and rows."""

    def __init__(self, name, attributeNameList=None, rowList=None):
        self._name = name
        # Caller-supplied lists are stored as-is (not copied); fresh lists are
        # created only when nothing was passed in.
        self._rowList = [] if rowList is None else rowList
        self._attributeNameList = [] if attributeNameList is None else attributeNameList
        #
        # Derived class data -
        #
        self._catalog = {}
        self._numAttributes = 0
        self.__setup()

    def __setup(self):
        """Recompute the attribute count and the lower-case name catalog."""
        names = self._attributeNameList
        self._numAttributes = len(names)
        # Maps lower-cased attribute name -> attribute name as stored.
        self._catalog = dict((attName.lower(), attName) for attName in names)

    def setRowList(self, rowList):
        """Replace the row list with *rowList* (stored by reference)."""
        self._rowList = rowList

    def setAttributeNameList(self, attributeNameList):
        """Replace the attribute name list and rebuild the derived data."""
        self._attributeNameList = attributeNameList
        self.__setup()

    def setName(self, name):
        """Set the category name."""
        self._name = name

    def get(self):
        """Return the tuple (name, attributeNameList, rowList)."""
        return (self._name, self._attributeNameList, self._rowList)
class DataCategory(DataCategoryBase):
    """Methods for creating, accessing, and formatting PDBx cif data categories.

    A category behaves like a table: the attribute name list holds the column
    names and the row list holds one list of values per row.  A "current row"
    index and "current attribute" serve as defaults for getValue(),
    setValue() and getValueFormatted().
    """

    def __init__(self, name, attributeNameList=None, rowList=None):
        super(DataCategory, self).__init__(name, attributeNameList, rowList)
        # Stream used for diagnostic messages.
        self.__lfh = sys.stdout
        self.__currentRowIndex = 0
        self.__currentAttribute = None
        # When True, quoting styles that would embed a quote character next
        # to whitespace are avoided (see __dataTypePdbx).
        self.__avoidEmbeddedQuoting = False
        #
        # --------------------------------------------------------------------
        # any whitespace
        self.__wsRe = re.compile(r"\s")
        self.__wsAndQuotesRe = re.compile(r"[\s'\"]")
        # any newline or carriage control
        self.__nlRe = re.compile(r"[\n\r]")
        # single quote
        self.__sqRe = re.compile(r"[']")
        self.__sqWsRe = re.compile(r"('\s)|(\s')")
        # double quote
        self.__dqRe = re.compile(r'["]')
        self.__dqWsRe = re.compile(r'("\s)|(\s")')
        #
        self.__intRe = re.compile(r'^[0-9]+$')
        self.__floatRe = re.compile(r'^-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)?$')
        #
        # The two lists are parallel: position i of __formatTypeList is the
        # output format used for data type i.  The order of __dataTypeList
        # also ranks the types when a column mixes several of them.
        self.__dataTypeList = ['DT_NULL_VALUE', 'DT_INTEGER', 'DT_FLOAT', 'DT_UNQUOTED_STRING', 'DT_ITEM_NAME',
                               'DT_DOUBLE_QUOTED_STRING', 'DT_SINGLE_QUOTED_STRING', 'DT_MULTI_LINE_STRING']
        self.__formatTypeList = ['FT_NULL_VALUE', 'FT_NUMBER', 'FT_NUMBER', 'FT_UNQUOTED_STRING',
                                 'FT_QUOTED_STRING', 'FT_QUOTED_STRING', 'FT_QUOTED_STRING', 'FT_MULTI_LINE_STRING']

    def __getitem__(self, x):
        """Implements list-type functionality -

        Implements op[x] for some special cases -
           x=integer - returns the row in category (normal list behavior)
           x=string  - returns the value of attribute 'x' in first row.

        Raises IndexError for a bad row index, KeyError for an unknown
        attribute (or when there is no first row) and TypeError for any other
        kind of key.
        """
        if isinstance(x, int):
            return self._rowList[x]
        if isinstance(x, str):
            ii = self.getAttributeIndex(x)
            # getAttributeIndex() signals "unknown" with -1; used as a list
            # index that would silently return the LAST column's value.
            if ii < 0:
                raise KeyError(x)
            try:
                return self._rowList[0][ii]
            except IndexError:
                raise KeyError(x)
        raise TypeError(x)

    def getCurrentAttribute(self):
        """Return the current default attribute name (may be None)."""
        return self.__currentAttribute

    def getRowIndex(self):
        """Return the current default row index."""
        return self.__currentRowIndex

    def getRowList(self):
        """Return the (live) list of rows."""
        return self._rowList

    def getRowCount(self):
        """Return the number of rows."""
        return len(self._rowList)

    def getRow(self, index):
        """Return the row at *index*, or an empty list if it cannot be fetched."""
        try:
            return self._rowList[index]
        except (IndexError, TypeError):
            return []

    def removeRow(self, index):
        """Delete the row at *index* (0 <= index < row count).

        Returns True when a row was removed, False otherwise.  The current row
        index is pulled back to the last row if it now points past the end.
        """
        try:
            if 0 <= index < len(self._rowList):
                del self._rowList[index]
                if self.__currentRowIndex >= len(self._rowList):
                    self.__currentRowIndex = len(self._rowList) - 1
                return True
        except TypeError:
            # Non-numeric index - treated as "nothing removed".
            pass
        return False

    def getFullRow(self, index):
        """Return a full row based on the length of the attribute list.

        A stored row shorter than the attribute list is padded in place with
        '?'.  If the row cannot be fetched a list of '?' is returned instead.
        """
        try:
            row = self._rowList[index]
            missing = self._numAttributes - len(row)
            if missing > 0:
                row.extend(['?'] * missing)
            return row
        except (IndexError, TypeError):
            return ['?'] * self._numAttributes

    def getName(self):
        """Return the category name."""
        return self._name

    def getAttributeList(self):
        """Return the (live) list of attribute names."""
        return self._attributeNameList

    def getAttributeCount(self):
        """Return the number of attributes."""
        return len(self._attributeNameList)

    def getAttributeListWithOrder(self):
        """Return a list of (attributeName, position) tuples."""
        return [(att, ii) for ii, att in enumerate(self._attributeNameList)]

    def getAttributeIndex(self, attributeName):
        """Return the position of *attributeName*, or -1 if it is unknown."""
        try:
            return self._attributeNameList.index(attributeName)
        except ValueError:
            return -1

    def hasAttribute(self, attributeName):
        """Return True if *attributeName* is in the attribute list (exact match)."""
        return attributeName in self._attributeNameList

    def getIndex(self, attributeName):
        """Same as getAttributeIndex(): position of the attribute or -1."""
        return self.getAttributeIndex(attributeName)

    def getItemNameList(self):
        """Return the full item names, '_<category>.<attribute>', in order."""
        return ["_" + self._name + "." + att for att in self._attributeNameList]

    def append(self, row):
        """Append *row* (stored by reference) to the row list."""
        self._rowList.append(row)

    def __registerAttribute(self, attributeName):
        """Add or re-spell an attribute; return True if it was newly added.

        Names are matched case-insensitively through the catalog; a match
        replaces the stored spelling with *attributeName*.
        """
        attributeNameLC = attributeName.lower()
        isNew = attributeNameLC not in self._catalog
        if isNew:
            self._attributeNameList.append(attributeName)
        else:
            i = self._attributeNameList.index(self._catalog[attributeNameLC])
            self._attributeNameList[i] = attributeName
        self._catalog[attributeNameLC] = attributeName
        self._numAttributes = len(self._attributeNameList)
        return isNew

    def appendAttribute(self, attributeName):
        """Add *attributeName*; existing rows are left untouched.

        An attribute that already exists (ignoring case) only has its
        spelling updated.
        """
        self.__registerAttribute(attributeName)

    def appendAttributeExtendRows(self, attributeName):
        """Add *attributeName* and append a '?' placeholder to every row.

        An attribute that already exists (ignoring case) only has its
        spelling updated, and a message is written to the log stream.
        """
        if self.__registerAttribute(attributeName):
            # add a placeholder to any existing rows for the new attribute.
            for row in self._rowList:
                row.append("?")
        else:
            self.__lfh.write("Appending existing attribute %s\n" % attributeName)

    def getValue(self, attributeName=None, rowIndex=None):
        """Return the value of an attribute in a row.

        attributeName/rowIndex default to the current attribute/row.

        Raises IndexError when the row or column is out of range or when the
        attribute/row arguments have the wrong type, and ValueError when the
        attribute name is unknown.
        """
        attribute = self.__currentAttribute if attributeName is None else attributeName
        rowI = self.__currentRowIndex if rowIndex is None else rowIndex
        if isinstance(attribute, str) and isinstance(rowI, int):
            return self._rowList[rowI][self._attributeNameList.index(attribute)]
        raise IndexError(attribute)

    def __padRow(self, row, ind):
        """Extend *row* with None so that position *ind* is assignable.

        Keeps the historical growth of (2*ind - len) elements but never adds
        fewer than needed; the old formula added nothing for ind == 0 on an
        empty row, making the following assignment fail.
        """
        ll = len(row)
        if ind >= ll:
            row.extend([None] * max(2 * ind - ll, ind + 1 - ll))

    def setValue(self, value, attributeName=None, rowIndex=None):
        """Store *value* for an attribute in a row, growing the table as needed.

        attributeName/rowIndex default to the current attribute/row.  Missing
        rows up to rowIndex are created.  Index and unknown-attribute errors
        are reported on the log stream rather than raised.  Nothing is done
        when the attribute is not a str or the row index is not an int.
        """
        attribute = self.__currentAttribute if attributeName is None else attributeName
        rowI = self.__currentRowIndex if rowIndex is None else rowIndex
        if not (isinstance(attribute, str) and isinstance(rowI, int)):
            return
        try:
            # if row index is out of range - add the rows -
            for ii in range(rowI + 1 - len(self._rowList)):
                self._rowList.append(self.__emptyRow())
            ind = self._attributeNameList.index(attribute)
            # extend the list if needed -
            self.__padRow(self._rowList[rowI], ind)
            self._rowList[rowI][ind] = value
        except IndexError:
            self.__lfh.write("DataCategory(setvalue) index error category %s attribute %s index %d value %r\n" %
                             (self._name, attribute, rowI, value))
            traceback.print_exc(file=self.__lfh)
        except ValueError:
            self.__lfh.write("DataCategory(setvalue) value error category %s attribute %s index %d value %r\n" %
                             (self._name, attribute, rowI, value))
            traceback.print_exc(file=self.__lfh)

    def __emptyRow(self):
        """Return a new row of None values, one per attribute."""
        return [None] * len(self._attributeNameList)

    def replaceValue(self, oldValue, newValue, attributeName):
        """Replace values equal to *oldValue* in one column.

        Returns the number of replacements (0 for an unknown attribute).
        """
        numReplace = 0
        if attributeName not in self._attributeNameList:
            return numReplace
        ind = self._attributeNameList.index(attributeName)
        for row in self._rowList:
            if row[ind] == oldValue:
                row[ind] = newValue
                numReplace += 1
        return numReplace

    def replaceSubstring(self, oldValue, newValue, attributeName):
        """Apply str.replace(oldValue, newValue) to every value of one column.

        Returns True if at least one value changed (False for an unknown
        attribute).
        """
        ok = False
        if attributeName not in self._attributeNameList:
            return ok
        ind = self._attributeNameList.index(attributeName)
        for row in self._rowList:
            val = row[ind]
            row[ind] = val.replace(oldValue, newValue)
            if val != row[ind]:
                ok = True
        return ok

    def invokeAttributeMethod(self, attributeName, type, method, db):
        """Execute method.getInline() once per row for *attributeName*.

        The attribute is added if missing and a first row is created if the
        category is empty.  For each row the current row index is advanced
        after the inline code has run.
        """
        self.__currentRowIndex = 0
        self.__currentAttribute = attributeName
        self.appendAttribute(attributeName)
        # Kept as a local name: it is in scope for the exec'd inline code.
        currentRowIndex = self.__currentRowIndex
        #
        ind = self._attributeNameList.index(attributeName)
        if len(self._rowList) == 0:
            self._rowList.append([None] * (len(self._attributeNameList) * 2))

        for row in self._rowList:
            self.__padRow(row, ind)
            # SECURITY NOTE(review): executes arbitrary Python source taken
            # from `method`; it must never be fed untrusted input.
            # The call form of exec behaves like the exec statement on
            # Python 2 and is the only valid form on Python 3.
            exec(method.getInline())
            self.__currentRowIndex += 1
            currentRowIndex = self.__currentRowIndex

    def invokeCategoryMethod(self, type, method, db):
        """Reset the current row index and execute method.getInline() once."""
        self.__currentRowIndex = 0
        # SECURITY NOTE(review): executes arbitrary Python source - see
        # invokeAttributeMethod().
        exec(method.getInline())

    def getAttributeLengthMaximumList(self):
        """Return, per attribute, the longest str() length found in any row.

        Only the first len(attribute list) entries of each row are examined
        (rows may be over-allocated); shorter rows contribute what they have.
        """
        numAtt = len(self._attributeNameList)
        # Was 'for i in len(...)', which raised TypeError on every call.
        mList = [0] * numAtt
        for row in self._rowList:
            for indx, val in enumerate(row[:numAtt]):
                mList[indx] = max(mList[indx], len(str(val)))
        return mList

    def renameAttribute(self, curAttributeName, newAttributeName):
        """Change the name of an attribute in place.

        Returns True on success and False if the attribute cannot be renamed
        (for example because it does not exist).
        """
        try:
            i = self._attributeNameList.index(curAttributeName)
            self._attributeNameList[i] = newAttributeName
            del self._catalog[curAttributeName.lower()]
            self._catalog[newAttributeName.lower()] = newAttributeName
            return True
        except (ValueError, KeyError, AttributeError, TypeError):
            return False

    def printIt(self, fh=sys.stdout):
        """Write a brief summary (attributes plus at most two rows) to *fh*."""
        fh.write("--------------------------------------------\n")
        fh.write(" Category: %s attribute list length: %d\n" %
                 (self._name, len(self._attributeNameList)))
        for at in self._attributeNameList:
            fh.write(" Category: %s attribute: %s\n" % (self._name, at))

        fh.write(" Row value list length: %d\n" % len(self._rowList))
        #
        for row in self._rowList[:2]:
            if len(row) == len(self._attributeNameList):
                for ii, v in enumerate(row):
                    # Values are truncated to 30 characters in the brief view.
                    fh.write(" %30s: %s ...\n" % (self._attributeNameList[ii], str(v)[:30]))
            else:
                fh.write("+WARNING - %s data length %d attribute name length %s mismatched\n" %
                         (self._name, len(row), len(self._attributeNameList)))

    def dumpIt(self, fh=sys.stdout):
        """Write the attributes and every value of every row to *fh*."""
        fh.write("--------------------------------------------\n")
        fh.write(" Category: %s attribute list length: %d\n" %
                 (self._name, len(self._attributeNameList)))
        for at in self._attributeNameList:
            fh.write(" Category: %s attribute: %s\n" % (self._name, at))

        fh.write(" Value list length: %d\n" % len(self._rowList))
        for row in self._rowList:
            for ii, v in enumerate(row):
                fh.write(" %30s: %s\n" % (self._attributeNameList[ii], v))

    def __formatPdbx(self, inp):
        """Format input data following PDBx quoting rules.

        Returns (list of string pieces, data type name); the pieces are
        joined by the callers.  The data type comes from __dataTypePdbx() so
        that the label always matches the quoting actually produced.  Any
        failure is reported on the log stream and None is returned.
        """
        try:
            dType = self.__dataTypePdbx(inp)
            if dType == 'DT_NULL_VALUE':
                if inp is None:
                    return (["?"], dType)
                if inp == "":
                    return (["."], dType)
                # "." and "?" are written as they are.
                return ([inp], dType)
            # pure numerical values are returned as unquoted strings
            if dType in ('DT_INTEGER', 'DT_FLOAT', 'DT_UNQUOTED_STRING'):
                return ([str(inp)], dType)
            # Item names ("_...") are protected with double quotes.
            if dType in ('DT_ITEM_NAME', 'DT_DOUBLE_QUOTED_STRING'):
                return (self.__doubleQuotedList(inp), dType)
            if dType == 'DT_SINGLE_QUOTED_STRING':
                return (self.__singleQuotedList(inp), dType)
            return (self.__semiColonQuotedList(inp), dType)
        except Exception:
            traceback.print_exc(file=self.__lfh)

    def __dataTypePdbx(self, inp):
        """Detect the PDBx data type of *inp* and return its 'DT_...' name.

        Double quoting is preferred where possible, then single quoting; a
        value that can use neither is a multi-line (semi-colon) string.
        """
        if inp is None:
            return 'DT_NULL_VALUE'

        # pure numerical values are returned as unquoted strings
        if isinstance(inp, int) or self.__intRe.search(str(inp)):
            return 'DT_INTEGER'

        if isinstance(inp, float) or self.__floatRe.search(str(inp)):
            return 'DT_FLOAT'

        # null value handling -
        if inp == "." or inp == "?" or inp == "":
            return 'DT_NULL_VALUE'

        # Contains white space or quotes ?
        if not self.__wsAndQuotesRe.search(inp):
            if inp.startswith("_"):
                return 'DT_ITEM_NAME'
            return 'DT_UNQUOTED_STRING'

        if self.__nlRe.search(inp):
            return 'DT_MULTI_LINE_STRING'

        hasDq = self.__dqRe.search(inp)
        hasSq = self.__sqRe.search(inp)
        if self.__avoidEmbeddedQuoting:
            # Additionally reject a style when the value holds the OTHER
            # quote character adjacent to whitespace.
            dqOk = not hasDq and not self.__sqWsRe.search(inp)
            sqOk = not hasSq and not self.__dqWsRe.search(inp)
        else:
            dqOk = not hasDq
            sqOk = not hasSq

        # The labels used to be swapped relative to the quoting actually
        # emitted (a value without single quotes was called double quoted).
        if dqOk:
            return 'DT_DOUBLE_QUOTED_STRING'
        if sqOk:
            return 'DT_SINGLE_QUOTED_STRING'
        return 'DT_MULTI_LINE_STRING'

    def __singleQuotedList(self, inp):
        """Return the pieces of *inp* wrapped in single quotes."""
        return ["'", inp, "'"]

    def __doubleQuotedList(self, inp):
        """Return the pieces of *inp* wrapped in double quotes."""
        return ['"', inp, '"']

    def __semiColonQuotedList(self, inp):
        """Return the pieces of *inp* as a semi-colon delimited text block.

        A newline is inserted before the closing ';' unless *inp* already
        ends with one.
        """
        pieces = ["\n", ";", inp]
        if inp[-1] != '\n':
            pieces.append("\n")
        pieces.extend([";", "\n"])
        return pieces

    def getValueFormatted(self, attributeName=None, rowIndex=None):
        """Return an attribute value formatted with PDBx quoting rules.

        attributeName/rowIndex default to the current attribute/row.

        Raises IndexError when the row or column is out of range (after
        logging the row) and TypeError when the attribute/row arguments have
        the wrong type.
        """
        attribute = self.__currentAttribute if attributeName is None else attributeName
        rowI = self.__currentRowIndex if rowIndex is None else rowIndex

        if isinstance(attribute, str) and isinstance(rowI, int):
            try:
                pieces, dType = self.__formatPdbx(
                    self._rowList[rowI][self._attributeNameList.index(attribute)])
                return "".join(pieces)
            except IndexError:
                # getRow() is used so that logging cannot itself fail when
                # the row index is the thing that is out of range.
                self.__lfh.write("attributeName %s rowI %r rowdata %r\n" %
                                 (attributeName, rowI, self.getRow(rowI)))
                raise IndexError
        raise TypeError(attribute)

    def getValueFormattedByIndex(self, attributeIndex, rowIndex):
        """Return the value at (rowIndex, attributeIndex) with PDBx quoting.

        Raises IndexError when either index is out of range.
        """
        pieces, dType = self.__formatPdbx(self._rowList[rowIndex][attributeIndex])
        return "".join(pieces)

    def getAttributeValueMaxLengthList(self, steps=1):
        """Return, per attribute, the longest str() length over sampled rows.

        Every *steps*-th row is examined.  Raises IndexError if a sampled row
        is shorter than the attribute list.
        """
        numAtt = len(self._attributeNameList)
        mList = [0] * numAtt
        for row in self._rowList[::steps]:
            for indx in range(numAtt):
                mList[indx] = max(mList[indx], len(str(row[indx])))
        return mList

    def __widenDataType(self, curType, val):
        """Return whichever of curType and val's type ranks later in __dataTypeList."""
        order = self.__dataTypeList
        return order[max(order.index(curType), order.index(self.__dataTypePdbx(val)))]

    def __mapFormatTypes(self, dataTypes):
        """Translate a list of 'DT_...' names to the parallel 'FT_...' names."""
        return [self.__formatTypeList[self.__dataTypeList.index(dt)] for dt in dataTypes]

    def getFormatTypeList(self, steps=1):
        """Return (formatTypeList, dataTypeList), one entry per attribute.

        Every *steps*-th row is examined and each column takes the highest
        ranking data type seen in it.  If a row cannot be processed the
        problem is logged and the types gathered so far are returned
        (previously the final return then failed with UnboundLocalError).
        """
        numAtt = len(self._attributeNameList)
        curDataTypeList = ['DT_NULL_VALUE'] * numAtt
        indx = -1
        row = None
        try:
            for row in self._rowList[::steps]:
                for indx in range(numAtt):
                    curDataTypeList[indx] = self.__widenDataType(curDataTypeList[indx], row[indx])
        except Exception:
            self.__lfh.write("PdbxDataCategory(getFormatTypeList) ++Index error at index %d in row %r\n" %
                             (indx, row))

        # Map the format types to the data types
        return self.__mapFormatTypes(curDataTypeList), curDataTypeList

    def getFormatTypeListX(self):
        """Like getFormatTypeList() over all rows, but errors are not caught."""
        numAtt = len(self._attributeNameList)
        curDataTypeList = ['DT_NULL_VALUE'] * numAtt
        for row in self._rowList:
            for indx in range(numAtt):
                curDataTypeList[indx] = self.__widenDataType(curDataTypeList[indx], row[indx])

        # Map the format types to the data types
        return self.__mapFormatTypes(curDataTypeList), curDataTypeList
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxParser.py
# Date: 2009-10-25 Jdw Original from py-pdbx-parser-v2
#
# Update:
#
# 2009-11-05 - (jdw) Change table storage architecture for list of
# dictionaries to list of lists.
# 2012-01-09 - (jdw) This module is now obsoleted by the PdbxReader/PdbxWriter
# modules. APIs are preserved.
#
# 2012-09-01 - (jdw) Revise tokenizer to better handle embedded quoting.
#
# NOTE - - Now obsolete - Use pdbx.reader.PdbxReader & pdbx.writer.PdbxWriter
#
##
"""
PDBx/mmCIF dictionary and data file parser.
Acknowledgements:
The tokenizer used in this module is modeled after the clever parser design
used in the PyMMLIB package.
PyMMLib Development Group
Authors: Ethan Merritt: merritt@u.washington.ed & Jay Painter: jay.painter@gmail.com
See: http://pymmlib.sourceforge.net/
"""
__docformat__
=
"restructuredtext en"
__author__
=
"John Westbrook"
__email__
=
"jwest@rcsb.rutgers.edu"
__license__
=
"Creative Commons Attribution 3.0 Unported"
__version__
=
"V0.01"
import
re
,
sys
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxError(Exception):
    """Exception class for general (non-syntax) errors."""
class SyntaxError(Exception):
    """Exception carrying the line number and text of a syntax error.

    NOTE: this class intentionally keeps its historical name, which shadows
    the built-in SyntaxError inside this module.

    Attributes:
        lineNumber - line at which the error was detected (formatted with %d)
        text       - description of the error
    """

    def __init__(self, lineNumber, text):
        # Hand the values to Exception as well so that .args is populated;
        # with empty args the exception cannot be copied or unpickled,
        # because re-creating it calls __init__ with no arguments.
        Exception.__init__(self, lineNumber, text)
        self.lineNumber = lineNumber
        self.text = text

    def __str__(self):
        return "%%ERROR - [at line: %d] %s" % (self.lineNumber, self.text)
class PdbxReader(object):
    """PDBx reader for data files and dictionaries.

    Wraps an input file handle. ``read()`` tokenizes the handle's contents
    and appends the resulting data and definition containers to a list
    supplied by the caller.
    """

    def __init__(self, ifh):
        """ifh - input file handle returned by open()"""
        # Count of input lines consumed so far; reported in syntax errors.
        self.__curLineNumber = 0
        self.__ifh = ifh
        # Reserved-word prefix (lower-cased text before the first '_')
        # mapped to the parser state that handles it.
        self.__stateDict = {
            "data": "ST_DATA_CONTAINER",
            "loop": "ST_TABLE",
            "global": "ST_GLOBAL_CONTAINER",
            "save": "ST_DEFINITION",
            "stop": "ST_STOP",
        }

    def read(self, containerList):
        """Appends to the input list of definition and data containers.

        Running out of tokens (``StopIteration``) is the normal way for a
        parse to finish.  If the parser instead returns on its own -- in
        this class that happens when a ``stop_`` reserved word is reached --
        ``PdbxError`` is raised.
        """
        self.__curLineNumber = 0
        try:
            self.__parser(self.__tokenizer(self.__ifh), containerList)
        except StopIteration:
            pass
        else:
            raise PdbxError()

    def __syntaxError(self, errText):
        """Raise ``SyntaxError`` tagged with the current line number.

        This method never returns normally.
        """
        raise SyntaxError(self.__curLineNumber, errText)

    def __getContainerName(self, inWord):
        """Returns the name of the data_ or save_ container.

        The first five characters (the length of ``data_`` / ``save_``) are
        dropped and surrounding whitespace is stripped.
        """
        return str(inWord[5:]).strip()

    def __getState(self, inWord):
        """Identifies reserved syntax elements and assigns an associated state.

        Returns: (reserved word, state)
        where -
           reserved word -  is one of CIF syntax elements:
                            data_, loop_, global_, save_, stop_
           state - the parser state required to process this next section.

        Words without an underscore, or whose prefix is not a reserved word,
        yield ``(None, "ST_UNKNOWN")``.
        """
        i = inWord.find("_")
        if i == -1:
            return None, "ST_UNKNOWN"

        rWord = inWord[:i].lower()
        try:
            return rWord, self.__stateDict[rWord]
        except KeyError:
            return None, "ST_UNKNOWN"

    def __parser(self, tokenizer, containerList):
        """Parser for PDBx data files and dictionaries.

        Input - tokenizer - iterator of 4-tuples
                            (category name, attribute name, quoted string, word)
                            as produced by the tokenizer method of this class.
                containerList - list-type container for data and definition
                            objects parsed from the input file.

        Return:
                containerList - is appended with data and definition objects.

        ``StopIteration`` from the exhausted tokenizer is deliberately left
        to propagate; the caller treats it as the normal end of input.
        Malformed input raises ``SyntaxError`` via ``__syntaxError``.
        """
        # Working container - data or definition
        curContainer = None
        # Categories created from item-value pairs in the current container.
        categoryIndex = {}
        curCategory = None
        curRow = None
        state = None

        # Find the first reserved word and begin capturing data.
        while True:
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            if curWord is None:
                continue
            reservedWord, state = self.__getState(curWord)
            if reservedWord is not None:
                break

        while True:
            #
            # Set the current state -
            #
            # At this point in the processing cycle we are expecting a token
            # containing either a '_category.attribute' or a reserved word.
            #
            if curCatName is not None:
                state = "ST_KEY_VALUE_PAIR"
            elif curWord is not None:
                reservedWord, state = self.__getState(curWord)
            else:
                self.__syntaxError("Miscellaneous syntax error")

            #
            # Process _category.attribute value assignments
            #
            if state == "ST_KEY_VALUE_PAIR":
                try:
                    curCategory = categoryIndex[curCatName]
                except KeyError:
                    # A new category is encountered - create a container and add a row
                    curCategory = categoryIndex[curCatName] = DataCategory(curCatName)
                    try:
                        curContainer.append(curCategory)
                    except AttributeError:
                        # curContainer is still None: no data_/save_ seen yet.
                        self.__syntaxError("Category cannot be added to data_ block")
                    curRow = []
                    curCategory.append(curRow)
                else:
                    # Recover the existing row from the category
                    try:
                        curRow = curCategory[0]
                    except IndexError:
                        self.__syntaxError("Internal index error accessing category data")

                # Check for duplicate attributes and add attribute to table.
                if curAttName in curCategory.getAttributeList():
                    self.__syntaxError("Duplicate attribute encountered in category")
                else:
                    curCategory.appendAttribute(curAttName)

                # Get the data for this attribute from the next token
                tCat, tAtt, curQuotedString, curWord = next(tokenizer)

                if tCat is not None or (curQuotedString is None and curWord is None):
                    self.__syntaxError("Missing data for item _%s.%s" % (curCatName, curAttName))

                if curWord is not None:
                    #
                    # Validation check token for misplaced reserved words -
                    #
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        self.__syntaxError("Unexpected reserved word: %s" % (reservedWord))
                    curRow.append(curWord)
                elif curQuotedString is not None:
                    curRow.append(curQuotedString)
                else:
                    self.__syntaxError("Missing value in item-value pair")

                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
                continue

            #
            # Process a loop_ declaration and associated data -
            #
            elif state == "ST_TABLE":
                # The category name in the next curCatName,curAttName pair
                # defines the name of the category container.
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                if curCatName is None or curAttName is None:
                    self.__syntaxError("Unexpected token in loop_ declaration")

                # Check for a previous category declaration.
                if curCatName in categoryIndex:
                    self.__syntaxError("Duplicate category declaration in loop_")

                curCategory = DataCategory(curCatName)
                try:
                    curContainer.append(curCategory)
                except AttributeError:
                    self.__syntaxError("loop_ declaration outside of data_ block or save_ frame")

                curCategory.appendAttribute(curAttName)

                # Read the rest of the loop_ declaration
                while True:
                    curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
                    if curCatName is None:
                        break
                    if curCatName != curCategory.getName():
                        self.__syntaxError("Changed category name in loop_ declaration")
                    curCategory.appendAttribute(curAttName)

                # If the next token is a 'word', check it for any reserved words -
                if curWord is not None:
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        if reservedWord == "stop":
                            return
                        self.__syntaxError(
                            "Unexpected reserved word after loop declaration: %s" % (reservedWord))

                # Read the table of data for this loop_ -
                while True:
                    curRow = []
                    curCategory.append(curRow)

                    # One token per declared attribute fills a row.
                    for _ in curCategory.getAttributeList():
                        if curWord is not None:
                            curRow.append(curWord)
                        elif curQuotedString is not None:
                            curRow.append(curQuotedString)
                        curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                    # loop_ data processing ends if -
                    # A new _category.attribute is encountered
                    if curCatName is not None:
                        break
                    # A reserved word is encountered
                    if curWord is not None:
                        reservedWord, state = self.__getState(curWord)
                        if reservedWord is not None:
                            break
                continue

            elif state == "ST_DEFINITION":
                # Ignore trailing unnamed saveframe delimiters e.g. 'save_'
                sName = self.__getContainerName(curWord)
                if len(sName) > 0:
                    curContainer = DefinitionContainer(sName)
                    containerList.append(curContainer)
                    categoryIndex = {}
                    curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_DATA_CONTAINER":
                dName = self.__getContainerName(curWord)
                if len(dName) == 0:
                    dName = "unidentified"
                curContainer = DataContainer(dName)
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_STOP":
                return

            # Must match the value stored for "global" in __stateDict;
            # otherwise no branch consumes the token and this loop never ends.
            elif state == "ST_GLOBAL_CONTAINER":
                curContainer = DataContainer("blank-global")
                curContainer.setGlobal()
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_UNKNOWN":
                self.__syntaxError("Unrecogized syntax element: " + str(curWord))

    def __tokenizer(self, ifh):
        """Tokenizer method for the mmCIF syntax file -

        Each return/yield from this method returns information about
        the next token in the form of a tuple with the following structure.

        (category name, attribute name, quoted strings, words w/o quotes or white space)

        Single- and double-quoted strings are matched by separate regex
        alternatives to better handle embedded quotes.  The generator simply
        ends at end of input; an unterminated semi-colon text block at end
        of input is discarded.
        """
        #
        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
        # outside of this regex.  Every fragment is a raw string so that the
        # backslash classes reach the regex engine unchanged.
        mmcifRe = re.compile(
            r"(?:"
            r"(?:_(.+?)[.](\S+))" r"|"            # _category.attribute
            r"(?:['](.*?)(?:[']\s|[']$))" r"|"    # single quoted strings
            r'(?:["](.*?)(?:["]\s|["]$))' r"|"    # double quoted strings
            r"(?:\s*#.*$)" r"|"                   # comments (dumped)
            r"(\S+)"                              # unquoted words
            r")")

        # One shared iterator so the nested loop below continues from where
        # the outer loop stopped.
        fileIter = iter(ifh)

        ## Tokenizer loop begins here ---
        for line in fileIter:
            self.__curLineNumber += 1

            # Dump comments
            if line.startswith("#"):
                continue

            # Gobble up the entire semi-colon/multi-line delimited string and
            # and stuff this into the string slot in the return tuple
            if line.startswith(";"):
                mlString = [line[1:]]
                for line in fileIter:
                    self.__curLineNumber += 1
                    if line.startswith(";"):
                        break
                    mlString.append(line)
                else:
                    # Input ended before the closing ';' - stop cleanly
                    # rather than leaking StopIteration out of a generator.
                    return

                # remove trailing new-line that is part of the \n; delimiter
                mlString[-1] = mlString[-1].rstrip()
                yield (None, None, "".join(mlString), None)
                #
                # Need to process the remainder of the current line -
                line = line[1:]

            # Apply regex to the current line consolidate the single/double
            # quoted within the quoted string category
            for it in mmcifRe.finditer(line):
                tgroups = it.groups()
                # A match with every group empty is a dumped comment.
                if tgroups != (None, None, None, None, None):
                    if tgroups[2] is not None:
                        qs = tgroups[2]
                    elif tgroups[3] is not None:
                        qs = tgroups[3]
                    else:
                        qs = None
                    yield (tgroups[0], tgroups[1], qs, tgroups[4])

    def __tokenizerOrg(self, ifh):
        r"""Earlier tokenizer variant for the mmCIF syntax file -

        Each return/yield from this method returns information about
        the next token in the form of a tuple with the following structure.

        (category name, attribute name, quoted strings, words w/o quotes or white space)

        Here one regex alternative covers both quote characters.  Nothing in
        this class calls this method.
        """
        #
        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
        # outside of this regex.
        mmcifRe = re.compile(
            r"(?:"
            r"(?:_(.+?)[.](\S+))" r"|"                  # _category.attribute
            r"(?:['\"](.*?)(?:['\"]\s|['\"]$))" r"|"    # quoted strings
            r"(?:\s*#.*$)" r"|"                         # comments (dumped)
            r"(\S+)"                                    # unquoted words
            r")")

        fileIter = iter(ifh)

        ## Tokenizer loop begins here ---
        for line in fileIter:
            self.__curLineNumber += 1

            # Dump comments
            if line.startswith("#"):
                continue

            # Gobble up the entire semi-colon/multi-line delimited string and
            # and stuff this into the string slot in the return tuple
            if line.startswith(";"):
                mlString = [line[1:]]
                for line in fileIter:
                    self.__curLineNumber += 1
                    if line.startswith(";"):
                        break
                    mlString.append(line)
                else:
                    # Input ended before the closing ';'.
                    return

                # remove trailing new-line that is part of the \n; delimiter
                mlString[-1] = mlString[-1].rstrip()
                yield (None, None, "".join(mlString), None)
                #
                # Need to process the remainder of the current line -
                line = line[1:]

            ## Apply regex to the current line
            for it in mmcifRe.finditer(line):
                groups = it.groups()
                if groups != (None, None, None, None):
                    yield groups
class PdbxWriter(object):
    """Write PDBx data files or dictionaries using the input container
    or container list.
    """

    def __init__(self, ofh=sys.stdout):
        """ofh - output file handle; anything with a ``write(str)`` method."""
        self.__ofh = ofh
        self.__containerList = []
        self.__MAXIMUM_LINE_LENGTH = 2048
        # Number of spaces between columns / after item names.
        self.__SPACING = 2
        # Indent width applied inside save_ frames.
        self.__INDENT_DEFINITION = 3
        self.__indentSpace = " " * self.__INDENT_DEFINITION
        # True while the container being written is a definition (save_ frame).
        self.__doDefinitionIndent = False

    def write(self, containerList):
        """Write every container in ``containerList`` to the output handle."""
        self.__containerList = containerList
        for container in self.__containerList:
            self.writeContainer(container)

    def writeContainer(self, container):
        """Write one data, global or definition container.

        Emits the container header, then each non-empty category either in
        item-value form (one row) or loop_ form (several rows), each followed
        by a '#' separator.  Definition containers are closed with ``save_``.

        Raises PdbxError for a multi-row category without attributes.
        """
        indS = " " * self.__INDENT_DEFINITION
        if isinstance(container, DefinitionContainer):
            self.__write("save_%s\n" % container.getName())
            self.__doDefinitionIndent = True
            self.__write(indS + "#\n")
        elif isinstance(container, DataContainer):
            if container.getGlobal():
                self.__write("global_\n")
                self.__doDefinitionIndent = False
                self.__write("\n")
            else:
                self.__write("data_%s\n" % container.getName())
                self.__doDefinitionIndent = False
                self.__write("#\n")

        for nm in container.getObjNameList():
            obj = container.getObj(nm)
            objL = obj.getRowList()

            # Skip empty objects
            if len(objL) == 0:
                continue
            # Item - value formattting
            elif len(objL) == 1:
                self.__writeItemValueFormat(obj)
            # Table formatting -
            elif len(objL) > 1 and len(obj.getAttributeList()) > 0:
                self.__writeTableFormat(obj)
            else:
                raise PdbxError()

            # Separator after the category; note it has no trailing newline.
            if self.__doDefinitionIndent:
                self.__write(indS + "#")
            else:
                self.__write("#")

        # Add a trailing saveframe reserved word.  It must start on its own
        # line: the separator above ends without a newline, and text after
        # '#' on the same line would be read back as a comment.
        if isinstance(container, DefinitionContainer):
            self.__write("\nsave_\n")
        self.__write("#\n")

    def __write(self, st):
        """Send ``st`` to the output handle."""
        self.__ofh.write(st)

    def __writeItemValueFormat(self, myCategory):
        """Write a single-row category as '_category.attribute value' lines."""
        # Compute the maximum item name length within this category -
        attributeNameLengthMax = 0
        for attributeName in myCategory.getAttributeList():
            attributeNameLengthMax = max(attributeNameLengthMax, len(attributeName))
        # Extra 2 accounts for the leading '_' and the '.' in the item name.
        itemNameLengthMax = (self.__SPACING + len(myCategory.getName())
                             + attributeNameLengthMax + 2)
        #
        lineList = []
        for attributeName, iPos in myCategory.getAttributeListWithOrder():
            lineList.append("\n")
            if self.__doDefinitionIndent:
                # - add indent --
                lineList.append(self.__indentSpace)

            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
            lineList.append(itemName.ljust(itemNameLengthMax))
            lineList.append(myCategory.getValueFormatted(attributeName, 0))

        lineList.append("\n")
        self.__write("".join(lineList))

    def __writeTableFormat(self, myCategory):
        """Write a multi-row category as a loop_ declaration plus data rows."""
        # Write the declaration of the loop_
        #
        lineList = []
        lineList.append('\n')
        if self.__doDefinitionIndent:
            lineList.append(self.__indentSpace)
        lineList.append("loop_")
        for attributeName in myCategory.getAttributeList():
            lineList.append('\n')
            if self.__doDefinitionIndent:
                lineList.append(self.__indentSpace)
            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
            lineList.append(itemName)
        self.__write("".join(lineList))

        #
        # Write the data in tabular format -
        #
        formatTypeList, dataTypeList = myCategory.getFormatTypeList()
        maxLengthList = myCategory.getAttributeValueMaxLengthList()
        spacing = " " * self.__SPACING
        #
        for iRow in range(myCategory.getRowCount()):
            lineList = []
            lineList.append('\n')
            if self.__doDefinitionIndent:
                lineList.append(self.__indentSpace + "  ")

            for iAt in range(myCategory.getAttributeCount()):
                formatType = formatTypeList[iAt]
                maxLength = maxLengthList[iAt]

                # Strings and null values are left-justified, numbers are
                # right-justified, multi-line strings are written unpadded.
                if formatType in ('FT_UNQUOTED_STRING', 'FT_NULL_VALUE',
                                  'FT_QUOTED_STRING'):
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val.ljust(maxLength))
                elif formatType == 'FT_NUMBER':
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val.rjust(maxLength))
                elif formatType == "FT_MULTI_LINE_STRING":
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val)

                lineList.append(spacing)

            self.__write("".join(lineList))
        self.__write("\n")
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxReadWriteTests.py
# Author: jdw
# Date: 9-Oct-2011
# Version: 0.001
#
# Updated:
# 24-Oct-2012 jdw update path details and reorganize.
#
##
""" Various tests caess for PDBx/mmCIF data file and dictionary reader and writer.
"""
__docformat__
=
"restructuredtext en"
__author__
=
"John Westbrook"
__email__
=
"jwest@rcsb.rutgers.edu"
__license__
=
"Creative Commons Attribution 3.0 Unported"
__version__
=
"V0.01"
import
sys
,
unittest
,
traceback
import
sys
,
time
,
os
,
os
.
path
,
shutil
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app.internal.pdbx.writer.PdbxWriter
import
PdbxWriter
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxReadWriteTests(unittest.TestCase):
    """Read/write tests for PdbxReader and PdbxWriter.

    Each test prints any exception traceback to ``self.lfh`` and then fails.
    Files are opened with context managers so handles are closed even when a
    step raises.
    """

    # Attribute names shared by the tests that build the
    # 'pdbx_seqtool_mapping_ref' category.
    _MAPPING_ATTRIBUTES = ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
                           "pdb_chain_id", "ref_mon_id", "ref_mon_num")

    def setUp(self):
        self.lfh = sys.stdout
        self.verbose = False
        self.pathPdbxDataFile = "../tests/1kip.cif"
        self.pathOutputFile = "testOutputDataFile.cif"

    def tearDown(self):
        pass

    def testSimpleInitialization(self):
        """Test case - Simple initialization of a data category and data block
        """
        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
                                               sys._getframe().f_code.co_name))
        try:
            #
            fn = "test-simple.cif"
            attributeNameList = ['aOne', 'aTwo', 'aThree', 'aFour', 'aFive',
                                 'aSix', 'aSeven', 'aEight', 'aNine', 'aTen']
            # Ten independent rows, each holding the integers 1..10.
            rowList = [list(range(1, 11)) for _ in range(10)]
            nameCat = 'myCategory'
            #
            curContainer = DataContainer("myblock")
            aCat = DataCategory(nameCat, attributeNameList, rowList)
            aCat.printIt()
            curContainer.append(aCat)
            curContainer.printIt()
            #
            myContainerList = []
            myContainerList.append(curContainer)
            with open(fn, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myContainerList)

            myContainerList = []
            with open(fn, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myContainerList)

            for container in myContainerList:
                for objName in container.getObjNameList():
                    name, aList, rList = container.getObj(objName).get()
                    self.lfh.write("Recovered data category  %s\n" % name)
                    self.lfh.write("Attribute list           %r\n" % repr(aList))
                    self.lfh.write("Row list                 %r\n" % repr(rList))
        except Exception:
            traceback.print_exc(file=self.lfh)
            self.fail()

    def testWriteDataFile(self):
        """Test case - write data file
        """
        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
                                               sys._getframe().f_code.co_name))
        try:
            #
            myDataList = []
            with open("test-output.cif", "w") as ofh:
                curContainer = DataContainer("myblock")
                aCat = DataCategory("pdbx_seqtool_mapping_ref")
                for attributeName in self._MAPPING_ATTRIBUTES:
                    aCat.appendAttribute(attributeName)
                aCat.append([1, 2, 3, 4, 5, 6, 7])
                aCat.append([1, 2, 3, 4, 5, 6, 7])
                aCat.append([1, 2, 3, 4, 5, 6, 7])
                aCat.append([1, 2, 3, 4, 5, 6, 7])
                aCat.append([7, 6, 5, 4, 3, 2, 1])
                aCat.printIt()
                curContainer.append(aCat)
                curContainer.printIt()
                #
                myDataList.append(curContainer)
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myDataList)
        except Exception:
            traceback.print_exc(file=self.lfh)
            self.fail()

    def testUpdateDataFile(self):
        """Test case - update data file
        """
        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
                                               sys._getframe().f_code.co_name))
        try:
            # Create a initial data file --
            #
            myDataList = []
            curContainer = DataContainer("myblock")
            aCat = DataCategory("pdbx_seqtool_mapping_ref")
            for attributeName in self._MAPPING_ATTRIBUTES:
                aCat.appendAttribute(attributeName)
            aCat.append([9, 2, 3, 4, 5, 6, 7])
            aCat.append([10, 2, 3, 4, 5, 6, 7])
            aCat.append([11, 2, 3, 4, 5, 6, 7])
            aCat.append([12, 2, 3, 4, 5, 6, 7])

            curContainer.append(aCat)
            myDataList.append(curContainer)
            with open("test-output-1.cif", "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myDataList)
            #
            # Read and update the data -
            #
            myDataList = []
            with open("test-output-1.cif", "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myDataList)
            #
            myBlock = myDataList[0]
            myBlock.printIt()
            myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
            myCat.printIt()
            # range() exists on both Python 2 and 3 (xrange does not).
            for iRow in range(0, myCat.getRowCount()):
                myCat.setValue('some value', 'ref_mon_id', iRow)
                myCat.setValue(100, 'ref_mon_num', iRow)
            with open("test-output-2.cif", "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myDataList)
            #
        except Exception:
            traceback.print_exc(file=self.lfh)
            self.fail()

    def testReadDataFile(self):
        """Test case - read data file
        """
        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
                                               sys._getframe().f_code.co_name))
        try:
            #
            myDataList = []
            with open(self.pathPdbxDataFile, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myDataList)
        except Exception:
            traceback.print_exc(file=self.lfh)
            self.fail()

    def testReadWriteDataFile(self):
        """Test case - data file read write test
        """
        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
                                               sys._getframe().f_code.co_name))
        try:
            myDataList = []
            with open(self.pathPdbxDataFile, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myDataList)

            with open(self.pathOutputFile, "w") as ofh:
                pWr = PdbxWriter(ofh)
                pWr.write(myDataList)
        except Exception:
            traceback.print_exc(file=self.lfh)
            self.fail()
def simpleSuite():
    """Return a TestSuite holding the selected PdbxReadWriteTests cases.

    Tests are added in the order listed below.
    """
    selectedTests = (
        "testSimpleInitialization",
        "testUpdateDataFile",
        "testReadWriteDataFile",
    )
    suiteSelect = unittest.TestSuite()
    for testName in selectedTests:
        suiteSelect.addTest(PdbxReadWriteTests(testName))
    return suiteSelect
if __name__ == '__main__':
    # Script entry point: run the selected suite with verbose reporting.
    runner = unittest.TextTestRunner(verbosity=2)
    mySuite = simpleSuite()
    runner.run(mySuite)
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxReader.py
# Date: 2012-01-09 Jdw Adapted from PdbxParser
#
# Updates:
#
# 2012-01-09 - (jdw) Separate reader and writer classes.
#
# 2012-09-02 - (jdw) Revise tokenizer to better handle embedded quoting.
#
##
"""
PDBx/mmCIF dictionary and data file parser.
Acknowledgements:
The tokenizer used in this module is modeled after the clever parser design
used in the PyMMLIB package.
PyMMLib Development Group
Authors: Ethan Merritt: merritt@u.washington.ed & Jay Painter: jay.painter@gmail.com
See: http://pymmlib.sourceforge.net/
"""
import
re
,
sys
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxError(Exception):
    """General-purpose error type for this module.

    Adds nothing to ``Exception``; it exists so callers can catch it by name.
    """
class SyntaxError(Exception):
    """Parse error that records where in the input it occurred.

    NOTE: inside the defining module this name shadows Python's built-in
    ``SyntaxError``.

    Attributes:
        lineNumber: line number associated with the error.
        text: description of the problem.
    """

    def __init__(self, lineNumber, text):
        # The base class is initialised without arguments, so ``args`` stays
        # empty; the details live in the two attributes below.
        super(SyntaxError, self).__init__()
        self.lineNumber, self.text = lineNumber, text

    def __str__(self):
        details = (self.lineNumber, self.text)
        return "%%ERROR - [at line: %d] %s" % details
class PdbxReader(object):
    """PDBx reader for data files and dictionaries.

    Wraps an input file handle. ``read()`` tokenizes the handle's contents
    and appends the resulting data and definition containers to a list
    supplied by the caller.
    """

    def __init__(self, ifh):
        """ifh - input file handle returned by open()"""
        # Count of input lines consumed so far; reported in syntax errors.
        self.__curLineNumber = 0
        self.__ifh = ifh
        # Reserved-word prefix (lower-cased text before the first '_')
        # mapped to the parser state that handles it.
        self.__stateDict = {
            "data": "ST_DATA_CONTAINER",
            "loop": "ST_TABLE",
            "global": "ST_GLOBAL_CONTAINER",
            "save": "ST_DEFINITION",
            "stop": "ST_STOP",
        }

    def read(self, containerList):
        """Appends to the input list of definition and data containers.

        Running out of tokens (``StopIteration``) is the normal way for a
        parse to finish.  If the parser instead returns on its own -- in
        this class that happens when a ``stop_`` reserved word is reached --
        ``PdbxError`` is raised.
        """
        self.__curLineNumber = 0
        try:
            self.__parser(self.__tokenizer(self.__ifh), containerList)
        except StopIteration:
            pass
        else:
            raise PdbxError()

    def __syntaxError(self, errText):
        """Raise ``SyntaxError`` tagged with the current line number.

        This method never returns normally.
        """
        raise SyntaxError(self.__curLineNumber, errText)

    def __getContainerName(self, inWord):
        """Returns the name of the data_ or save_ container.

        The first five characters (the length of ``data_`` / ``save_``) are
        dropped and surrounding whitespace is stripped.
        """
        return str(inWord[5:]).strip()

    def __getState(self, inWord):
        """Identifies reserved syntax elements and assigns an associated state.

        Returns: (reserved word, state)
        where -
           reserved word -  is one of CIF syntax elements:
                            data_, loop_, global_, save_, stop_
           state - the parser state required to process this next section.

        Words without an underscore, or whose prefix is not a reserved word,
        yield ``(None, "ST_UNKNOWN")``.
        """
        i = inWord.find("_")
        if i == -1:
            return None, "ST_UNKNOWN"

        rWord = inWord[:i].lower()
        try:
            return rWord, self.__stateDict[rWord]
        except KeyError:
            return None, "ST_UNKNOWN"

    def __parser(self, tokenizer, containerList):
        """Parser for PDBx data files and dictionaries.

        Input - tokenizer - iterator of 4-tuples
                            (category name, attribute name, quoted string, word)
                            as produced by the tokenizer method of this class.
                containerList - list-type container for data and definition
                            objects parsed from the input file.

        Return:
                containerList - is appended with data and definition objects.

        ``StopIteration`` from the exhausted tokenizer is deliberately left
        to propagate; the caller treats it as the normal end of input.
        Malformed input raises ``SyntaxError`` via ``__syntaxError``.
        """
        # Working container - data or definition
        curContainer = None
        # Categories created from item-value pairs in the current container.
        categoryIndex = {}
        curCategory = None
        curRow = None
        state = None

        # Find the first reserved word and begin capturing data.
        while True:
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            if curWord is None:
                continue
            reservedWord, state = self.__getState(curWord)
            if reservedWord is not None:
                break

        while True:
            #
            # Set the current state -
            #
            # At this point in the processing cycle we are expecting a token
            # containing either a '_category.attribute' or a reserved word.
            #
            if curCatName is not None:
                state = "ST_KEY_VALUE_PAIR"
            elif curWord is not None:
                reservedWord, state = self.__getState(curWord)
            else:
                self.__syntaxError("Miscellaneous syntax error")

            #
            # Process _category.attribute value assignments
            #
            if state == "ST_KEY_VALUE_PAIR":
                try:
                    curCategory = categoryIndex[curCatName]
                except KeyError:
                    # A new category is encountered - create a container and add a row
                    curCategory = categoryIndex[curCatName] = DataCategory(curCatName)
                    try:
                        curContainer.append(curCategory)
                    except AttributeError:
                        # curContainer is still None: no data_/save_ seen yet.
                        self.__syntaxError("Category cannot be added to data_ block")
                    curRow = []
                    curCategory.append(curRow)
                else:
                    # Recover the existing row from the category
                    try:
                        curRow = curCategory[0]
                    except IndexError:
                        self.__syntaxError("Internal index error accessing category data")

                # Check for duplicate attributes and add attribute to table.
                if curAttName in curCategory.getAttributeList():
                    self.__syntaxError("Duplicate attribute encountered in category")
                else:
                    curCategory.appendAttribute(curAttName)

                # Get the data for this attribute from the next token
                tCat, tAtt, curQuotedString, curWord = next(tokenizer)

                if tCat is not None or (curQuotedString is None and curWord is None):
                    self.__syntaxError("Missing data for item _%s.%s" % (curCatName, curAttName))

                if curWord is not None:
                    #
                    # Validation check token for misplaced reserved words -
                    #
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        self.__syntaxError("Unexpected reserved word: %s" % (reservedWord))
                    curRow.append(curWord)
                elif curQuotedString is not None:
                    curRow.append(curQuotedString)
                else:
                    self.__syntaxError("Missing value in item-value pair")

                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
                continue

            #
            # Process a loop_ declaration and associated data -
            #
            elif state == "ST_TABLE":
                # The category name in the next curCatName,curAttName pair
                # defines the name of the category container.
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                if curCatName is None or curAttName is None:
                    self.__syntaxError("Unexpected token in loop_ declaration")

                # Check for a previous category declaration.
                if curCatName in categoryIndex:
                    self.__syntaxError("Duplicate category declaration in loop_")

                curCategory = DataCategory(curCatName)
                try:
                    curContainer.append(curCategory)
                except AttributeError:
                    self.__syntaxError("loop_ declaration outside of data_ block or save_ frame")

                curCategory.appendAttribute(curAttName)

                # Read the rest of the loop_ declaration
                while True:
                    curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
                    if curCatName is None:
                        break
                    if curCatName != curCategory.getName():
                        self.__syntaxError("Changed category name in loop_ declaration")
                    curCategory.appendAttribute(curAttName)

                # If the next token is a 'word', check it for any reserved words -
                if curWord is not None:
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        if reservedWord == "stop":
                            return
                        self.__syntaxError(
                            "Unexpected reserved word after loop declaration: %s" % (reservedWord))

                # Read the table of data for this loop_ -
                while True:
                    curRow = []
                    curCategory.append(curRow)

                    # One token per declared attribute fills a row.
                    for _ in curCategory.getAttributeList():
                        if curWord is not None:
                            curRow.append(curWord)
                        elif curQuotedString is not None:
                            curRow.append(curQuotedString)
                        curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

                    # loop_ data processing ends if -
                    # A new _category.attribute is encountered
                    if curCatName is not None:
                        break
                    # A reserved word is encountered
                    if curWord is not None:
                        reservedWord, state = self.__getState(curWord)
                        if reservedWord is not None:
                            break
                continue

            elif state == "ST_DEFINITION":
                # Ignore trailing unnamed saveframe delimiters e.g. 'save_'
                sName = self.__getContainerName(curWord)
                if len(sName) > 0:
                    curContainer = DefinitionContainer(sName)
                    containerList.append(curContainer)
                    categoryIndex = {}
                    curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_DATA_CONTAINER":
                dName = self.__getContainerName(curWord)
                if len(dName) == 0:
                    dName = "unidentified"
                curContainer = DataContainer(dName)
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_STOP":
                return

            # Must match the value stored for "global" in __stateDict;
            # otherwise no branch consumes the token and this loop never ends.
            elif state == "ST_GLOBAL_CONTAINER":
                curContainer = DataContainer("blank-global")
                curContainer.setGlobal()
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)

            elif state == "ST_UNKNOWN":
                self.__syntaxError("Unrecogized syntax element: " + str(curWord))

    def __tokenizer(self, ifh):
        """Tokenizer method for the mmCIF syntax file -

        Each return/yield from this method returns information about
        the next token in the form of a tuple with the following structure.

        (category name, attribute name, quoted strings, words w/o quotes or white space)

        Single- and double-quoted strings are matched by separate regex
        alternatives to better handle embedded quotes.  The generator simply
        ends at end of input; an unterminated semi-colon text block at end
        of input is discarded.
        """
        #
        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
        # outside of this regex.  Every fragment is a raw string so that the
        # backslash classes reach the regex engine unchanged.
        mmcifRe = re.compile(
            r"(?:"
            r"(?:_(.+?)[.](\S+))" r"|"            # _category.attribute
            r"(?:['](.*?)(?:[']\s|[']$))" r"|"    # single quoted strings
            r'(?:["](.*?)(?:["]\s|["]$))' r"|"    # double quoted strings
            r"(?:\s*#.*$)" r"|"                   # comments (dumped)
            r"(\S+)"                              # unquoted words
            r")")

        # One shared iterator so the nested loop below continues from where
        # the outer loop stopped.
        fileIter = iter(ifh)

        ## Tokenizer loop begins here ---
        for line in fileIter:
            self.__curLineNumber += 1

            # Dump comments
            if line.startswith("#"):
                continue

            # Gobble up the entire semi-colon/multi-line delimited string and
            # and stuff this into the string slot in the return tuple
            if line.startswith(";"):
                mlString = [line[1:]]
                for line in fileIter:
                    self.__curLineNumber += 1
                    if line.startswith(";"):
                        break
                    mlString.append(line)
                else:
                    # Input ended before the closing ';' - stop cleanly
                    # rather than leaking StopIteration out of a generator.
                    return

                # remove trailing new-line that is part of the \n; delimiter
                mlString[-1] = mlString[-1].rstrip()
                yield (None, None, "".join(mlString), None)
                #
                # Need to process the remainder of the current line -
                line = line[1:]

            # Apply regex to the current line consolidate the single/double
            # quoted within the quoted string category
            for it in mmcifRe.finditer(line):
                tgroups = it.groups()
                # A match with every group empty is a dumped comment.
                if tgroups != (None, None, None, None, None):
                    if tgroups[2] is not None:
                        qs = tgroups[2]
                    elif tgroups[3] is not None:
                        qs = tgroups[3]
                    else:
                        qs = None
                    yield (tgroups[0], tgroups[1], qs, tgroups[4])

    def __tokenizerOrg(self, ifh):
        """Earlier tokenizer variant for the mmCIF syntax file -

        Each return/yield from this method returns information about
        the next token in the form of a tuple with the following structure.

        (category name, attribute name, quoted strings, words w/o quotes or white space)

        Here one regex alternative covers both quote characters.  Nothing in
        this class calls this method.
        """
        #
        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
        # outside of this regex.
        mmcifRe = re.compile(
            r"(?:"
            r"(?:_(.+?)[.](\S+))" r"|"                  # _category.attribute
            r"(?:['\"](.*?)(?:['\"]\s|['\"]$))" r"|"    # quoted strings
            r"(?:\s*#.*$)" r"|"                         # comments (dumped)
            r"(\S+)"                                    # unquoted words
            r")")

        fileIter = iter(ifh)

        ## Tokenizer loop begins here ---
        for line in fileIter:
            self.__curLineNumber += 1

            # Dump comments
            if line.startswith("#"):
                continue

            # Gobble up the entire semi-colon/multi-line delimited string and
            # and stuff this into the string slot in the return tuple
            if line.startswith(";"):
                mlString = [line[1:]]
                for line in fileIter:
                    self.__curLineNumber += 1
                    if line.startswith(";"):
                        break
                    mlString.append(line)
                else:
                    # Input ended before the closing ';'.
                    return

                # remove trailing new-line that is part of the \n; delimiter
                mlString[-1] = mlString[-1].rstrip()
                yield (None, None, "".join(mlString), None)
                #
                # Need to process the remainder of the current line -
                line = line[1:]

            ## Apply regex to the current line
            for it in mmcifRe.finditer(line):
                groups = it.groups()
                if groups != (None, None, None, None):
                    yield groups
wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxReaderTests.py
# Author: jdw
# Date: 9-Jan-2012
# Version: 0.001
#
# Update:
# 27-Sep-2012 jdw add test case for reading PDBx structure factor file
#
##
"""
Test cases for reading PDBx/mmCIF data files with the PdbxReader class.
"""
import
sys
,
unittest
,
traceback
import
sys
,
time
,
os
,
os
.
path
,
shutil
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class
PdbxReaderTests
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
lfh
=
sys
.
stderr
self
.
verbose
=
False
self
.
pathPdbxDataFile
=
"../tests/1kip.cif"
self
.
pathBigPdbxDataFile
=
"../tests/1ffk.cif"
self
.
pathSFDataFile
=
"../tests/1kip-sf.cif"
def
tearDown
(
self
):
pass
def
testReadSmallDataFile
(
self
):
"""Test case - read data file
"""
self
.
lfh
.
write
(
"
\n
Starting %s %s
\n
"
%
(
self
.
__class__
.
__name__
,
sys
.
_getframe
().
f_code
.
co_name
))
try
:
#
myDataList
=
[]
ifh
=
open
(
self
.
pathPdbxDataFile
,
"r"
)
pRd
=
PdbxReader
(
ifh
)
pRd
.
read
(
myDataList
)
ifh
.
close
()
except
:
traceback
.
print_exc
(
file
=
sys
.
stderr
)
self
.
fail
()
def
testReadBigDataFile
(
self
):
"""Test case - read data file
"""
self
.
lfh
.
write
(
"
\n
Starting %s %s
\n
"
%
(
self
.
__class__
.
__name__
,
sys
.
_getframe
().
f_code
.
co_name
))
try
:
#
myDataList
=
[]
ifh
=
open
(
self
.
pathBigPdbxDataFile
,
"r"
)
pRd
=
PdbxReader
(
ifh
)
pRd
.
read
(
myDataList
)
ifh
.
close
()
except
:
traceback
.
print_exc
(
file
=
sys
.
stderr
)
self
.
fail
()
def
testReadSFDataFile
(
self
):
"""Test case - read PDB structure factor data file and compute statistics on f/sig(f).
"""
self
.
lfh
.
write
(
"
\n
Starting %s %s
\n
"
%
(
self
.
__class__
.
__name__
,
sys
.
_getframe
().
f_code
.
co_name
))
try
:
#
myContainerList
=
[]
ifh
=
open
(
self
.
pathSFDataFile
,
"r"
)
pRd
=
PdbxReader
(
ifh
)
pRd
.
read
(
myContainerList
)
c0
=
myContainerList
[
0
]
#
catObj
=
c0
.
getObj
(
"refln"
)
if
catObj
is
None
:
return
false
nRows
=
catObj
.
getRowCount
()
#
# Get column name index.
#
itDict
=
{}
itNameList
=
catObj
.
getItemNameList
()
for
idxIt
,
itName
in
enumerate
(
itNameList
):
itDict
[
str
(
itName
).
lower
()]
=
idxIt
#
idf
=
itDict
[
'_refln.f_meas_au'
]
idsigf
=
itDict
[
'_refln.f_meas_sigma_au'
]
minR
=
100
maxR
=-
1
sumR
=
0
icount
=
0
for
row
in
catObj
.
getRowList
():
try
:
f
=
float
(
row
[
idf
])
sigf
=
float
(
row
[
idsigf
])
ratio
=
sigf
/
f
#self.lfh.write(" %f %f %f\n" % (f,sigf,ratio))
maxR
=
max
(
maxR
,
ratio
)
minR
=
min
(
minR
,
ratio
)
sumR
+=
ratio
icount
+=
1
except
:
continue
ifh
.
close
()
self
.
lfh
.
write
(
"f/sig(f) min %f max %f avg %f count %d
\n
"
%
(
minR
,
maxR
,
sumR
/
icount
,
icount
))
except
:
traceback
.
print_exc
(
file
=
sys
.
stderr
)
self
.
fail
()
def simpleSuite():
    """Return a TestSuite holding the three reader tests in a fixed order."""
    selected = unittest.TestSuite()
    orderedNames = ("testReadBigDataFile",
                    "testReadSmallDataFile",
                    "testReadSFDataFile")
    for testName in orderedNames:
        selected.addTest(PdbxReaderTests(testName))
    return selected
if __name__ == '__main__':
    # Run the hand-picked suite with verbose per-test output.
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(simpleSuite())
#
wrappers/python/simtk/openmm/app/internal/pdbx/reader/__init__.py
0 → 100644
View file @
db72d0cb
wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxWriter.py
# Date: 2011-10-09 Jdw Adapted from PdbxParser.py
#
# Updates:
# 5-Apr-2011 jdw Using the double quote format preference
# 23-Oct-2012 jdw update path details and reorganize.
#
###
"""
Classes for writing data and dictionary containers in PDBx/mmCIF format.
"""
__docformat__
=
"restructuredtext en"
__author__
=
"John Westbrook"
__email__
=
"jwest@rcsb.rutgers.edu"
__license__
=
"Creative Commons Attribution 3.0 Unported"
__version__
=
"V0.01"
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class PdbxError(Exception):
    """General-purpose exception for errors raised by the PDBx writer."""
class PdbxWriter(object):
    """Write PDBx data files or dictionaries using the input container
       or container list.

       Output goes to the file-like object given to the constructor.
       Categories with one row are written as item/value pairs, categories
       with several rows as a loop_ table.
    """

    # NOTE(review): `sys` is not imported explicitly next to this class;
    # presumably it arrives through the star import of PdbxContainers - confirm.
    def __init__(self, ofh=sys.stdout):
        """ofh -- writable file-like object receiving the output."""
        self.__ofh = ofh
        self.__containerList = []
        self.__MAXIMUM_LINE_LENGTH = 2048
        self.__SPACING = 2
        self.__INDENT_DEFINITION = 3
        self.__indentSpace = " " * self.__INDENT_DEFINITION
        self.__doDefinitionIndent = False
        # Maximum number of rows checked for value length and format
        self.__rowPartition = None

    def setRowPartition(self, numRows):
        ''' Maximum number of rows checked for value length and format
        '''
        self.__rowPartition = numRows

    def write(self, containerList):
        """Write every container in containerList, in order."""
        self.__containerList = containerList
        for container in self.__containerList:
            self.writeContainer(container)

    def writeContainer(self, container):
        """Write one container: its header, each non-empty category, and a trailer.

        Raises PdbxError for a category that has several rows but no attributes.
        """
        indS = " " * self.__INDENT_DEFINITION
        if isinstance(container, DefinitionContainer):
            self.__write("save_%s\n" % container.getName())
            self.__doDefinitionIndent = True
            self.__write(indS + "#\n")
        elif isinstance(container, DataContainer):
            if container.getGlobal():
                self.__write("global_\n")
                self.__doDefinitionIndent = False
                self.__write("\n")
            else:
                self.__write("data_%s\n" % container.getName())
                self.__doDefinitionIndent = False
                self.__write("#\n")

        for nm in container.getObjNameList():
            obj = container.getObj(nm)
            numRows = len(obj.getRowList())

            # Skip empty objects
            if numRows == 0:
                continue

            if numRows == 1:
                # Item - value formatting
                self.__writeItemValueFormat(obj)
            elif len(obj.getAttributeList()) > 0:
                # Table formatting -
                self.__writeTableFormat(obj)
            else:
                raise PdbxError("Category %r has %d rows but no attributes" % (nm, numRows))

            if self.__doDefinitionIndent:
                self.__write(indS + "#")
            else:
                self.__write("#")

        # Add a trailing saveframe reserved word
        if isinstance(container, DefinitionContainer):
            self.__write("\nsave_\n")
        self.__write("#\n")

    def __write(self, st):
        """Send the string st to the output handle."""
        self.__ofh.write(st)

    def __writeItemValueFormat(self, myCategory):
        """Write row 0 of myCategory as one '_category.attribute value' line per attribute."""
        # Compute the maximum item name length within this category -
        attributeNameLengthMax = 0
        for attributeName in myCategory.getAttributeList():
            attributeNameLengthMax = max(attributeNameLengthMax, len(attributeName))
        # The trailing 2 accounts for the leading '_' and the '.' separator.
        itemNameLengthMax = self.__SPACING + len(myCategory.getName()) + attributeNameLengthMax + 2
        #
        lineList = ["#\n"]
        for attributeName, iPos in myCategory.getAttributeListWithOrder():
            if self.__doDefinitionIndent:
                # - add indent --
                lineList.append(self.__indentSpace)

            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
            lineList.append(itemName.ljust(itemNameLengthMax))
            lineList.append(myCategory.getValueFormatted(attributeName, 0))
            lineList.append("\n")

        self.__write("".join(lineList))

    def __writeTableFormat(self, myCategory):
        """Write myCategory as a loop_ declaration followed by one line per row."""
        # Write the declaration of the loop_
        #
        lineList = ['#\n']
        if self.__doDefinitionIndent:
            lineList.append(self.__indentSpace)
        lineList.append("loop_")
        for attributeName in myCategory.getAttributeList():
            lineList.append('\n')
            if self.__doDefinitionIndent:
                lineList.append(self.__indentSpace)
            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
            lineList.append(itemName)
        self.__write("".join(lineList))

        #
        # Write the data in tabular format -
        #
        # For speed make the following evaluation on a portion of the table
        if self.__rowPartition is not None:
            # Floor division keeps the step an integer under true division too.
            numSteps = max(1, myCategory.getRowCount() // self.__rowPartition)
        else:
            numSteps = 1

        # Only the format types are used below; the data types are ignored.
        formatTypeList, _dataTypeList = myCategory.getFormatTypeList(steps=numSteps)
        maxLengthList = myCategory.getAttributeValueMaxLengthList(steps=numSteps)
        spacing = " " * self.__SPACING
        #
        for iRow in range(myCategory.getRowCount()):
            lineList = ['\n']
            if self.__doDefinitionIndent:
                lineList.append(self.__indentSpace + " ")

            for iAt in range(myCategory.getAttributeCount()):
                formatType = formatTypeList[iAt]
                maxLength = maxLengthList[iAt]

                if formatType in ('FT_UNQUOTED_STRING', 'FT_NULL_VALUE'):
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val.ljust(maxLength))
                elif formatType == 'FT_NUMBER':
                    # Numbers are right-justified in their column.
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val.rjust(maxLength))
                elif formatType == 'FT_QUOTED_STRING':
                    # Two extra columns, presumably for the quote characters.
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val.ljust(maxLength + 2))
                elif formatType == "FT_MULTI_LINE_STRING":
                    # Multi-line values are written unpadded.
                    val = myCategory.getValueFormattedByIndex(iAt, iRow)
                    lineList.append(val)
                # Any other format type contributes only the column spacing.

                lineList.append(spacing)

            self.__write("".join(lineList))
        self.__write("\n")
wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
0 → 100644
View file @
db72d0cb
##
# File: PdbxWriterTests.py
# Author: jdw
# Date: 3-November-2009
# Version: 0.001
#
# Update:
# 5-Apr-2011 jdw Using the double quote format preference
# 24-Oct-2012 jdw Update path and examples.
##
"""
Test implementing PDBx/mmCIF write and formatting operations.
"""
__docformat__
=
"restructuredtext en"
__author__
=
"John Westbrook"
__email__
=
"jwest@rcsb.rutgers.edu"
__license__
=
"Creative Commons Attribution 3.0 Unported"
__version__
=
"V0.01"
import
sys
,
unittest
,
traceback
import
sys
,
time
,
os
,
os
.
path
,
shutil
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app.internal.pdbx.writer.PdbxWriter
import
PdbxWriter
from
simtk.openmm.app.internal.pdbx.reader.PdbxContainers
import
*
class
PdbxWriterTests
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
lfh
=
sys
.
stderr
self
.
verbose
=
False
self
.
pathPdbxDataFile
=
"../tests/1kip.cif"
self
.
pathOutputFile
=
"testOutputDataFile.cif"
def
tearDown
(
self
):
pass
def
testWriteDataFile
(
self
):
"""Test case - write data file
"""
self
.
lfh
.
write
(
"
\n
Starting %s %s
\n
"
%
(
self
.
__class__
.
__name__
,
sys
.
_getframe
().
f_code
.
co_name
))
try
:
#
myDataList
=
[]
ofh
=
open
(
"test-output.cif"
,
"w"
)
curContainer
=
DataContainer
(
"myblock"
)
aCat
=
DataCategory
(
"pdbx_seqtool_mapping_ref"
)
aCat
.
appendAttribute
(
"ordinal"
)
aCat
.
appendAttribute
(
"entity_id"
)
aCat
.
appendAttribute
(
"auth_mon_id"
)
aCat
.
appendAttribute
(
"auth_mon_num"
)
aCat
.
appendAttribute
(
"pdb_chain_id"
)
aCat
.
appendAttribute
(
"ref_mon_id"
)
aCat
.
appendAttribute
(
"ref_mon_num"
)
aCat
.
append
((
1
,
2
,
3
,
4
,
5
,
6
,
7
))
aCat
.
append
((
1
,
2
,
3
,
4
,
5
,
6
,
7
))
aCat
.
append
((
1
,
2
,
3
,
4
,
5
,
6
,
7
))
aCat
.
append
((
1
,
2
,
3
,
4
,
5
,
6
,
7
))
curContainer
.
append
(
aCat
)
myDataList
.
append
(
curContainer
)
pdbxW
=
PdbxWriter
(
ofh
)
pdbxW
.
write
(
myDataList
)
ofh
.
close
()
except
:
traceback
.
print_exc
(
file
=
sys
.
stderr
)
self
.
fail
()
def
testUpdateDataFile
(
self
):
"""Test case - write data file
"""
self
.
lfh
.
write
(
"
\n
Starting %s %s
\n
"
%
(
self
.
__class__
.
__name__
,
sys
.
_getframe
().
f_code
.
co_name
))
try
:
# Create a initial data file --
#
myDataList
=
[]
ofh
=
open
(
"test-output-1.cif"
,
"w"
)
curContainer
=
DataContainer
(
"myblock"
)
aCat
=
DataCategory
(
"pdbx_seqtool_mapping_ref"
)
aCat
.
appendAttribute
(
"ordinal"
)
aCat
.
appendAttribute
(
"entity_id"
)
aCat
.
appendAttribute
(
"auth_mon_id"
)
aCat
.
appendAttribute
(
"auth_mon_num"
)
aCat
.
appendAttribute
(
"pdb_chain_id"
)
aCat
.
appendAttribute
(
"ref_mon_id"
)
aCat
.
appendAttribute
(
"ref_mon_num"
)
aCat
.
append
((
1
,
2
,
3
,
4
,
5
,
6
,
7
))
aCat
.
append
((
1
,
2
,
3
,
4
,
5
,
6
,
7
))
aCat
.
append
((
1
,
2
,
3
,
4
,
5
,
6
,
7
))
aCat
.
append
((
1
,
2
,
3
,
4
,
5
,
6
,
7
))
curContainer
.
append
(
aCat
)
myDataList
.
append
(
curContainer
)
pdbxW
=
PdbxWriter
(
ofh
)
pdbxW
.
write
(
myDataList
)
ofh
.
close
()
#
# Read and update the data -
#
myDataList
=
[]
ifh
=
open
(
"test-output-1.cif"
,
"r"
)
pRd
=
PdbxReader
(
ifh
)
pRd
.
read
(
myDataList
)
ifh
.
close
()
#
myBlock
=
myDataList
[
0
]
myBlock
.
printIt
()
myCat
=
myBlock
.
getObj
(
'pdbx_seqtool_mapping_ref'
)
myCat
.
printIt
()
for
iRow
in
xrange
(
0
,
myCat
.
getRowCount
()):
myCat
.
setValue
(
'some value'
,
'ref_mon_id'
,
iRow
)
myCat
.
setValue
(
100
,
'ref_mon_num'
,
iRow
)
ofh
=
open
(
"test-output-2.cif"
,
"w"
)
pdbxW
=
PdbxWriter
(
ofh
)
pdbxW
.
write
(
myDataList
)
ofh
.
close
()
except
:
traceback
.
print_exc
(
file
=
sys
.
stderr
)
self
.
fail
()
def
testReadDataFile
(
self
):
"""Test case - read data file
"""
self
.
lfh
.
write
(
"
\n
Starting %s %s
\n
"
%
(
self
.
__class__
.
__name__
,
sys
.
_getframe
().
f_code
.
co_name
))
try
:
#
myDataList
=
[]
ifh
=
open
(
self
.
pathPdbxDataFile
,
"r"
)
pRd
=
PdbxReader
(
ifh
)
pRd
.
read
(
myDataList
)
ifh
.
close
()
except
:
traceback
.
print_exc
(
file
=
sys
.
stderr
)
self
.
fail
()
def
testReadWriteDataFile
(
self
):
"""Test case - data file read write test
"""
self
.
lfh
.
write
(
"
\n
Starting %s %s
\n
"
%
(
self
.
__class__
.
__name__
,
sys
.
_getframe
().
f_code
.
co_name
))
try
:
#
myDataList
=
[]
ifh
=
open
(
self
.
pathPdbxDataFile
,
"r"
)
pRd
=
PdbxReader
(
ifh
)
pRd
.
read
(
myDataList
)
ifh
.
close
()
ofh
=
open
(
self
.
pathOutputFile
,
"w"
)
pWr
=
PdbxWriter
(
ofh
)
pWr
.
write
(
myDataList
)
ofh
.
close
()
except
:
traceback
.
print_exc
(
file
=
sys
.
stderr
)
self
.
fail
()
def suite():
    """Return a TestSuite with every 'test*' method of PdbxWriterTests.

    Uses TestLoader (whose default method prefix is 'test') because
    unittest.makeSuite is deprecated and absent from recent Python releases.
    """
    return unittest.TestLoader().loadTestsFromTestCase(PdbxWriterTests)
if __name__ == '__main__':
    # Run the tests in this module when it is executed as a script.
    unittest.main()
wrappers/python/simtk/openmm/app/internal/pdbx/writer/__init__.py
0 → 100644
View file @
db72d0cb
wrappers/python/simtk/openmm/app/pdbxfile.py
0 → 100644
View file @
db72d0cb
"""
pdbxfile.py: Used for loading PDBx/mmCIF files.
This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2014 Stanford University and the Authors.
Authors: Peter Eastman
Contributors:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
__author__
=
"Peter Eastman"
__version__
=
"1.0"
import
os
import
sys
from
simtk.openmm
import
Vec3
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app
import
Topology
from
simtk.unit
import
nanometers
,
angstroms
,
is_quantity
,
norm
,
Quantity
import
element
as
elem
try
:
import
numpy
except
:
pass
class PDBxFile(object):
    """PDBxFile parses a PDBx/mmCIF file and constructs a Topology and a set of atom positions from it."""

    def __init__(self, file):
        """Load a PDBx/mmCIF file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters:
         - file (string) the name of the file to load.  Alternatively you can pass an open file object.

        Only the first data block of the file is used.  Raises ValueError if a
        later model contains an atom missing from the first model, or lists its
        atoms in a different order.
        """
        top = Topology()
        ## The Topology read from the PDBx/mmCIF file
        self.topology = top
        self._positions = []

        # Load the file.  A file opened here from a path is closed again once
        # parsing is done; a caller-supplied file object is left open.

        openedHere = isinstance(file, str)
        inputFile = open(file) if openedHere else file
        try:
            data = []
            PdbxReader(inputFile).read(data)
        finally:
            if openedHere:
                inputFile.close()
        block = data[0]

        # Build the topology.

        atomData = block.getObj('atom_site')
        atomNameCol = atomData.getAttributeIndex('label_atom_id')
        atomIdCol = atomData.getAttributeIndex('id')
        resNameCol = atomData.getAttributeIndex('label_comp_id')
        resIdCol = atomData.getAttributeIndex('label_seq_id')
        asymIdCol = atomData.getAttributeIndex('label_asym_id')
        # Chains are delimited by changes in the entity id column.
        chainIdCol = atomData.getAttributeIndex('label_entity_id')
        elementCol = atomData.getAttributeIndex('type_symbol')
        modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
        xCol = atomData.getAttributeIndex('Cartn_x')
        yCol = atomData.getAttributeIndex('Cartn_y')
        zCol = atomData.getAttributeIndex('Cartn_z')
        lastChainId = None
        lastResId = None
        lastAsymId = None
        # Maps (residue id, asym id, atom name) to the Atom created for the first model.
        atomTable = {}
        models = []
        for row in atomData.getRowList():
            # An index of -1 marks a missing column; fall back to a default value.
            asymId = ('A' if asymIdCol == -1 else row[asymIdCol])
            atomKey = (row[resIdCol], asymId, row[atomNameCol])
            model = ('1' if modelCol == -1 else row[modelCol])
            if model not in models:
                models.append(model)
                self._positions.append([])
            modelIndex = models.index(model)
            if modelIndex == 0:
                # This row defines a new atom.

                if lastChainId != row[chainIdCol]:
                    # The start of a new chain.
                    chain = top.addChain()
                    lastChainId = row[chainIdCol]
                    lastResId = None
                    lastAsymId = None
                if lastResId != row[resIdCol] or lastAsymId != asymId:
                    # The start of a new residue.
                    res = top.addResidue(row[resNameCol], chain)
                    lastResId = row[resIdCol]
                    lastAsymId = asymId
                # An unrecognized element symbol leaves the element as None.
                element = None
                try:
                    element = elem.get_by_symbol(row[elementCol])
                except KeyError:
                    pass
                atom = top.addAtom(row[atomNameCol], element, res)
                atomTable[atomKey] = atom
            else:
                # This row defines coordinates for an existing atom in one of the later models.

                try:
                    atom = atomTable[atomKey]
                except KeyError:
                    raise ValueError('Unknown atom %s in residue %s %s for model %s' % (row[atomNameCol], row[resNameCol], row[resIdCol], model))
                if atom.index != len(self._positions[modelIndex]):
                    raise ValueError('Atom %s for model %s does not match the order of atoms for model %s' % (row[atomIdCol], model, models[0]))
            # Coordinates are scaled by 0.1; the lists are tagged as nanometers below.
            self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1)
        for i, framePositions in enumerate(self._positions):
            self._positions[i] = framePositions*nanometers
        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        # Per-frame cache of numpy arrays, filled lazily by getPositions().
        self._numpyPositions = None

        # Record unit cell information, if present.

        cell = block.getObj('cell')
        if cell is not None and cell.getRowCount() > 0:
            row = cell.getRow(0)
            cellSize = [float(row[cell.getAttributeIndex(attribute)]) for attribute in ('length_a', 'length_b', 'length_c')]*angstroms
            self.topology.setUnitCellDimensions(cellSize)

        # Add bonds based on struct_conn records.

        connectData = block.getObj('struct_conn')
        if connectData is not None:
            res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
            res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
            atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
            atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
            asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
            asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
            typeCol = connectData.getAttributeIndex('conn_type_id')
            connectBonds = []
            for row in connectData.getRowList():
                # Only the first six characters of the connection type are compared.
                connType = row[typeCol][:6]
                if connType in ('covale', 'disulf', 'modres'):
                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                    # Records that name atoms not present in the first model are ignored.
                    if key1 in atomTable and key2 in atomTable:
                        connectBonds.append((atomTable[key1], atomTable[key2]))
            if connectBonds:
                # Only add bonds that don't already exist.
                existingBonds = set(top.bonds())
                for bond in connectBonds:
                    if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                        top.addBond(bond[0], bond[1])
                        existingBonds.add(bond)

    def getTopology(self):
        """Get the Topology of the model."""
        return self.topology

    def getNumFrames(self):
        """Get the number of frames stored in the file."""
        return len(self._positions)

    def getPositions(self, asNumpy=False, frame=0):
        """Get the atomic positions.

        Parameters:
         - asNumpy (boolean=False) if true, the values are returned as a numpy array instead of a list of Vec3s
         - frame (int=0) the index of the frame for which to get positions
        """
        if asNumpy:
            # Arrays are built on first request and cached per frame.
            if self._numpyPositions is None:
                self._numpyPositions = [None]*len(self._positions)
            if self._numpyPositions[frame] is None:
                self._numpyPositions[frame] = Quantity(numpy.array(self._positions[frame].value_in_unit(nanometers)), nanometers)
            return self._numpyPositions[frame]
        return self._positions[frame]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment