Merge pull request #702 from peastman/pdbx

Created reader for PDBx/mmCIF files

Merge pull request #702 from peastman/pdbx
Created reader for PDBx/mmCIF files
db72d0cb · peastman · 4ab3b428 · 49722158 · db72d0cb · db72d0cb
Commit db72d0cb authored Nov 06, 2014 by peastman
14 changed files
--- a/docs-source/licenses/Licenses.txt
+++ b/docs-source/licenses/Licenses.txt
@@ -2,7 +2,7 @@ OpenMM was developed by Simbios, the NIH National Center for Physics-Based
 Simulation of Biological Structures at Stanford, funded under the NIH Roadmap
 for Medical Research, grant U54 GM072970. See https://simtk.org.
-Portions copyright © 2008-2014 Stanford University and the Authors.
+Portions copyright © 2008-2014 Stanford University and the Authors.
 There are several licenses which cover different parts of OpenMM as described
 below.
@@ -119,3 +119,11 @@ freely, subject to the following restrictions:
 2. Altered source versions must be plainly marked as such, and must not be
   misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
+6. PdbxReader
+OpenMM uses the PDBx/mmCIF parser written by John Westbrook.  It is distributed
+under the Creative Commons Attribution 3.0 Unported license.  For details, see
+https://creativecommons.org/licenses/by/3.0.  This library was modified to move
+it inside the simtk.openmm.app.internal module.
\ No newline at end of file
--- a/wrappers/python/setup.py
+++ b/wrappers/python/setup.py
@@ -145,7 +145,10 @@ def buildKeywordDictionary(major_version_num=MAJOR_VERSION_NUM,
                                          "simtk.openmm",
                                          "simtk.openmm.app",
                                          "simtk.openmm.app.internal",
-                                          "simtk.openmm.app.internal.charmm"]
+                                          "simtk.openmm.app.internal.charmm",
+                                          "simtk.openmm.app.internal.pdbx",
+                                          "simtk.openmm.app.internal.pdbx.reader",
+                                          "simtk.openmm.app.internal.pdbx.writer"]
    setupKeywords["data_files"]        = []
    setupKeywords["package_data"]      = {"simtk" : [],
                                          "simtk.unit" : [],

--- a/wrappers/python/simtk/openmm/app/__init__.py
+++ b/wrappers/python/simtk/openmm/app/__init__.py
@@ -12,6 +12,7 @@ __email__ = "peastman@stanford.edu"
 from topology import Topology, Chain, Residue, Atom
 from pdbfile import PDBFile
+from pdbxfile import PDBxFile
 from forcefield import ForceField
 from simulation import Simulation
 from pdbreporter import PDBReporter

--- a/wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
+##
+# File:    PdbxReadWriteTests.py
+# Author:  jdw
+# Date:    9-Oct-2011
+# Version: 0.001
+#
+# Updated:
+#         24-Oct-2012 jdw update path details and reorganize.
+#
+##
+"""  Various test cases for the PDBx/mmCIF data file and dictionary reader and writer.
+"""
+__docformat__ = "restructuredtext en"
+__author__    = "John Westbrook"
+__email__     = "jwest@rcsb.rutgers.edu"
+__license__   = "Creative Commons Attribution 3.0 Unported"
+__version__   = "V0.01"
+import sys, unittest, traceback
+import sys, time, os, os.path, shutil
+from simtk.openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
+from simtk.openmm.app.internal.pdbx.writer.PdbxWriter import PdbxWriter
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+class PdbxReadWriteTests(unittest.TestCase):
+    """Read/write tests for PdbxReader and PdbxWriter.
+    Each test writes a banner with its own name to self.lfh and turns any
+    exception into a test failure after printing the traceback to self.lfh.
+    Output files are created relative to the current working directory and
+    the input data file is looked up relative to it as well.
+    """
+    def setUp(self):
+        # Stream that receives progress banners and tracebacks.
+        self.lfh=sys.stdout
+        self.verbose=False
+        self.pathPdbxDataFile     = "../tests/1kip.cif"
+        self.pathOutputFile       = "testOutputDataFile.cif"
+    def tearDown(self):
+        pass
+    def _logStart(self, testName):
+        """Write the 'Starting <class> <test>' banner to the log stream."""
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, testName))
+    def _writeContainers(self, path, containerList):
+        """Write containerList to path; the file is closed even if writing fails."""
+        with open(path, "w") as ofh:
+            PdbxWriter(ofh).write(containerList)
+    def _readContainers(self, path):
+        """Parse the file at path and return the list of containers read from it."""
+        containerList=[]
+        with open(path, "r") as ifh:
+            PdbxReader(ifh).read(containerList)
+        return containerList
+    def _makeMappingCategory(self, rows):
+        """Build a 'pdbx_seqtool_mapping_ref' category holding the given rows."""
+        aCat=DataCategory("pdbx_seqtool_mapping_ref")
+        for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
+                              "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
+            aCat.appendAttribute(attributeName)
+        for row in rows:
+            aCat.append(row)
+        return aCat
+    def testSimpleInitialization(self):
+        """Test case -  Simple initialization of a data category and data block
+        """
+        self._logStart(sys._getframe().f_code.co_name)
+        try:
+            fn="test-simple.cif"
+            attributeNameList=['aOne','aTwo','aThree','aFour','aFive','aSix','aSeven','aEight','aNine','aTen']
+            # Ten rows of 1..10; every row is its own list object.
+            rowList=[list(range(1,11)) for _ in range(10)]
+            nameCat='myCategory'
+            curContainer=DataContainer("myblock")
+            aCat=DataCategory(nameCat,attributeNameList,rowList)
+            aCat.printIt()
+            curContainer.append(aCat)
+            curContainer.printIt()
+            # Write the block out and read it straight back.
+            self._writeContainers(fn, [curContainer])
+            for container in self._readContainers(fn):
+                for objName in container.getObjNameList():
+                    name,aList,rList=container.getObj(objName).get()
+                    self.lfh.write("Recovered data category  %s\n" % name)
+                    self.lfh.write("Attribute list           %r\n" % repr(aList))
+                    self.lfh.write("Row list                 %r\n" % repr(rList))
+        except Exception:
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+    def testWriteDataFile(self):
+        """Test case -  write data file
+        """
+        self._logStart(sys._getframe().f_code.co_name)
+        try:
+            curContainer=DataContainer("myblock")
+            rows=[[1,2,3,4,5,6,7] for _ in range(4)]
+            rows.append([7,6,5,4,3,2,1])
+            aCat=self._makeMappingCategory(rows)
+            aCat.printIt()
+            curContainer.append(aCat)
+            curContainer.printIt()
+            self._writeContainers("test-output.cif", [curContainer])
+        except Exception:
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+    def testUpdateDataFile(self):
+        """Test case -  update data file
+        """
+        self._logStart(sys._getframe().f_code.co_name)
+        try:
+            # Create an initial data file --
+            curContainer=DataContainer("myblock")
+            aCat=self._makeMappingCategory([[ordinal,2,3,4,5,6,7] for ordinal in (9,10,11,12)])
+            curContainer.append(aCat)
+            self._writeContainers("test-output-1.cif", [curContainer])
+            #
+            # Read and update the data -
+            #
+            myDataList=self._readContainers("test-output-1.cif")
+            myBlock=myDataList[0]
+            myBlock.printIt()
+            myCat=myBlock.getObj('pdbx_seqtool_mapping_ref')
+            myCat.printIt()
+            for iRow in range(myCat.getRowCount()):
+                myCat.setValue('some value', 'ref_mon_id',iRow)
+                myCat.setValue(100, 'ref_mon_num',iRow)
+            self._writeContainers("test-output-2.cif", myDataList)
+        except Exception:
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+    def testReadDataFile(self):
+        """Test case -  read data file
+        """
+        self._logStart(sys._getframe().f_code.co_name)
+        try:
+            self._readContainers(self.pathPdbxDataFile)
+        except Exception:
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+    def testReadWriteDataFile(self):
+        """Test case -  data file read write test
+        """
+        self._logStart(sys._getframe().f_code.co_name)
+        try:
+            myDataList=self._readContainers(self.pathPdbxDataFile)
+            self._writeContainers(self.pathOutputFile, myDataList)
+        except Exception:
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+def simpleSuite():
+    """Return a TestSuite holding the selected PdbxReadWriteTests cases, in run order."""
+    selectedTests = ("testSimpleInitialization",
+                     "testUpdateDataFile",
+                     "testReadWriteDataFile")
+    suiteSelect = unittest.TestSuite()
+    for testName in selectedTests:
+        suiteSelect.addTest(PdbxReadWriteTests(testName))
+    return suiteSelect
+if __name__ == '__main__':
+    # Run only the hand-picked suite, with per-test output.
+    runner=unittest.TextTestRunner(verbosity=2)
+    runner.run(simpleSuite())
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
+##
+# File:  PdbxReader.py
+# Date:  2012-01-09  Jdw  Adapted from PdbxParser
+#
+# Updates:
+#
+# 2012-01-09 - (jdw) Separate reader and writer classes.
+#
+# 2012-09-02 - (jdw)  Revise tokenizer to better handle embedded quoting.
+#
+##
+"""
+PDBx/mmCIF dictionary and data file parser.
+Acknowledgements:
+ The tokenizer used in this module is modeled after the clever parser design
+ used in the PyMMLIB package.
+ PyMMLib Development Group
+ Authors: Ethan Merritt: merritt@u.washington.edu  & Jay Painter: jay.painter@gmail.com
+ See:  http://pymmlib.sourceforge.net/
+"""
+import re,sys
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+class PdbxError(Exception):
+    """ Class for catching general (non-syntax) errors raised by the PDBx reader.
+    """
+class SyntaxError(Exception):
+    """ Class for catching syntax errors found while parsing.
+        Note that within this module the name shadows the built-in SyntaxError.
+        Attributes:
+            lineNumber - input line number the error is reported at
+            text       - description of the error
+    """
+    def __init__(self, lineNumber, text):
+        # The base class is initialised without arguments, so .args stays empty.
+        super(SyntaxError, self).__init__()
+        self.text = text
+        self.lineNumber = lineNumber
+    def __str__(self):
+        details = (self.lineNumber, self.text)
+        return "%%ERROR - [at line: %d] %s" % details
+class PdbxReader(object):
+    """ PDBx reader for data files and dictionaries.
+    """
+    def __init__(self,ifh):
+        """  ifh - input file handle returned by open()
+        """
+        # Number of input lines consumed so far; used in syntax error messages.
+        self.__curLineNumber = 0
+        self.__ifh=ifh
+        # Maps the text before the first "_" of a reserved word (lower-cased)
+        # to the parser state that handles it.  The parser compares against
+        # "ST_GLOBAL", so that exact name is used here; the previous value
+        # "ST_GLOBAL_CONTAINER" matched no parser branch and left the parse
+        # loop spinning on a global_ token.
+        self.__stateDict={"data":   "ST_DATA_CONTAINER",
+                          "loop":   "ST_TABLE",
+                          "global": "ST_GLOBAL",
+                          "save":   "ST_DEFINITION",
+                          "stop":   "ST_STOP"}
+    def read(self, containerList):
+        """
+        Appends to the input list of definition and data containers.
+        The tokenizer running out of input (StopIteration) is the normal way
+        for a parse to finish.  If the parser returns on its own instead, a
+        PdbxError is raised.
+        """
+        self.__curLineNumber = 0
+        tokens = self.__tokenizer(self.__ifh)
+        try:
+            self.__parser(tokens, containerList)
+        except StopIteration:
+            # Input exhausted - parsing is complete.
+            return
+        raise PdbxError()
+    def __syntaxError(self, errText):
+        """ Raises this module's SyntaxError for errText, tagged with the current line number.
+        """
+        lineNumber = self.__curLineNumber
+        raise SyntaxError(lineNumber, errText)
+    def __getContainerName(self,inWord):
+        """ Returns the name of the data_ or save_ container: everything after the
+            five-character prefix, with surrounding whitespace removed.
+        """
+        prefixLength = 5
+        name = str(inWord[prefixLength:])
+        return name.strip()
+    def __getState(self, inWord):
+        """Identifies reserved syntax elements and assigns an associated state.
+           Returns: (reserved word, state)
+           where -
+              reserved word -  is one of CIF syntax elements:
+                               data_, loop_, global_, save_, stop_
+                               (lower-cased, without the trailing underscore)
+              state - the parser state required to process this next section.
+           Returns (None, "ST_UNKNOWN") when inWord contains no underscore or
+           the text before its first underscore is not a reserved word.
+        """
+        i = inWord.find("_")
+        if i == -1:
+            return None,"ST_UNKNOWN"
+        # Reserved words are matched case-insensitively.
+        rWord=inWord[:i].lower()
+        try:
+            return rWord, self.__stateDict[rWord]
+        except KeyError:
+            # Only a failed lookup means "not reserved"; anything else should surface.
+            return None,"ST_UNKNOWN"
+    def __parser(self, tokenizer, containerList):
+        """ Parser for PDBx data files and dictionaries.
+            Input - tokenizer  - iterator yielding 4-tuples of the form
+                                 (category name, attribute name, quoted string, unquoted word)
+                                 for data item names (_category.attribute), quoted strings
+                                 (single, double and multi-line semi-colon delimited), and
+                                 unquoted strings.
+                    containerList -  list-type container for data and definition objects parsed
+                                     from the input file.
+            Return:
+                    None - containerList is appended with data and definition objects.
+            Raises:
+                    SyntaxError (the class defined in this module) on malformed input.
+                    StopIteration propagates from the tokenizer when the input is
+                    exhausted; read() treats that as the normal end of parsing.
+        """
+        # Working container - data or definition
+        curContainer = None
+        #
+        # Working category container
+        categoryIndex = {}
+        curCategory = None
+        #
+        curRow = None
+        state =  None
+        # Find the first reserved word and begin capturing data.
+        #
+        while True:
+            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
+            if curWord is None:
+                continue
+            reservedWord, state  = self.__getState(curWord)
+            if reservedWord is not None:
+                break
+        while True:
+            #
+            #  Set the current state  -
+            #
+            #  At this point in the processing cycle we are expecting a token containing
+            #  either a '_category.attribute'  or a reserved word.
+            #
+            if curCatName is not None:
+                state = "ST_KEY_VALUE_PAIR"
+            elif curWord is not None:
+                reservedWord, state = self.__getState(curWord)
+            else:
+                self.__syntaxError("Miscellaneous syntax error")
+                return
+            #
+            # Process  _category.attribute  value assignments
+            #
+            if state == "ST_KEY_VALUE_PAIR":
+                try:
+                    curCategory = categoryIndex[curCatName]
+                except KeyError:
+                    # A new category is encountered - create a container and add a row
+                    curCategory = categoryIndex[curCatName] = DataCategory(curCatName)
+                    try:
+                        curContainer.append(curCategory)
+                    except AttributeError:
+                        # curContainer is still None: no data_/save_ block has been opened.
+                        self.__syntaxError("Category cannot be added to  data_ block")
+                        return
+                    curRow = []
+                    curCategory.append(curRow)
+                else:
+                    # Recover the existing row from the category
+                    try:
+                        curRow = curCategory[0]
+                    except IndexError:
+                        self.__syntaxError("Internal index error accessing category data")
+                        return
+                # Check for duplicate attributes and add attribute to table.
+                if curAttName in curCategory.getAttributeList():
+                    self.__syntaxError("Duplicate attribute encountered in category")
+                    return
+                else:
+                    curCategory.appendAttribute(curAttName)
+                # Get the data for this attribute from the next token
+                tCat, tAtt, curQuotedString, curWord = next(tokenizer)
+                if tCat is not None or (curQuotedString is None and curWord is None):
+                    self.__syntaxError("Missing data for item _%s.%s" % (curCatName,curAttName))
+                if curWord is not None:
+                    #
+                    # Validation check token for misplaced reserved words  -
+                    #
+                    reservedWord, state  = self.__getState(curWord)
+                    if reservedWord is not None:
+                        self.__syntaxError("Unexpected reserved word: %s" % (reservedWord))
+                    curRow.append(curWord)
+                elif curQuotedString is not None:
+                    curRow.append(curQuotedString)
+                else:
+                    self.__syntaxError("Missing value in item-value pair")
+                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
+                continue
+            #
+            # Process a loop_ declaration and associated data -
+            #
+            elif state == "ST_TABLE":
+                # The category name in the next curCatName,curAttName pair
+                #    defines the name of the category container.
+                curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+                if curCatName is None or curAttName is None:
+                    self.__syntaxError("Unexpected token in loop_ declaration")
+                    return
+                # Check for a previous category declaration.
+                if curCatName in categoryIndex:
+                    self.__syntaxError("Duplicate category declaration in loop_")
+                    return
+                curCategory = DataCategory(curCatName)
+                try:
+                    curContainer.append(curCategory)
+                except AttributeError:
+                    self.__syntaxError("loop_ declaration outside of data_ block or save_ frame")
+                    return
+                curCategory.appendAttribute(curAttName)
+                # Read the rest of the loop_ declaration
+                while True:
+                    curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
+                    if curCatName is None:
+                        break
+                    if curCatName != curCategory.getName():
+                        self.__syntaxError("Changed category name in loop_ declaration")
+                        return
+                    curCategory.appendAttribute(curAttName)
+                # If the next token is a 'word', check it for any reserved words -
+                if curWord is not None:
+                    reservedWord, state  = self.__getState(curWord)
+                    if reservedWord is not None:
+                        if reservedWord == "stop":
+                            return
+                        else:
+                            self.__syntaxError("Unexpected reserved word after loop declaration: %s" % (reservedWord))
+                # Read the table of data for this loop_ -
+                while True:
+                    curRow = []
+                    curCategory.append(curRow)
+                    # One token is consumed per declared attribute to fill the row.
+                    for tAtt in curCategory.getAttributeList():
+                        if curWord is not None:
+                            curRow.append(curWord)
+                        elif curQuotedString is not None:
+                            curRow.append(curQuotedString)
+                        curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+                    # loop_ data processing ends if -
+                    # A new _category.attribute is encountered
+                    if curCatName is not None:
+                        break
+                    # A reserved word is encountered
+                    if curWord is not None:
+                        reservedWord, state = self.__getState(curWord)
+                        if reservedWord is not None:
+                            break
+                continue
+            elif state == "ST_DEFINITION":
+                # Ignore trailing unnamed saveframe delimiters e.g. 'save_'
+                sName=self.__getContainerName(curWord)
+                if (len(sName) > 0):
+                    curContainer = DefinitionContainer(sName)
+                    containerList.append(curContainer)
+                    categoryIndex = {}
+                    curCategory = None
+                curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+            elif state == "ST_DATA_CONTAINER":
+                #
+                dName=self.__getContainerName(curWord)
+                if len(dName) == 0:
+                    dName="unidentified"
+                curContainer = DataContainer(dName)
+                containerList.append(curContainer)
+                categoryIndex = {}
+                curCategory = None
+                curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+            elif state == "ST_STOP":
+                return
+            elif state in ("ST_GLOBAL", "ST_GLOBAL_CONTAINER"):
+                # Both spellings are accepted: the reserved-word table has used
+                # "ST_GLOBAL_CONTAINER" for global_, which previously matched no
+                # branch here and left this loop spinning without consuming input.
+                curContainer = DataContainer("blank-global")
+                curContainer.setGlobal()
+                containerList.append(curContainer)
+                categoryIndex = {}
+                curCategory = None
+                curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+            elif state == "ST_UNKNOWN":
+                self.__syntaxError("Unrecogized syntax element: " + str(curWord))
+                return
+            else:
+                # No branch consumed a token; fail loudly rather than loop forever.
+                self.__syntaxError("Unhandled parser state: " + str(state))
+                return
+    def __tokenizer(self, ifh):
+        """ Tokenizer method for the mmCIF syntax file -
+            Each return/yield from this method returns information about
+            the next token in the form of a tuple with the following structure.
+            (category name, attribute name, quoted strings, words w/o quotes or white space)
+            Differentiated the regular expression to better handle embedded quotes:
+            single- and double-quoted strings are matched by separate alternatives
+            and merged into the single 'quoted strings' slot of the tuple.
+            The token stream simply ends when ifh is exhausted, including when the
+            input ends inside an unterminated semi-colon delimited string.
+        """
+        #
+        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
+        #                                     outside of this regex.
+        # Every fragment is a raw string so that \S and \s reach the regex
+        # engine unchanged instead of being treated as string escapes.
+        mmcifRe = re.compile(
+            r"(?:"
+            r"(?:_(.+?)[.](\S+))"               r"|"  # _category.attribute
+            r"(?:['](.*?)(?:[']\s|[']$))"       r"|"  # single quoted strings
+            r"(?:[\"](.*?)(?:[\"]\s|[\"]$))"    r"|"  # double quoted strings
+            r"(?:\s*#.*$)"                      r"|"  # comments (dumped)
+            r"(\S+)"                                  # unquoted words
+            r")")
+        fileIter = iter(ifh)
+        ## Tokenizer loop begins here ---
+        for line in fileIter:
+            self.__curLineNumber += 1
+            # Dump comments
+            if line.startswith("#"):
+                continue
+            # Gobble up the entire semi-colon/multi-line delimited string and
+            #    and stuff this into the string slot in the return tuple
+            #
+            if line.startswith(";"):
+                mlString = [line[1:]]
+                while True:
+                    try:
+                        line = next(fileIter)
+                    except StopIteration:
+                        # Input ended inside the text block.  End the token stream
+                        # explicitly: a StopIteration escaping a generator body is
+                        # turned into RuntimeError by PEP 479.
+                        return
+                    self.__curLineNumber += 1
+                    if line.startswith(";"):
+                        break
+                    mlString.append(line)
+                # remove trailing new-line that is part of the \n; delimiter
+                mlString[-1] = mlString[-1].rstrip()
+                #
+                yield (None, None, "".join(mlString), None)
+                #
+                # Need to process the remainder of the current line -
+                line = line[1:]
+            # Apply regex to the current line consolidate the single/double
+            # quoted within the quoted string category
+            for it in mmcifRe.finditer(line):
+                tgroups = it.groups()
+                # A comment match captures nothing and is skipped.
+                if tgroups != (None, None, None, None, None):
+                    qs = tgroups[2] if tgroups[2] is not None else tgroups[3]
+                    yield (tgroups[0], tgroups[1], qs, tgroups[4])
+    def __tokenizerOrg(self, ifh):
+        """ Tokenizer method for the mmCIF syntax file -
+            Each return/yield from this method returns information about
+            the next token in the form of a tuple with the following structure.
+            (category name, attribute name, quoted strings, words w/o quotes or white space)
+            This variant matches single- and double-quoted strings with one
+            alternative that accepts either quote character at each end.
+            NOTE(review): no caller of this method is visible in this module -
+            it looks like the earlier tokenizer kept for reference; confirm
+            before removing.
+        """
+        #
+        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
+        #                                     outside of this regex.
+        # Every fragment is a raw string so that \S and \s reach the regex
+        # engine unchanged instead of being treated as string escapes.
+        mmcifRe = re.compile(
+            r"(?:"
+            r"(?:_(.+?)[.](\S+))"               r"|"  # _category.attribute
+            r"(?:['\"](.*?)(?:['\"]\s|['\"]$))" r"|"  # quoted strings
+            r"(?:\s*#.*$)"                      r"|"  # comments (dumped)
+            r"(\S+)"                                  # unquoted words
+            r")")
+        fileIter = iter(ifh)
+        ## Tokenizer loop begins here ---
+        for line in fileIter:
+            self.__curLineNumber += 1
+            # Dump comments
+            if line.startswith("#"):
+                continue
+            # Gobble up the entire semi-colon/multi-line delimited string and
+            #    and stuff this into the string slot in the return tuple
+            #
+            if line.startswith(";"):
+                mlString = [line[1:]]
+                while True:
+                    try:
+                        line = next(fileIter)
+                    except StopIteration:
+                        # Input ended inside the text block.  End the token stream
+                        # explicitly: a StopIteration escaping a generator body is
+                        # turned into RuntimeError by PEP 479.
+                        return
+                    self.__curLineNumber += 1
+                    if line.startswith(";"):
+                        break
+                    mlString.append(line)
+                # remove trailing new-line that is part of the \n; delimiter
+                mlString[-1] = mlString[-1].rstrip()
+                #
+                yield (None, None, "".join(mlString), None)
+                #
+                # Need to process the remainder of the current line -
+                line = line[1:]
+            ## Apply regex to the current line
+            for it in mmcifRe.finditer(line):
+                groups = it.groups()
+                # A comment match captures nothing and is skipped.
+                if groups != (None, None, None, None):
+                    yield groups
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
+##
+# File:    PdbxReaderTests.py
+# Author:  jdw
+# Date:    9-Jan-2012
+# Version: 0.001
+#
+# Update:
+#  27-Sep-2012  jdw add test case for reading PDBx structure factor file 
+#
+##
+"""
+Test cases for reading PDBx/mmCIF data files PdbxReader class -
+"""
+import sys, unittest, traceback
+import sys, time, os, os.path, shutil
+from simtk.openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+class PdbxReaderTests(unittest.TestCase):
+    """Read tests for PdbxReader.
+    Each test writes a banner with its own name to self.lfh and turns any
+    unexpected exception into a test failure after printing the traceback to
+    sys.stderr.  The input data files are looked up relative to the current
+    working directory.
+    """
+    def setUp(self):
+        # Stream that receives progress banners and the statistics line.
+        self.lfh=sys.stderr
+        self.verbose=False
+        self.pathPdbxDataFile     ="../tests/1kip.cif"
+        self.pathBigPdbxDataFile  ="../tests/1ffk.cif"
+        self.pathSFDataFile       ="../tests/1kip-sf.cif"
+    def tearDown(self):
+        pass
+    def _logStart(self, testName):
+        """Write the 'Starting <class> <test>' banner to the log stream."""
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, testName))
+    def _readContainers(self, path):
+        """Parse the file at path and return the list of containers read from it.
+        The file is closed even if parsing fails.
+        """
+        containerList=[]
+        with open(path, "r") as ifh:
+            PdbxReader(ifh).read(containerList)
+        return containerList
+    def testReadSmallDataFile(self):
+        """Test case -  read data file
+        """
+        self._logStart(sys._getframe().f_code.co_name)
+        try:
+            self._readContainers(self.pathPdbxDataFile)
+        except Exception:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+    def testReadBigDataFile(self):
+        """Test case -  read data file
+        """
+        self._logStart(sys._getframe().f_code.co_name)
+        try:
+            self._readContainers(self.pathBigPdbxDataFile)
+        except Exception:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+    def testReadSFDataFile(self):
+        """Test case -  read PDB structure factor data  file and compute statistics on f/sig(f).
+        """
+        self._logStart(sys._getframe().f_code.co_name)
+        try:
+            myContainerList=self._readContainers(self.pathSFDataFile)
+            c0=myContainerList[0]
+            #
+            catObj=c0.getObj("refln")
+            if catObj is None:
+                # Previously 'return false', which raised NameError; fail with a
+                # message that names the real problem instead.
+                self.fail("Category 'refln' not found in %s" % self.pathSFDataFile)
+            #
+            # Get column name index.
+            #
+            itDict={}
+            for idxIt,itName in enumerate(catObj.getItemNameList()):
+                itDict[str(itName).lower()]=idxIt
+            idf=itDict['_refln.f_meas_au']
+            idsigf=itDict['_refln.f_meas_sigma_au']
+            minR=100
+            maxR=-1
+            sumR=0
+            icount=0
+            for row in catObj.getRowList():
+                try:
+                    f=float(row[idf])
+                    sigf=float(row[idsigf])
+                    # NOTE(review): this is sig(f)/f although the log label below
+                    # says f/sig(f) - confirm which one is intended.
+                    ratio=sigf/f
+                except (ValueError, TypeError, IndexError, ZeroDivisionError):
+                    # Skip rows with missing/non-numeric values or f == 0.
+                    continue
+                maxR=max(maxR,ratio)
+                minR=min(minR,ratio)
+                sumR+=ratio
+                icount+=1
+            if icount == 0:
+                # Avoid dividing by zero when computing the average below.
+                self.fail("No usable refln rows found in %s" % self.pathSFDataFile)
+            self.lfh.write("f/sig(f) min %f max %f avg %f count %d\n" % (minR, maxR, sumR/icount,icount))
+        except self.failureException:
+            # Let explicit failures through with their own message.
+            raise
+        except Exception:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+def simpleSuite():
+    """Return a TestSuite running the big, small and structure-factor reader tests, in that order."""
+    selectedTests = unittest.TestSuite()
+    for methodName in ("testReadBigDataFile",
+                       "testReadSmallDataFile",
+                       "testReadSFDataFile"):
+        selectedTests.addTest(PdbxReaderTests(methodName))
+    return selectedTests
+if __name__ == '__main__':
+    # Run the hand-picked suite with per-test (verbose) reporting.
+    runner=unittest.TextTestRunner(verbosity=2)
+    runner.run(simpleSuite())
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/__init__.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/__init__.py
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
+##
+# File:  PdbxWriter.py
+# Date:  2011-10-09 Jdw  Adapted from PdbxParser.py
+#
+# Updates:
+#    5-Apr-2011 jdw  Using the double quote format preference
+#   23-Oct-2012 jdw  update path details and reorganize.
+#
+###
+"""
+Classes for writing data and dictionary containers in PDBx/mmCIF format.
+"""
+__docformat__ = "restructuredtext en"
+__author__    = "John Westbrook"
+__email__     = "jwest@rcsb.rutgers.edu"
+__license__   = "Creative Commons Attribution 3.0 Unported"
+__version__   = "V0.01"
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+class PdbxError(Exception):
+    """General error raised by the PDBx writer, e.g. for a category that cannot be formatted."""
+class PdbxWriter(object):
+    """Write PDBx data files or dictionaries using the input container
+       or container list.
+
+       Categories holding a single row are written as item/value pairs;
+       categories holding several rows are written as ``loop_`` tables.
+    """
+    def __init__(self,ofh=sys.stdout):
+        """Create a writer.
+
+        Parameters:
+         - ofh (file-like object=sys.stdout) destination; only its write() method is used.
+        """
+        self.__ofh=ofh
+        self.__containerList=[]
+        self.__MAXIMUM_LINE_LENGTH = 2048
+        # Number of blanks separating an item name from its value and table columns from each other.
+        self.__SPACING = 2
+        # Indentation applied to the contents of definition (save frame) containers.
+        self.__INDENT_DEFINITION = 3
+        self.__indentSpace = " " * self.__INDENT_DEFINITION
+        self.__doDefinitionIndent=False
+        # Maximum number of rows checked for value length and format
+        # (None means every row is checked).
+        self.__rowPartition=None
+    def setRowPartition(self,numRows):
+        ''' Maximum number of rows checked for value length and format
+        '''
+        self.__rowPartition=numRows
+    def write(self, containerList):
+        """Write every container in `containerList`, in order."""
+        self.__containerList=containerList
+        for container in self.__containerList:
+            self.writeContainer(container)
+    def writeContainer(self,container):
+        """Write one container: its header line, each non-empty category, and a trailer.
+
+        Raises PdbxError for a multi-row category that has no attributes.
+        """
+        indS=" " * self.__INDENT_DEFINITION
+        if isinstance(container, DefinitionContainer):
+            self.__write("save_%s\n" % container.getName())
+            self.__doDefinitionIndent=True
+            self.__write(indS+"#\n")
+        elif isinstance(container, DataContainer):
+            if (container.getGlobal()):
+                self.__write("global_\n")
+                self.__doDefinitionIndent=False
+                self.__write("\n")
+            else:
+                self.__write("data_%s\n" % container.getName())
+                self.__doDefinitionIndent=False
+                self.__write("#\n")
+        for nm in container.getObjNameList():
+            obj=container.getObj(nm)
+            objL=obj.getRowList()
+            # Skip empty objects
+            if len(objL) == 0:
+                continue
+            # Item - value formatting
+            elif len(objL) == 1:
+                self.__writeItemValueFormat(obj)
+            # Table formatting -
+            elif len(objL) > 1 and len(obj.getAttributeList()) > 0:
+                self.__writeTableFormat(obj)
+            else:
+                raise PdbxError()
+            if self.__doDefinitionIndent:
+                self.__write(indS+"#")
+            else:
+                self.__write("#")
+        # Add a trailing saveframe reserved word
+        if isinstance(container, DefinitionContainer):
+            self.__write("\nsave_\n")
+        self.__write("#\n")
+    def __write(self, st):
+        """Send the string `st` to the output file handle."""
+        self.__ofh.write(st)
+    def __writeItemValueFormat(self, myCategory):
+        """Write a single-row category as one left-aligned ``_category.attribute  value`` line per attribute."""
+        # Compute the maximum item name length within this category -
+        attributeNameLengthMax  = 0
+        for attributeName in myCategory.getAttributeList():
+            attributeNameLengthMax = max(attributeNameLengthMax, len(attributeName))
+        # The extra 2 accounts for the leading '_' and the '.' separator.
+        itemNameLengthMax = self.__SPACING + len(myCategory.getName()) + attributeNameLengthMax + 2
+        #
+        lineList=[]
+        lineList.append("#\n")
+        for attributeName,iPos in myCategory.getAttributeListWithOrder():
+            if  self.__doDefinitionIndent:
+                #        - add indent --
+                lineList.append(self.__indentSpace)
+            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
+            lineList.append(itemName.ljust(itemNameLengthMax))
+            lineList.append(myCategory.getValueFormatted(attributeName,0))
+            lineList.append("\n")
+        self.__write("".join(lineList))
+    def __writeTableFormat(self, myCategory):
+        """Write a multi-row category as a ``loop_`` declaration followed by one padded line per row."""
+        # Write the declaration of the loop_
+        #
+        lineList=[]
+        lineList.append('#\n')
+        if  self.__doDefinitionIndent:
+            lineList.append(self.__indentSpace)
+        lineList.append("loop_")
+        for attributeName in myCategory.getAttributeList():
+            lineList.append('\n')
+            if  self.__doDefinitionIndent:
+                lineList.append(self.__indentSpace)
+            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
+            lineList.append(itemName)
+        self.__write("".join(lineList))
+        #
+        # Write the data in tabular format -
+        #
+        #    For speed make the following evaluation on a portion of the table
+        if self.__rowPartition is not None:
+            # Floor division keeps the sampling step an integer regardless of
+            # whether '/' means true division in the running interpreter.
+            numSteps=max(1,myCategory.getRowCount()//self.__rowPartition)
+        else:
+            numSteps=1
+        formatTypeList,dataTypeList=myCategory.getFormatTypeList(steps=numSteps)
+        maxLengthList=myCategory.getAttributeValueMaxLengthList(steps=numSteps)
+        spacing   = " " * self.__SPACING
+        #
+        for iRow in range(myCategory.getRowCount()):
+            lineList     = []
+            lineList.append('\n')
+            if  self.__doDefinitionIndent:
+                lineList.append(self.__indentSpace + "  ")
+            for iAt in range(myCategory.getAttributeCount()):
+                formatType = formatTypeList[iAt]
+                maxLength  = maxLengthList[iAt]
+                if (formatType == 'FT_UNQUOTED_STRING' or formatType == 'FT_NULL_VALUE'):
+                    val=myCategory.getValueFormattedByIndex(iAt,iRow)
+                    lineList.append(val.ljust(maxLength))
+                elif formatType == 'FT_NUMBER':
+                    # Numbers are right-aligned within the column.
+                    val=myCategory.getValueFormattedByIndex(iAt,iRow)
+                    lineList.append(val.rjust(maxLength))
+                elif formatType == 'FT_QUOTED_STRING':
+                    # Two extra columns leave room for the surrounding quotes.
+                    val=myCategory.getValueFormattedByIndex(iAt,iRow)
+                    lineList.append(val.ljust(maxLength+2))
+                elif formatType == "FT_MULTI_LINE_STRING":
+                    # Multi-line values are emitted as-is, without padding.
+                    val=myCategory.getValueFormattedByIndex(iAt,iRow)
+                    lineList.append(val)
+                # Any other format type contributes only the column spacing.
+                lineList.append(spacing)
+            self.__write("".join(lineList))
+        self.__write("\n")
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
+##
+# File:    PdbxWriterTests.py
+# Author:  jdw
+# Date:    3-November-2009
+# Version: 0.001
+#
+# Update:
+#  5-Apr-2011 jdw   Using the double quote format preference
+# 24-Oct-2012 jdw   Update path and examples.
+##
+"""
+Test implementing PDBx/mmCIF write and formatting operations.
+"""
+__docformat__ = "restructuredtext en"
+__author__    = "John Westbrook"
+__email__     = "jwest@rcsb.rutgers.edu"
+__license__   = "Creative Commons Attribution 3.0 Unported"
+__version__   = "V0.01"
+import sys, unittest, traceback
+import sys, time, os, os.path, shutil
+from simtk.openmm.app.internal.pdbx.reader.PdbxReader  import PdbxReader
+from simtk.openmm.app.internal.pdbx.writer.PdbxWriter  import PdbxWriter
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+class PdbxWriterTests(unittest.TestCase):
+    """Exercise PdbxWriter on hand-built containers and on containers parsed by PdbxReader.
+
+    Each test prints the traceback of any exception to stderr and then
+    reports a failure.
+    """
+    def setUp(self):
+        self.lfh=sys.stderr
+        self.verbose=False
+        self.pathPdbxDataFile     ="../tests/1kip.cif"
+        self.pathOutputFile       ="testOutputDataFile.cif"
+    def tearDown(self):
+        pass
+    def _writeExampleDataFile(self, path):
+        """Write a single data block holding a four-row pdbx_seqtool_mapping_ref table to `path`."""
+        curContainer=DataContainer("myblock")
+        aCat=DataCategory("pdbx_seqtool_mapping_ref")
+        for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
+                              "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
+            aCat.appendAttribute(attributeName)
+        for _ in range(4):
+            aCat.append((1,2,3,4,5,6,7))
+        curContainer.append(aCat)
+        self._writeDataFile(path, [curContainer])
+    def _readDataFile(self, path):
+        """Parse the file at `path` and return the resulting container list."""
+        myDataList=[]
+        # The context manager closes the file even when read() raises.
+        with open(path, "r") as ifh:
+            PdbxReader(ifh).read(myDataList)
+        return myDataList
+    def _writeDataFile(self, path, dataList):
+        """Write the containers in `dataList` to the file at `path`."""
+        with open(path, "w") as ofh:
+            PdbxWriter(ofh).write(dataList)
+    def testWriteDataFile(self):
+        """Test case -  write data file
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            self._writeExampleDataFile("test-output.cif")
+        except Exception:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+    def testUpdateDataFile(self):
+        """Test case -  write a data file, read it back, update values and write it again
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            # Create a initial data file --
+            #
+            self._writeExampleDataFile("test-output-1.cif")
+            #
+            # Read and update the data -
+            #
+            myDataList=self._readDataFile("test-output-1.cif")
+            #
+            myBlock=myDataList[0]
+            myBlock.printIt()
+            myCat=myBlock.getObj('pdbx_seqtool_mapping_ref')
+            myCat.printIt()
+            # range() instead of xrange() so the loop runs on Python 2 and 3.
+            for iRow in range(myCat.getRowCount()):
+                myCat.setValue('some value', 'ref_mon_id',iRow)
+                myCat.setValue(100, 'ref_mon_num',iRow)
+            self._writeDataFile("test-output-2.cif", myDataList)
+        except Exception:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+    def testReadDataFile(self):
+        """Test case -  read data file
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            self._readDataFile(self.pathPdbxDataFile)
+        except Exception:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+    def testReadWriteDataFile(self):
+        """Test case -  data file read write test
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            myDataList=self._readDataFile(self.pathPdbxDataFile)
+            self._writeDataFile(self.pathOutputFile, myDataList)
+        except Exception:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+def suite():
+    """Return a TestSuite holding every ``test*`` method of PdbxWriterTests."""
+    # unittest.makeSuite() is deprecated and removed in Python 3.13; the
+    # loader call is its replacement and uses the same default 'test' prefix.
+    return unittest.TestLoader().loadTestsFromTestCase(PdbxWriterTests)
+if __name__ == '__main__':
+    # When run as a script, let unittest discover and run the tests in this module.
+    unittest.main()
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/writer/__init__.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/writer/__init__.py
--- a/wrappers/python/simtk/openmm/app/pdbxfile.py
+++ b/wrappers/python/simtk/openmm/app/pdbxfile.py
+"""
+pdbxfile.py: Used for loading PDBx/mmCIF files.
+This is part of the OpenMM molecular simulation toolkit originating from
+Simbios, the NIH National Center for Physics-Based Simulation of
+Biological Structures at Stanford, funded under the NIH Roadmap for
+Medical Research, grant U54 GM072970. See https://simtk.org.
+Portions copyright (c) 2014 Stanford University and the Authors.
+Authors: Peter Eastman
+Contributors:
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+__author__ = "Peter Eastman"
+__version__ = "1.0"
+import os
+import sys
+from simtk.openmm import Vec3
+from simtk.openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
+from simtk.openmm.app import Topology
+from simtk.unit import nanometers, angstroms, is_quantity, norm, Quantity
+import element as elem
+try:
+    import numpy
+except:
+    pass
+class PDBxFile(object):
+    """PDBxFile parses a PDBx/mmCIF file and constructs a Topology and a set of atom positions from it."""
+    def __init__(self, file):
+        """Load a PDBx/mmCIF file.
+        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().
+        Only the first data block of the file is used.  The Topology is built from the first
+        model in the atom_site category; later models contribute additional position frames.
+        Parameters:
+         - file (string) the name of the file to load.  Alternatively you can pass an open file object.
+        Raises:
+         - ValueError if a later model contains an atom missing from the first model, or lists its atoms in a different order.
+        """
+        top = Topology()
+        ## The Topology read from the PDBx/mmCIF file
+        self.topology = top
+        self._positions = []
+        # Load the file.  A file opened here is also closed here; a file object
+        # supplied by the caller is left open for the caller to manage.
+        data = []
+        if isinstance(file, str):
+            inputFile = open(file)
+            try:
+                PdbxReader(inputFile).read(data)
+            finally:
+                inputFile.close()
+        else:
+            PdbxReader(file).read(data)
+        block = data[0]
+        # Build the topology.
+        atomData = block.getObj('atom_site')
+        atomNameCol = atomData.getAttributeIndex('label_atom_id')
+        atomIdCol = atomData.getAttributeIndex('id')
+        resNameCol = atomData.getAttributeIndex('label_comp_id')
+        resIdCol = atomData.getAttributeIndex('label_seq_id')
+        asymIdCol = atomData.getAttributeIndex('label_asym_id')
+        chainIdCol = atomData.getAttributeIndex('label_entity_id')
+        elementCol = atomData.getAttributeIndex('type_symbol')
+        modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
+        xCol = atomData.getAttributeIndex('Cartn_x')
+        yCol = atomData.getAttributeIndex('Cartn_y')
+        zCol = atomData.getAttributeIndex('Cartn_z')
+        lastChainId = None
+        lastResId = None
+        lastAsymId = None
+        # Maps (residue id, asym id, atom name) to the Atom created for the first model.
+        atomTable = {}
+        models = []
+        for row in atomData.getRowList():
+            # An index of -1 is treated as "column absent" and a default is substituted.
+            asymId = ('A' if asymIdCol == -1 else row[asymIdCol])
+            atomKey = ((row[resIdCol], asymId, row[atomNameCol]))
+            model = ('1' if modelCol == -1 else row[modelCol])
+            if model not in models:
+                models.append(model)
+                self._positions.append([])
+            modelIndex = models.index(model)
+            if modelIndex == 0:
+                # This row defines a new atom.
+                if lastChainId != row[chainIdCol]:
+                    # The start of a new chain.
+                    chain = top.addChain()
+                    lastChainId = row[chainIdCol]
+                    lastResId = None
+                    lastAsymId = None
+                if lastResId != row[resIdCol] or lastAsymId != asymId:
+                    # The start of a new residue.
+                    res = top.addResidue(row[resNameCol], chain)
+                    lastResId = row[resIdCol]
+                    lastAsymId = asymId
+                # Atoms with an unrecognized element symbol get element None.
+                element = None
+                try:
+                    element = elem.get_by_symbol(row[elementCol])
+                except KeyError:
+                    pass
+                atom = top.addAtom(row[atomNameCol], element, res)
+                atomTable[atomKey] = atom
+            else:
+                # This row defines coordinates for an existing atom in one of the later models.
+                try:
+                    atom = atomTable[atomKey]
+                except KeyError:
+                    raise ValueError('Unknown atom %s in residue %s %s for model %s' % (row[atomNameCol], row[resNameCol], row[resIdCol], model))
+                if atom.index != len(self._positions[modelIndex]):
+                    raise ValueError('Atom %s for model %s does not match the order of atoms for model %s' % (row[atomIdCol], model, models[0]))
+            # Coordinates are scaled by 0.1 (Angstroms to nanometers) before units are attached below.
+            self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1)
+        for i in range(len(self._positions)):
+            self._positions[i] = self._positions[i]*nanometers
+        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
+        self.positions = self._positions[0]
+        self.topology.createStandardBonds()
+        self.topology.createDisulfideBonds(self.positions)
+        # Per-frame cache of numpy position arrays, filled lazily by getPositions().
+        self._numpyPositions = None
+        # Record unit cell information, if present.
+        cell = block.getObj('cell')
+        if cell is not None and cell.getRowCount() > 0:
+            row = cell.getRow(0)
+            cellSize = [float(row[cell.getAttributeIndex(attribute)]) for attribute in ('length_a', 'length_b', 'length_c')]*angstroms
+            self.topology.setUnitCellDimensions(cellSize)
+        # Add bonds based on struct_conn records.
+        connectData = block.getObj('struct_conn')
+        if connectData is not None:
+            res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
+            res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
+            atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
+            atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
+            asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
+            asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
+            typeCol = connectData.getAttributeIndex('conn_type_id')
+            connectBonds = []
+            for row in connectData.getRowList():
+                # Only the first six characters of the connection type are compared.
+                connType = row[typeCol][:6]
+                if connType in ('covale', 'disulf', 'modres'):
+                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
+                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
+                    if key1 in atomTable and key2 in atomTable:
+                        connectBonds.append((atomTable[key1], atomTable[key2]))
+            if len(connectBonds) > 0:
+                # Only add bonds that don't already exist.
+                existingBonds = set(top.bonds())
+                for bond in connectBonds:
+                    if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
+                        top.addBond(bond[0], bond[1])
+                        existingBonds.add(bond)
+    def getTopology(self):
+        """Get the Topology of the model."""
+        return self.topology
+    def getNumFrames(self):
+        """Get the number of frames stored in the file."""
+        return len(self._positions)
+    def getPositions(self, asNumpy=False, frame=0):
+        """Get the atomic positions.
+        Parameters:
+         - asNumpy (boolean=False) if true, the values are returned as a numpy array instead of a list of Vec3s
+         - frame (int=0) the index of the frame for which to get positions
+         """
+        if asNumpy:
+            # Arrays are built on first request and cached per frame.
+            if self._numpyPositions is None:
+                self._numpyPositions = [None]*len(self._positions)
+            if self._numpyPositions[frame] is None:
+                self._numpyPositions[frame] = Quantity(numpy.array(self._positions[frame].value_in_unit(nanometers)), nanometers)
+            return self._numpyPositions[frame]
+        return self._positions[frame]