Merge pull request #702 from peastman/pdbx

Created reader for PDBx/mmCIF files

Merge pull request #702 from peastman/pdbx
Created reader for PDBx/mmCIF files
db72d0cb · peastman · 4ab3b428 · 49722158 · db72d0cb · db72d0cb
Commit db72d0cb authored Nov 06, 2014 by peastman
14 changed files
--- a/docs-source/licenses/Licenses.txt
+++ b/docs-source/licenses/Licenses.txt
@@ -2,7 +2,7 @@ OpenMM was developed by Simbios, the NIH National Center for Physics-Based
 Simulation of Biological Structures at Stanford, funded under the NIH Roadmap
 for Medical Research, grant U54 GM072970. See https://simtk.org.

-Portions copyright © 2008-2014 Stanford University and the Authors.
+Portions copyright © 2008-2014 Stanford University and the Authors.

 There are several licenses which cover different parts of OpenMM as described
 below.
@@ -119,3 +119,11 @@ freely, subject to the following restrictions:
 2. Altered source versions must be plainly marked as such, and must not be
   misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
+
+
+6. PdbxReader
+
+OpenMM uses the PDBx/mmCIF parser written by John Westbrook.  It is distributed
+under the Creative Commons Attribution 3.0 Unported license.  For details, see
+https://creativecommons.org/licenses/by/3.0.  This library was modified to move
+it inside the simtk.openmm.app.internal module.
\ No newline at end of file
--- a/wrappers/python/setup.py
+++ b/wrappers/python/setup.py
@@ -145,7 +145,10 @@ def buildKeywordDictionary(major_version_num=MAJOR_VERSION_NUM,
                                          "simtk.openmm",
                                          "simtk.openmm.app",
                                          "simtk.openmm.app.internal",
-                                          "simtk.openmm.app.internal.charmm"]
+                                          "simtk.openmm.app.internal.charmm",
+                                          "simtk.openmm.app.internal.pdbx",
+                                          "simtk.openmm.app.internal.pdbx.reader",
+                                          "simtk.openmm.app.internal.pdbx.writer"]
    setupKeywords["data_files"]        = []
    setupKeywords["package_data"]      = {"simtk" : [],
                                          "simtk.unit" : [],

--- a/wrappers/python/simtk/openmm/app/__init__.py
+++ b/wrappers/python/simtk/openmm/app/__init__.py
@@ -12,6 +12,7 @@ __email__ = "peastman@stanford.edu"

 from topology import Topology, Chain, Residue, Atom
 from pdbfile import PDBFile
+from pdbxfile import PDBxFile
 from forcefield import ForceField
 from simulation import Simulation
 from pdbreporter import PDBReporter

--- a/wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/__init__.py
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxContainers.py
+##
+#
+# File:     PdbxContainers.py
+# Original: 02-Feb-2009   jdw
+#
+# Update:
+#   23-Mar-2011   jdw Added method to rename attributes in category containers.
+#   05-Apr-2011   jdw Change cif writer to select double quoting as preferred 
+#                     quoting style where possible.
+#   16-Jan-2012   jdw Create base class for DataCategory class
+#   22-Mar-2012   jdw when append attributes to existing categories update
+#                     existing rows with placeholder null values.
+#    2-Sep-2012   jdw add option to avoid embedded quoting that might
+#                     confuse simple parsers.
+#   28-Jun-2013   jdw export remove method
+#   29-Jun-2013   jdw export remove row method
+##
+"""
+
+A collection of container classes supporting the PDBx/mmCIF storage model.
+
+A base container class is defined which supports common features of
+data and definition containers.   PDBx data files are organized in
+sections called data blocks which are mapped to data containers.
+PDBx dictionaries contain definition sections and data sections
+which are mapped to definition and data containers respectively.
+
+Data in both PDBx data files and dictionaries are organized in
+data categories. In the PDBx syntax individual items of data are
+identified by labels of the form '_categoryName.attributeName'.
+The terms category and attribute in PDBx jargon are analogous to
+table and column in a relational data model, or class and attribute
+in an object oriented data model.
+
+The DataCategory class provides the base storage container for instance
+data and definition meta data.
+
+"""
+
+__docformat__ = "restructuredtext en"
+__author__    = "John Westbrook"
+__email__     = "jwest@rcsb.rutgers.edu"
+__license__   = "Creative Commons Attribution 3.0 Unported"
+__version__   = "V0.01"
+
+import re,sys,traceback
+
+class CifName(object):
+    ''' Helpers for splitting CIF-style data names of the form
+        '_categoryName.attributeName' into their two parts.
+    '''
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def categoryPart(name):
+        ''' Return the category portion of `name`.
+
+            One leading underscore (if present) is dropped and the text before
+            the first '.' is returned.  When `name` has no '.', the whole
+            underscore-stripped name is returned.
+        '''
+        stripped = name[1:] if name.startswith("_") else name
+        dot = stripped.find(".")
+        return stripped if dot == -1 else stripped[:dot]
+
+    @staticmethod
+    def attributePart(name):
+        ''' Return the text following the first '.' in `name`, or None when
+            `name` contains no '.'.
+        '''
+        dot = name.find(".")
+        if dot == -1:
+            return None
+        return name[dot+1:]
+
+
+class ContainerBase(object):
+    ''' Container base class for data and definition objects.
+
+        Stores named objects in a dictionary and remembers the order in which
+        their names were first added.  Stored objects are expected to provide
+        getName()/setName() and, for printIt(), printIt(fh)/dumpIt(fh).
+    '''
+    def __init__(self,name):
+        # The enclosing scope of the data container (e.g. data_/save_)
+        self.__name = name
+        # List of category names within this container, in insertion order.
+        self.__objNameList=[]
+        # dictionary of DataCategory objects keyed by category name.
+        self.__objCatalog={}
+        self.__type=None
+
+    def getType(self):
+        return self.__type
+
+    def setType(self,type):
+        self.__type=type
+
+    def getName(self):
+        return self.__name
+
+    def setName(self,name):
+        self.__name=name
+
+    def exists(self,name):
+        """ Return True when an object called `name` is stored here.
+        """
+        return name in self.__objCatalog
+
+    def getObj(self,name):
+        """ Return the object stored under `name`, or None when absent.
+        """
+        return self.__objCatalog.get(name)
+
+    def getObjNameList(self):
+        """ Return the (live) list of object names in insertion order.
+        """
+        return self.__objNameList
+
+    def append(self,obj):
+        """ Add the input object to the current object catalog. An existing object
+            of the same name will be overwritten.  Objects whose getName()
+            returns None are ignored.
+        """
+        objName=obj.getName()
+        if objName is None:
+            return
+        if objName not in self.__objCatalog:
+            # self.__objNameList is keeping track of object order here --
+            self.__objNameList.append(objName)
+        self.__objCatalog[objName]=obj
+
+    def replace(self,obj):
+        """ Replace an existing object with the input object.  Nothing happens
+            when no object of that name is already stored.
+        """
+        objName=obj.getName()
+        if objName is not None and objName in self.__objCatalog:
+            self.__objCatalog[objName]=obj
+
+    def printIt(self,fh=sys.stdout,type="brief"):
+        """ Write a summary of the container to `fh`.  With type 'brief' each
+            object's printIt() is used, otherwise its dumpIt().
+        """
+        fh.write("+ %s container: %30s contains %4d categories\n" %
+                 (self.getType(),self.getName(),len(self.__objNameList)))
+        for nm in self.__objNameList:
+            fh.write("--------------------------------------------\n")
+            fh.write("Data category: %s\n" % nm)
+            if type == 'brief':
+                self.__objCatalog[nm].printIt(fh)
+            else:
+                self.__objCatalog[nm].dumpIt(fh)
+
+    def rename(self,curName,newName):
+        """ Change the name of an object in place, keeping its position in the
+            name list.  Return True on success or False when `curName` is not
+            stored here.
+        """
+        try:
+            i=self.__objNameList.index(curName)
+            obj=self.__objCatalog[curName]
+        except (ValueError,KeyError,TypeError):
+            return False
+        obj.setName(newName)
+        self.__objNameList[i]=newName
+        # Drop the old key so the object is no longer reachable (and
+        # exists() no longer answers True) under its previous name.
+        if newName != curName:
+            del self.__objCatalog[curName]
+        # NOTE(review): a different object already stored as newName is
+        # overwritten here without its name-list entry being removed.
+        self.__objCatalog[newName]=obj
+        return True
+
+    def remove(self,curName):
+        """ Remove object by name.  Return True on success or False otherwise.
+        """
+        try:
+            if curName not in self.__objCatalog:
+                return False
+        except TypeError:
+            # unhashable name can never be a catalog key
+            return False
+        del self.__objCatalog[curName]
+        if curName in self.__objNameList:
+            self.__objNameList.remove(curName)
+        return True
+
+
+class DefinitionContainer(ContainerBase):
+    ''' Container whose type is 'definition'.  It is reported as a category
+        definition when it holds an object named 'category', or as an item
+        definition when it holds one named 'item'.
+    '''
+    def __init__(self,name):
+        super(DefinitionContainer,self).__init__(name)
+        self.setType('definition')
+
+    def isCategory(self):
+        """ Return True when an object named 'category' is present.
+        """
+        return bool(self.exists('category'))
+
+    def isAttribute(self):
+        """ Return True when an object named 'item' is present.
+        """
+        return bool(self.exists('item'))
+
+    def printIt(self,fh=sys.stdout,type="brief"):
+        """ Write a summary of the definition to `fh`.  With type 'brief' each
+            contained object's printIt() is used, otherwise its dumpIt().
+        """
+        fh.write("Definition container: %30s contains %4d categories\n" %
+                 (self.getName(),len(self.getObjNameList())))
+        if self.isCategory():
+            fh.write("Definition type: category\n")
+        elif self.isAttribute():
+            fh.write("Definition type: item\n")
+        else:
+            fh.write("Definition type: undefined\n")
+
+        for nm in self.getObjNameList():
+            fh.write("--------------------------------------------\n")
+            fh.write("Definition category: %s\n" % nm)
+            if type == 'brief':
+                self.getObj(nm).printIt(fh)
+            else:
+                # was dumpId(), a method no stored object provides
+                self.getObj(nm).dumpIt(fh)
+
+
+
+class DataContainer(ContainerBase):
+    ''' Container class for DataCategory objects.
+    '''
+    def __init__(self,name):
+        super(DataContainer,self).__init__(name)
+        self.setType('data')
+        self.__globalFlag=False
+
+    def invokeDataBlockMethod(self,type,method,db):
+        """ Execute the Python source returned by method.getInline() inside
+            this method's scope, so that code can refer to self, type, method
+            and db.
+
+            SECURITY: this runs arbitrary code taken from `method`; only call
+            it with method definitions from a trusted source.
+        """
+        self.__currentRow = 1
+        # Parenthesised form: same as the exec statement on Python 2 and
+        # valid syntax on Python 3.
+        exec(method.getInline())
+
+    def setGlobal(self):
+        self.__globalFlag=True
+
+    def getGlobal(self):
+        return  self.__globalFlag
+
+        
+
+class DataCategoryBase(object):
+    """ Base object definition for a data category -
+
+        Keeps the category name, the ordered attribute name list, the row
+        list, and a lookup from lower-cased attribute name to the name as
+        stored.
+    """
+    def __init__(self,name,attributeNameList=None,rowList=None):
+        self._name = name
+        # Use the caller's lists when given, otherwise start with fresh ones.
+        self._rowList = rowList if rowList is not None else []
+        self._attributeNameList = attributeNameList if attributeNameList is not None else []
+        #
+        # Derived class data -
+        #
+        self._catalog={}
+        self._numAttributes=0
+        #
+        self.__setup()
+
+    def __setup(self):
+        """ Recompute the attribute count and the lower-case name catalog
+            from the current attribute name list.
+        """
+        names=self._attributeNameList
+        self._numAttributes = len(names)
+        self._catalog = dict((nm.lower(),nm) for nm in names)
+
+    def setRowList(self,rowList):
+        self._rowList=rowList
+
+    def setAttributeNameList(self,attributeNameList):
+        """ Replace the attribute name list and refresh the derived data.
+        """
+        self._attributeNameList=attributeNameList
+        self.__setup()
+
+    def setName(self,name):
+        self._name=name
+
+    def get(self):
+        """ Return the tuple (name, attribute name list, row list).
+        """
+        return (self._name,self._attributeNameList,self._rowList)
+
+    
+        
+class DataCategory(DataCategoryBase):
+    """  Methods for creating, accessing, and formatting PDBx cif data categories.  
+    """
+    def __init__(self,name,attributeNameList=None,rowList=None):
+        super(DataCategory,self).__init__(name,attributeNameList,rowList)        
+        #
+        self.__lfh = sys.stdout
+        
+        self.__currentRowIndex=0
+        self.__currentAttribute=None
+        #
+        self.__avoidEmbeddedQuoting=False
+        #
+        # --------------------------------------------------------------------
+        # any whitespace 
+        self.__wsRe=re.compile(r"\s")
+        self.__wsAndQuotesRe=re.compile(r"[\s'\"]")        
+        # any newline or carriage control
+        self.__nlRe=re.compile(r"[\n\r]")
+        #
+        # single quote 
+        self.__sqRe=re.compile(r"[']")
+        #
+        self.__sqWsRe=re.compile(r"('\s)|(\s')")
+        
+        # double quote 
+        self.__dqRe=re.compile(r'["]')
+        self.__dqWsRe=re.compile(r'("\s)|(\s")')
+        #
+        self.__intRe=re.compile(r'^[0-9]+$')
+        self.__floatRe=re.compile(r'^-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)?$')
+        #
+        self.__dataTypeList=['DT_NULL_VALUE','DT_INTEGER','DT_FLOAT','DT_UNQUOTED_STRING','DT_ITEM_NAME',
+                             'DT_DOUBLE_QUOTED_STRING','DT_SINGLE_QUOTED_STRING','DT_MULTI_LINE_STRING']
+        self.__formatTypeList=['FT_NULL_VALUE','FT_NUMBER','FT_NUMBER','FT_UNQUOTED_STRING',
+                               'FT_QUOTED_STRING','FT_QUOTED_STRING','FT_QUOTED_STRING','FT_MULTI_LINE_STRING']
+        #
+
+
+    def __getitem__(self, x):
+        """  Implements list-type functionality - 
+             Implements op[x] for some special cases -
+                x=integer - returns the row in category (normal list behavior)
+                x=string  - returns the value of attribute 'x' in first row.
+        """
+        if isinstance(x, int):
+            #return self._rowList.__getitem__(x)
+            return self._rowList[x]
+
+        elif isinstance(x, str):
+            try:
+                #return self._rowList[0][x]
+                ii=self.getAttributeIndex(x)
+                return self._rowList[0][ii]
+            except (IndexError, KeyError):
+                raise KeyError
+        raise TypeError, x
+        
+    
+    def getCurrentAttribute(self):
+        return self.__currentAttribute
+
+
+    def getRowIndex(self):
+        return self.__currentRowIndex
+
+    def getRowList(self):
+        return self._rowList
+
+    def getRowCount(self):
+        return (len(self._rowList))
+
+    def getRow(self,index):
+        try:
+            return self._rowList[index]
+        except:
+            return []
+
+    def removeRow(self,index):
+        try:
+            if ((index >= 0) and (index < len(self._rowList))):
+                del self._rowList[index] 
+                if self.__currentRowIndex >= len(self._rowList):
+                    self.__currentRowIndex = len(self._rowList) -1
+                return True
+            else:
+                pass
+        except:
+            pass
+
+        return False
+
+    def getFullRow(self,index):
+        """ Return a full row based on the length of the the attribute list.
+        """
+        try:
+            if (len(self._rowList[index]) < self._numAttributes):
+                for ii in range( self._numAttributes-len(self._rowList[index])):
+                    self._rowList[index].append('?')
+            return self._rowList[index]
+        except:
+            return ['?' for ii in range(self._numAttributes)]
+
+    def getName(self):
+        return self._name
+    
+    def getAttributeList(self):
+        return self._attributeNameList
+
+    def getAttributeCount(self):
+        return len(self._attributeNameList)
+
+    def getAttributeListWithOrder(self):
+        oL=[]
+        for ii,att in enumerate(self._attributeNameList):
+            oL.append((att,ii))
+        return oL
+
+    def getAttributeIndex(self,attributeName):
+        try:
+            return self._attributeNameList.index(attributeName)
+        except:
+            return -1
+
+    def hasAttribute(self,attributeName):
+        return attributeName in self._attributeNameList
+    
+    def getIndex(self,attributeName):
+        try:
+            return self._attributeNameList.index(attributeName)
+        except:
+            return -1
+
+    def getItemNameList(self):
+        itemNameList=[]
+        for att in self._attributeNameList:
+            itemNameList.append("_"+self._name+"."+att)
+        return itemNameList
+    
+    def append(self,row):
+        #self.__lfh.write("PdbxContainer(append) category %s row %r\n"  % (self._name,row))
+        self._rowList.append(row)
+
+    def appendAttribute(self,attributeName):
+        attributeNameLC  = attributeName.lower()
+        if attributeNameLC  in self._catalog:
+            i = self._attributeNameList.index(self._catalog[attributeNameLC])
+            self._attributeNameList[i] = attributeName
+            self._catalog[attributeNameLC] = attributeName
+            #self.__lfh.write("Appending existing attribute %s\n" % attributeName)
+        else:
+            #self.__lfh.write("Appending existing attribute %s\n" % attributeName)            
+            self._attributeNameList.append(attributeName)
+            self._catalog[attributeNameLC] = attributeName
+            #
+        self._numAttributes = len(self._attributeNameList)
+
+
+    def appendAttributeExtendRows(self,attributeName):
+        attributeNameLC  = attributeName.lower()
+        if attributeNameLC  in self._catalog:
+            i = self._attributeNameList.index(self._catalog[attributeNameLC])
+            self._attributeNameList[i] = attributeName
+            self._catalog[attributeNameLC] = attributeName
+            self.__lfh.write("Appending existing attribute %s\n" % attributeName)
+        else:
+            self._attributeNameList.append(attributeName)
+            self._catalog[attributeNameLC] = attributeName
+            # add a placeholder to any existing rows for the new attribute.
+            if (len(self._rowList) > 0):
+                for row in self._rowList:
+                    row.append("?")
+            #
+        self._numAttributes = len(self._attributeNameList)
+
+        
+
+    def getValue(self,attributeName=None,rowIndex=None):
+        if attributeName is None:
+            attribute = self.__currentAttribute
+        else:
+            attribute = attributeName
+        if rowIndex is None:
+            rowI = self.__currentRowIndex
+        else:
+            rowI =rowIndex
+            
+        if isinstance(attribute, str) and isinstance(rowI,int):
+            try:
+                return self._rowList[rowI][self._attributeNameList.index(attribute)]
+            except (IndexError):
+                raise IndexError        
+        raise IndexError, attribute
+
+    def setValue(self,value,attributeName=None,rowIndex=None):
+        if attributeName is None:
+            attribute=self.__currentAttribute
+        else:
+            attribute=attributeName
+
+        if rowIndex is None:
+            rowI = self.__currentRowIndex
+        else:
+            rowI = rowIndex
+
+        if isinstance(attribute, str) and isinstance(rowI,int):
+            try:
+                # if row index is out of range - add the rows -
+                for ii in range(rowI+1 - len(self._rowList)):
+                    self._rowList.append(self.__emptyRow())                
+                # self._rowList[rowI][attribute]=value
+                ll=len(self._rowList[rowI])
+                ind=self._attributeNameList.index(attribute)
+
+                # extend the list if needed - 
+                if ( ind >= ll):
+                    self._rowList[rowI].extend([None for ii in xrange(2*ind -ll)])    
+                self._rowList[rowI][ind]=value                    
+            except (IndexError):
+                self.__lfh.write("DataCategory(setvalue) index error category %s attribute %s index %d value %r\n" %
+                                 (self._name,attribute,rowI,value))
+                traceback.print_exc(file=self.__lfh)                                
+                #raise IndexError
+            except (ValueError):
+                self.__lfh.write("DataCategory(setvalue) value error category %s attribute %s index %d value %r\n" %
+                                 (self._name,attribute,rowI,value))                
+                traceback.print_exc(file=self.__lfh)                
+                #raise ValueError
+
+    def __emptyRow(self):
+        return [None for ii in range(len(self._attributeNameList))]
+        
+    def replaceValue(self,oldValue,newValue,attributeName):
+        numReplace=0
+        if attributeName not in self._attributeNameList:
+            return numReplace
+        ind=self._attributeNameList.index(attributeName)
+        for row in self._rowList:
+            if row[ind] == oldValue:
+                row[ind]=newValue
+                numReplace += 1
+        return numReplace
+
+    def replaceSubstring(self,oldValue,newValue,attributeName):
+        ok=False
+        if attributeName not in self._attributeNameList:            
+            return ok
+        ind=self._attributeNameList.index(attributeName)
+        for row in self._rowList:
+            val=row[ind]
+            row[ind]=val.replace(oldValue,newValue)
+            if val != row[ind]:
+                ok=True
+        return ok
+        
+    def invokeAttributeMethod(self,attributeName,type,method,db):
+        self.__currentRowIndex = 0
+        self.__currentAttribute=attributeName
+        self.appendAttribute(attributeName)
+        currentRowIndex=self.__currentRowIndex
+        #
+        ind=self._attributeNameList.index(attributeName)
+        if len(self._rowList) == 0:
+            row=[None for ii in xrange(len(self._attributeNameList)*2)]
+            row[ind]=None
+            self._rowList.append(row)
+            
+        for row in self._rowList:
+            ll = len(row)
+            if (ind >= ll):
+                row.extend([None for ii in xrange(2*ind-ll)])
+                row[ind]=None
+            exec method.getInline()
+            self.__currentRowIndex+=1
+            currentRowIndex=self.__currentRowIndex
+
+    def invokeCategoryMethod(self,type,method,db):
+        self.__currentRowIndex = 0
+        exec method.getInline()
+
+    def getAttributeLengthMaximumList(self):
+        mList=[0 for i in len(self._attributeNameList)]
+        for row in self._rowList:
+            for indx,val in enumerate(row):
+                mList[indx] = max(mList[indx],len(val))
+        return mList
+            
+    def renameAttribute(self,curAttributeName,newAttributeName):
+        """ Change the name of an attribute in place -
+        """
+        try:
+            i=self._attributeNameList.index(curAttributeName)
+            self._attributeNameList[i]=newAttributeName
+            del self._catalog[curAttributeName.lower()]            
+            self._catalog[newAttributeName.lower()]=newAttributeName
+            return True
+        except:
+            return False
+        
+    def printIt(self,fh=sys.stdout):
+        fh.write("--------------------------------------------\n")
+        fh.write("  Category: %s attribute list length: %d\n" %
+                 (self._name,len(self._attributeNameList)))
+        for at in self._attributeNameList:
+            fh.write("  Category: %s attribute: %s\n" % (self._name,at))
+            
+        fh.write("  Row value list length: %d\n" % len(self._rowList))
+        #
+        for row in self._rowList[:2]:
+            #
+            if len(row) == len(self._attributeNameList):
+                for ii,v in enumerate(row):
+                    fh.write("        %30s: %s ...\n" % (self._attributeNameList[ii],str(v)[:30]))
+            else:
+                fh.write("+WARNING - %s data length %d attribute name length %s mismatched\n" %
+                         (self._name,len(row),len(self._attributeNameList)))
+
+    def dumpIt(self,fh=sys.stdout):
+        fh.write("--------------------------------------------\n")
+        fh.write("  Category: %s attribute list length: %d\n" %
+                 (self._name,len(self._attributeNameList)))
+        for at in self._attributeNameList:
+            fh.write("  Category: %s attribute: %s\n" % (self._name,at))
+            
+        fh.write("  Value list length: %d\n" % len(self._rowList))
+        for row in self._rowList:
+            for ii,v in enumerate(row):
+                fh.write("        %30s: %s\n" % (self._attributeNameList[ii],v))
+
+
+    def __formatPdbx(self, inp):
+        """ Format input data following PDBx quoting rules - 
+        """
+        try:
+            if (inp is None):
+                return ("?",'DT_NULL_VALUE')
+
+            # pure numerical values are returned as unquoted strings
+            if (isinstance(inp,int) or self.__intRe.search(str(inp))):
+                return ( [str(inp)],'DT_INTEGER')
+
+            if (isinstance(inp,float) or self.__floatRe.search(str(inp))):
+                return ([str(inp)],'DT_FLOAT')
+
+            # null value handling -
+
+            if (inp == "." or inp == "?"):
+                return ([inp],'DT_NULL_VALUE')
+
+            if (inp == ""):
+                return (["."],'DT_NULL_VALUE')
+
+            # Contains white space or quotes ?
+            if not self.__wsAndQuotesRe.search(inp):
+                if inp.startswith("_"):
+                    return (self.__doubleQuotedList(inp),'DT_ITEM_NAME')
+                else:
+                    return ([str(inp)],'DT_UNQUOTED_STRING')
+            else:
+                if self.__nlRe.search(inp):
+                    return (self.__semiColonQuotedList(inp),'DT_MULTI_LINE_STRING')
+                else:
+                    if (self.__avoidEmbeddedQuoting):                    
+                        # change priority to choose double quoting where possible.
+                        if not self.__dqRe.search(inp) and not self.__sqWsRe.search(inp):
+                            return (self.__doubleQuotedList(inp),'DT_DOUBLE_QUOTED_STRING')                                        
+                        elif not self.__sqRe.search(inp) and not self.__dqWsRe.search(inp):
+                            return (self.__singleQuotedList(inp),'DT_SINGLE_QUOTED_STRING')                                        
+                        else:
+                            return (self.__semiColonQuotedList(inp),'DT_MULTI_LINE_STRING')
+                    else:
+                        # change priority to choose double quoting where possible.
+                        if not self.__dqRe.search(inp):
+                            return (self.__doubleQuotedList(inp),'DT_DOUBLE_QUOTED_STRING')                                        
+                        elif not self.__sqRe.search(inp):
+                            return (self.__singleQuotedList(inp),'DT_SINGLE_QUOTED_STRING')                                        
+                        else:
+                            return (self.__semiColonQuotedList(inp),'DT_MULTI_LINE_STRING')
+                        
+                        
+        except:
+            traceback.print_exc(file=self.__lfh)                            
+
+    def __dataTypePdbx(self, inp):
+        """ Detect the PDBx data type - 
+        """
+        if (inp is None):
+            return ('DT_NULL_VALUE')
+        
+        # pure numerical values are returned as unquoted strings
+        if isinstance(inp,int) or self.__intRe.search(str(inp)):
+            return ('DT_INTEGER')
+
+        if isinstance(inp,float) or self.__floatRe.search(str(inp)):
+            return ('DT_FLOAT')
+
+        # null value handling -
+
+        if (inp == "." or inp == "?"):
+            return ('DT_NULL_VALUE')
+
+        if (inp == ""):
+            return ('DT_NULL_VALUE')
+
+        # Contains white space or quotes ?
+        if not self.__wsAndQuotesRe.search(inp):
+            if inp.startswith("_"):
+                return ('DT_ITEM_NAME')
+            else:
+                return ('DT_UNQUOTED_STRING')
+        else:
+            if self.__nlRe.search(inp):
+                return ('DT_MULTI_LINE_STRING')
+            else:
+                if (self.__avoidEmbeddedQuoting):
+                    if not self.__sqRe.search(inp) and not self.__dqWsRe.search(inp):
+                        return ('DT_DOUBLE_QUOTED_STRING')                                        
+                    elif not self.__dqRe.search(inp) and not self.__sqWsRe.search(inp):
+                        return ('DT_SINGLE_QUOTED_STRING')                                        
+                    else:
+                        return ('DT_MULTI_LINE_STRING')
+                else:
+                    if not self.__sqRe.search(inp):
+                        return ('DT_DOUBLE_QUOTED_STRING')                                        
+                    elif not self.__dqRe.search(inp):
+                        return ('DT_SINGLE_QUOTED_STRING')                                        
+                    else:
+                        return ('DT_MULTI_LINE_STRING')                    
+
+    def __singleQuotedList(self,inp):
+        l=[]
+        l.append("'")
+        l.append(inp)
+        l.append("'")        
+        return(l)
+
+    def __doubleQuotedList(self,inp):
+        l=[]
+        l.append('"')
+        l.append(inp)
+        l.append('"')        
+        return(l)
+    
+    def __semiColonQuotedList(self,inp):
+        l=[]
+        l.append("\n")            
+        if inp[-1] == '\n':
+            l.append(";")
+            l.append(inp)
+            l.append(";")
+            l.append("\n")                        
+        else:
+            l.append(";")
+            l.append(inp)
+            l.append("\n")                        
+            l.append(";")
+            l.append("\n")                                    
+
+        return(l)
+
+    def getValueFormatted(self,attributeName=None,rowIndex=None):
+        if attributeName is None:
+            attribute=self.__currentAttribute
+        else:
+            attribute=attributeName
+
+        if rowIndex is None:
+            rowI = self.__currentRowIndex
+        else:
+            rowI = rowIndex
+            
+        if isinstance(attribute, str) and isinstance(rowI,int):
+            try:
+                list,type=self.__formatPdbx(self._rowList[rowI][self._attributeNameList.index(attribute)])
+                return "".join(list)
+            except (IndexError):
+                self.__lfh.write("attributeName %s rowI %r rowdata %r\n" % (attributeName,rowI,self._rowList[rowI]))
+                raise IndexError        
+        raise TypeError, attribute
+        
+
+    def getValueFormattedByIndex(self,attributeIndex,rowIndex):
+        try:
+            list,type=self.__formatPdbx(self._rowList[rowIndex][attributeIndex])
+            return "".join(list)
+        except (IndexError):
+            raise IndexError        
+
+    def getAttributeValueMaxLengthList(self,steps=1):
+        mList=[0 for i in range(len(self._attributeNameList))]
+        for row in self._rowList[::steps]:
+            for indx in range(len(self._attributeNameList)):
+                val=row[indx]                
+                mList[indx] = max(mList[indx],len(str(val)))
+        return mList
+
+    def getFormatTypeList(self,steps=1):
+        """Infer a format type and a data type for every attribute.
+
+        steps - stride used to sample the row list (every row by default).
+
+        Each sampled value is classified by the private data-type classifier.
+        Per attribute, the classification holding the highest position in the
+        private data-type list is kept.  Every resulting data type is then
+        mapped to the format type stored at the same position of the private
+        format-type list.
+
+        A failure while scanning the rows (for example a row shorter than the
+        attribute list) is reported on the log handle and ends the scan; the
+        types gathered up to that point are still returned, so both lists
+        always hold one entry per attribute.
+
+        Returns: (format type list, data type list)
+        """
+        curDataTypeList=['DT_NULL_VALUE' for i in range(len(self._attributeNameList))]
+
+        # Bound before the scan so that the diagnostic below can always be
+        # formatted, even when the failure happens on the very first access.
+        indx = -1
+        row = None
+        try:
+            for row in self._rowList[::steps]:
+                for indx in range(len(self._attributeNameList)):
+                    dType=self.__dataTypePdbx(row[indx])
+                    dIndx=self.__dataTypeList.index(dType)
+
+                    cIndx=self.__dataTypeList.index(curDataTypeList[indx])
+                    curDataTypeList[indx]=self.__dataTypeList[max(cIndx,dIndx)]
+        except Exception:
+            # Deliberately best effort: report the problem and keep what was
+            # inferred so far.  KeyboardInterrupt/SystemExit are not swallowed.
+            self.__lfh.write("PdbxDataCategory(getFormatTypeList) ++Index error at index %d in row %r\n" % (indx,row))
+
+        # Map the format types to the data types.  This is done outside of the
+        # try block so that the returned list is defined on every path.
+        curFormatTypeList=[]
+        for dt in curDataTypeList:
+            ii=self.__dataTypeList.index(dt)
+            curFormatTypeList.append(self.__formatTypeList[ii])
+
+        return curFormatTypeList,curDataTypeList
+
+    def getFormatTypeListX(self):
+        """Infer a format type and a data type for every attribute from all rows.
+
+        Each value is classified by the private data-type classifier.  Per
+        attribute, the classification holding the highest position in the
+        private data-type list is kept, and is finally mapped to the format
+        type stored at the same position of the private format-type list.
+
+        Returns: (format type list, data type list), one entry per attribute.
+        """
+        attributeCount = len(self._attributeNameList)
+        dataTypes = ['DT_NULL_VALUE'] * attributeCount
+        for rowData in self._rowList:
+            for col in range(attributeCount):
+                seenRank = self.__dataTypeList.index(self.__dataTypePdbx(rowData[col]))
+                keptRank = self.__dataTypeList.index(dataTypes[col])
+                dataTypes[col] = self.__dataTypeList[max(keptRank, seenRank)]
+
+        # Translate every data type into the format type at the same position.
+        formatTypes = [self.__formatTypeList[self.__dataTypeList.index(dataType)]
+                       for dataType in dataTypes]
+        return formatTypes, dataTypes
+    
+
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxParser.py
+##
+# File:  PdbxParser.py
+# Date:  2009-10-25 Jdw  Original from py-pdbx-parser-v2 
+#
+# Update:
+#
+# 2009-11-05 - (jdw) Change table storage architecture for list of
+#                   dictionaries to list of lists.
+# 2012-01-09 - (jdw) This module is now obsoleted by the PdbxReader/PdbxWriter
+#                   modules.  APIs are preserved.
+#
+# 2012-09-01 - (jdw) Revise tokenizer to better handle embedded quoting.
+#
+# NOTE -       - Now obsolete - Use pdbx.reader.PdbxReader & pdbx.writer.PdbxWriter
+#
+##
+"""
+PDBx/mmCIF dictionary and data file parser.
+
+Acknowledgements:
+
+ The tokenizer used in this module is modeled after the clever parser design
+ used in the PyMMLIB package.
+ 
+ PyMMLib Development Group
+ Authors: Ethan Merritt: merritt@u.washington.ed  & Jay Painter: jay.painter@gmail.com
+ See:  http://pymmlib.sourceforge.net/
+
+"""
+
+__docformat__ = "restructuredtext en"
+__author__    = "John Westbrook"
+__email__     = "jwest@rcsb.rutgers.edu"
+__license__   = "Creative Commons Attribution 3.0 Unported"
+__version__   = "V0.01"
+
+import re,sys
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+
+class PdbxError(Exception):
+    """General error raised by the PDBx reader and writer of this module."""
+
+class SyntaxError(Exception):
+    """Parse failure that remembers where in the input it was detected.
+
+    Within this module the name hides the built-in SyntaxError.
+
+    lineNumber - number of the input line being processed
+    text       - description of the problem
+    """
+    def __init__(self, lineNumber, text):
+        super(SyntaxError, self).__init__()
+        self.text = text
+        self.lineNumber = lineNumber
+
+    def __str__(self):
+        details = (self.lineNumber, self.text)
+        return "%%ERROR - [at line: %d] %s" % details
+
+
+
+class PdbxReader(object):
+    """ PDBx reader for data files and dictionaries.
+    
+    """
+    def __init__(self,ifh):
+        """Prepare a reader bound to an input source.
+
+        ifh - input file handle returned by open()
+        """
+        self.__ifh = ifh
+        # Number of input lines consumed so far.
+        self.__curLineNumber = 0
+        # Reserved-word prefix (the text before the first underscore of a
+        # token, lower-cased) -> parser state used to process that section.
+        self.__stateDict = dict([("data",   "ST_DATA_CONTAINER"),
+                                 ("loop",   "ST_TABLE"),
+                                 ("global", "ST_GLOBAL_CONTAINER"),
+                                 ("save",   "ST_DEFINITION"),
+                                 ("stop",   "ST_STOP")])
+        
+    def read(self, containerList):
+        """Parse the input handle and append the containers found to containerList.
+
+        Running out of input is the normal way for parsing to finish.
+        PdbxError is raised when the parser returns by itself, i.e. before
+        the tokenizer is exhausted.
+        """
+        self.__curLineNumber = 0
+        tokenStream = self.__tokenizer(self.__ifh)
+        try:
+            self.__parser(tokenStream, containerList)
+        except StopIteration:
+            # The tokenizer has no more tokens: everything has been parsed.
+            return
+        # The parser stopped without consuming the whole input.
+        raise PdbxError()
+
+    def __syntaxError(self, errText):
+        """Raise SyntaxError for errText, tagged with the current line number."""
+        lineNo = self.__curLineNumber
+        raise SyntaxError(lineNo, errText)
+
+    def __getContainerName(self,inWord):
+        """Return the name carried by a data_ or save_ container token.
+
+        The first five characters (the reserved prefix) are dropped and the
+        remainder is returned with surrounding white space removed.
+        """
+        nameText = str(inWord[5:])
+        return nameText.strip()
+    
+    def __getState(self, inWord):
+        """Identifies reserved syntax elements and assigns an associated state.
+
+           Returns: (reserved word, state)
+           where -
+              reserved word -  is one of CIF syntax elements:
+                               data_, loop_, global_, save_, stop_
+                               (returned lower-cased and without the underscore)
+              state - the parser state required to process this next section.
+
+           (None, "ST_UNKNOWN") is returned when inWord contains no underscore
+           or when the text before its first underscore is not a reserved word.
+        """
+        i = inWord.find("_")
+        if i == -1:
+            return None,"ST_UNKNOWN"
+
+        rWord=inWord[:i].lower()
+        # A plain lookup replaces the former bare 'except:', which would also
+        # have hidden KeyboardInterrupt and SystemExit.
+        state=self.__stateDict.get(rWord)
+        if state is None:
+            return None,"ST_UNKNOWN"
+        return rWord, state
+        
+    def __parser(self, tokenizer, containerList):
+        """ Parser for PDBx data files and dictionaries.
+
+            Input - tokenizer - token generator recognizing data item names
+                    (_category.attribute), quoted strings (single, double and
+                    multi-line semi-colon delimited), and unquoted strings.
+                    Every token is a tuple of the form
+                    (category name, attribute name, quoted string, unquoted word).
+
+                    containerList -  list-type container for data and definition objects parsed
+                                     from the input file.
+
+            Return:
+                    None - containerList is appended with data and definition objects.
+                    The method returns by itself only when a stop_ reserved word is met.
+
+            Raises:
+                    SyntaxError (the class defined in this module) on malformed input.
+                    StopIteration propagates from the tokenizer once the input is
+                    exhausted; read() treats it as the normal end of parsing.
+        """
+        # Working container - data or definition
+        curContainer = None
+        #
+        # Working category container
+        categoryIndex = {}
+        curCategory = None
+        #
+        curRow = None
+        state =  None
+
+        # Find the first reserved word and begin capturing data.
+        #
+        while True:
+            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
+            if curWord is None:
+                continue
+            reservedWord, state  = self.__getState(curWord)
+            if reservedWord is not None:
+                break
+
+        while True:
+            #
+            #  Set the current state  -
+            #
+            #  At this point in the processing cycle we are expecting a token containing
+            #  either a '_category.attribute'  or a reserved word.
+            #
+            if curCatName is not None:
+                state = "ST_KEY_VALUE_PAIR"
+            elif curWord is not None:
+                reservedWord, state = self.__getState(curWord)
+            else:
+                self.__syntaxError("Miscellaneous syntax error")
+                return
+
+            #
+            # Process  _category.attribute  value assignments
+            #
+            if state == "ST_KEY_VALUE_PAIR":
+                try:
+                    curCategory = categoryIndex[curCatName]
+                except KeyError:
+                    # A new category is encountered - create a container and add a row
+                    curCategory = categoryIndex[curCatName] = DataCategory(curCatName)
+
+                    try:
+                        curContainer.append(curCategory)
+                    except AttributeError:
+                        # curContainer is still None: no data_/save_ section was opened.
+                        self.__syntaxError("Category cannot be added to  data_ block")
+                        return
+
+                    curRow = []
+                    curCategory.append(curRow)
+                else:
+                    # Recover the existing row from the category
+                    try:
+                        curRow = curCategory[0]
+                    except IndexError:
+                        self.__syntaxError("Internal index error accessing category data")
+                        return
+
+                # Check for duplicate attributes and add attribute to table.
+                if curAttName in curCategory.getAttributeList():
+                    self.__syntaxError("Duplicate attribute encountered in category")
+                    return
+                else:
+                    curCategory.appendAttribute(curAttName)
+
+
+                # Get the data for this attribute from the next token
+                tCat, tAtt, curQuotedString, curWord = next(tokenizer)
+
+                if tCat is not None or (curQuotedString is None and curWord is None):
+                    self.__syntaxError("Missing data for item _%s.%s" % (curCatName,curAttName))
+
+                if curWord is not None:
+                    #
+                    # Validation check token for misplaced reserved words  -
+                    #
+                    reservedWord, state  = self.__getState(curWord)
+                    if reservedWord is not None:
+                        self.__syntaxError("Unexpected reserved word: %s" % (reservedWord))
+
+                    curRow.append(curWord)
+
+                elif curQuotedString is not None:
+                    curRow.append(curQuotedString)
+
+                else:
+                    self.__syntaxError("Missing value in item-value pair")
+
+                curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
+                continue
+
+            #
+            # Process a loop_ declaration and associated data -
+            #
+            elif state == "ST_TABLE":
+
+                # The category name in the next curCatName,curAttName pair
+                #    defines the name of the category container.
+                curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+
+                if curCatName is None or curAttName is None:
+                    self.__syntaxError("Unexpected token in loop_ declaration")
+                    return
+
+                # Check for a previous category declaration.
+                # NOTE(review): categories created by a loop_ are never stored in
+                # categoryIndex, so this check only sees categories created from
+                # item-value pairs - confirm that this is intended.
+                if curCatName in categoryIndex:
+                    self.__syntaxError("Duplicate category declaration in loop_")
+                    return
+
+                curCategory = DataCategory(curCatName)
+
+                try:
+                    curContainer.append(curCategory)
+                except AttributeError:
+                    # curContainer is still None: no data_/save_ section was opened.
+                    self.__syntaxError("loop_ declaration outside of data_ block or save_ frame")
+                    return
+
+                curCategory.appendAttribute(curAttName)
+
+                # Read the rest of the loop_ declaration
+                while True:
+                    curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
+
+                    if curCatName is None:
+                        break
+
+                    if curCatName != curCategory.getName():
+                        self.__syntaxError("Changed category name in loop_ declaration")
+                        return
+
+                    curCategory.appendAttribute(curAttName)
+
+
+                # If the next token is a 'word', check it for any reserved words -
+                if curWord is not None:
+                    reservedWord, state  = self.__getState(curWord)
+                    if reservedWord is not None:
+                        if reservedWord == "stop":
+                            return
+                        else:
+                            self.__syntaxError("Unexpected reserved word after loop declaration: %s" % (reservedWord))
+
+                # Read the table of data for this loop_ -
+                while True:
+                    curRow = []
+                    curCategory.append(curRow)
+
+                    # One token is consumed per declared attribute.
+                    for tAtt in curCategory.getAttributeList():
+                        if curWord is not None:
+                            curRow.append(curWord)
+                        elif curQuotedString is not None:
+                            curRow.append(curQuotedString)
+
+                        curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+
+                    # loop_ data processing ends if -
+
+                    # A new _category.attribute is encountered
+                    if curCatName is not None:
+                        break
+
+                    # A reserved word is encountered
+                    if curWord is not None:
+                        reservedWord, state = self.__getState(curWord)
+                        if reservedWord is not None:
+                            break
+
+                continue
+
+
+            elif state == "ST_DEFINITION":
+                # Ignore trailing unnamed saveframe delimiters e.g. 'save_'
+                sName=self.__getContainerName(curWord)
+                if (len(sName) > 0):
+                    curContainer = DefinitionContainer(sName)
+                    containerList.append(curContainer)
+                    categoryIndex = {}
+                    curCategory = None
+
+                curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+
+            elif state == "ST_DATA_CONTAINER":
+                #
+                dName=self.__getContainerName(curWord)
+                if len(dName) == 0:
+                    dName="unidentified"
+                curContainer = DataContainer(dName)
+                containerList.append(curContainer)
+                categoryIndex = {}
+                curCategory = None
+                curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+
+            elif state == "ST_STOP":
+                return
+            # The state assigned to 'global_' by __getState() is
+            # "ST_GLOBAL_CONTAINER".  Testing for any other spelling leaves the
+            # token unhandled and unconsumed, which makes this loop spin forever.
+            elif state == "ST_GLOBAL_CONTAINER":
+                curContainer = DataContainer("blank-global")
+                curContainer.setGlobal()
+                containerList.append(curContainer)
+                categoryIndex = {}
+                curCategory = None
+                curCatName,curAttName,curQuotedString,curWord = next(tokenizer)
+
+            elif state == "ST_UNKNOWN":
+                self.__syntaxError("Unrecogized syntax element: " + str(curWord))
+                return
+                
+
+    def __tokenizer(self, ifh):
+        """ Tokenizer method for the mmCIF syntax file -
+
+            Each return/yield from this method returns information about
+            the next token in the form of a tuple with the following structure.
+
+            (category name, attribute name, quoted strings, words w/o quotes or white space)
+
+            Single and double quoted strings are matched by separate parts of
+            the regular expression to better handle embedded quotes; both are
+            reported in the 'quoted strings' slot.
+
+            ifh - iterable producing the input lines.  The current line number
+                  of the reader is advanced for every line taken from it.
+
+            The generator simply ends when the input is exhausted.  A multi-line
+            string still open at that point is dropped.
+        """
+        #
+        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
+        #                                     outside of this regex.
+        #
+        # Every fragment is a raw string so that the \S and \s escapes reach the
+        # regex engine unchanged instead of relying on Python leaving unknown
+        # string escapes alone.
+        mmcifRe = re.compile(
+            r"(?:"
+
+            r"(?:_(.+?)[.](\S+))"               r"|"  # _category.attribute
+
+            r"(?:['](.*?)(?:[']\s|[']$))"       r"|"  # single quoted strings
+            r'(?:["](.*?)(?:["]\s|["]$))'       r"|"  # double quoted strings
+
+            r"(?:\s*#.*$)"                      r"|"  # comments (dumped)
+
+            r"(\S+)"                                  # unquoted words
+
+            r")")
+
+        fileIter = iter(ifh)
+
+        ## Tokenizer loop begins here ---
+        for line in fileIter:
+            self.__curLineNumber += 1
+
+            # Dump comments
+            if line.startswith("#"):
+                continue
+
+            # Gobble up the entire semi-colon/multi-line delimited string and
+            #    and stuff this into the string slot in the return tuple
+            #
+            if line.startswith(";"):
+                mlString = [line[1:]]
+                while True:
+                    line = next(fileIter, None)
+                    if line is None:
+                        # Input ended inside the multi-line string: stop here
+                        # rather than leaking StopIteration out of the
+                        # generator (an error under PEP 479).
+                        return
+                    self.__curLineNumber += 1
+                    if line.startswith(";"):
+                        break
+                    mlString.append(line)
+
+                # remove trailing new-line that is part of the \n; delimiter
+                mlString[-1] = mlString[-1].rstrip()
+                #
+                yield (None, None, "".join(mlString), None)
+                #
+                # Need to process the remainder of the current line -
+                line = line[1:]
+
+            # Apply regex to the current line consolidate the single/double
+            # quoted within the quoted string category
+            for it in mmcifRe.finditer(line):
+                tgroups = it.groups()
+                # A comment match leaves every group empty and is skipped.
+                if tgroups != (None, None, None, None, None):
+                    if tgroups[2] is not None:
+                        qs = tgroups[2]
+                    elif tgroups[3] is not None:
+                        qs = tgroups[3]
+                    else:
+                        qs = None
+                    groups = (tgroups[0],tgroups[1],qs,tgroups[4])
+                    yield groups
+
+    def __tokenizerOrg(self, ifh):
+        """ Tokenizer method for the mmCIF syntax file -
+
+            Each return/yield from this method returns information about
+            the next token in the form of a tuple with the following structure.
+
+            (category name, attribute name, quoted strings, words w/o quotes or white space)
+
+            This variant matches single and double quoted strings with one
+            shared part of the regular expression.
+
+            ifh - iterable producing the input lines.  The current line number
+                  of the reader is advanced for every line taken from it.
+
+            The generator simply ends when the input is exhausted.  A multi-line
+            string still open at that point is dropped.
+        """
+        #
+        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
+        #                                     outside of this regex.
+        #
+        # Every fragment is a raw string so that the \S and \s escapes reach the
+        # regex engine unchanged instead of relying on Python leaving unknown
+        # string escapes alone.
+        mmcifRe = re.compile(
+            r"(?:"
+
+            r"(?:_(.+?)[.](\S+))"                  r"|"  # _category.attribute
+
+            r"""(?:['"](.*?)(?:['"]\s|['"]$))"""   r"|"  # quoted strings
+
+            r"(?:\s*#.*$)"                         r"|"  # comments (dumped)
+
+            r"(\S+)"                                     # unquoted words
+
+            r")")
+
+        fileIter = iter(ifh)
+
+        ## Tokenizer loop begins here ---
+        for line in fileIter:
+            self.__curLineNumber += 1
+
+            # Dump comments
+            if line.startswith("#"):
+                continue
+
+            # Gobble up the entire semi-colon/multi-line delimited string and
+            #    and stuff this into the string slot in the return tuple
+            #
+            if line.startswith(";"):
+                mlString = [line[1:]]
+                while True:
+                    line = next(fileIter, None)
+                    if line is None:
+                        # Input ended inside the multi-line string: stop here
+                        # rather than leaking StopIteration out of the
+                        # generator (an error under PEP 479).
+                        return
+                    self.__curLineNumber += 1
+                    if line.startswith(";"):
+                        break
+                    mlString.append(line)
+
+                # remove trailing new-line that is part of the \n; delimiter
+                mlString[-1] = mlString[-1].rstrip()
+                #
+                yield (None, None, "".join(mlString), None)
+                #
+                # Need to process the remainder of the current line -
+                line = line[1:]
+
+            ## Apply regex to the current line
+            for it in mmcifRe.finditer(line):
+                groups = it.groups()
+                # A comment match leaves every group empty and is skipped.
+                if groups != (None, None, None, None):
+                    yield groups
+
+
+class PdbxWriter(object):
+    """Write PDBx data files or dictionaries using the input container
+       or container list.
+    """
+    def __init__(self,ofh=sys.stdout):
+        """Prepare a writer bound to an output handle.
+
+        ofh - object with a write() method that receives the output
+              (standard output by default).
+        """
+        # Layout settings.
+        self.__MAXIMUM_LINE_LENGTH = 2048
+        self.__SPACING = 2
+        self.__INDENT_DEFINITION = 3
+        self.__indentSpace = self.__INDENT_DEFINITION * " "
+        # Switched on while a definition (save frame) container is written.
+        self.__doDefinitionIndent = False
+        # Output state.
+        self.__containerList = []
+        self.__ofh = ofh
+        
+    def write(self, containerList):
+        """Write every container of containerList, in order, to the output handle."""
+        self.__containerList = containerList
+        for item in containerList:
+            self.writeContainer(item)
+
+    def writeContainer(self,container):
+        """Write one definition or data container to the output handle.
+
+        A definition container is opened with 'save_<name>' and closed with a
+        bare 'save_'; its content is indented.  A data container is opened
+        with 'global_' when it is flagged global and with 'data_<name>'
+        otherwise.
+
+        Objects without rows are skipped, objects with a single row are written
+        as item-value pairs and objects with several rows as a loop_ table.
+        Every object written is followed by a '#' separator.
+
+        Raises PdbxError for an object holding several rows but no attribute.
+        """
+        indS=" " *  self.__INDENT_DEFINITION
+        if isinstance(container, DefinitionContainer):
+            self.__write("save_%s\n" % container.getName())
+            self.__doDefinitionIndent=True
+            self.__write(indS+"#\n")
+        elif isinstance(container, DataContainer):
+            if (container.getGlobal()):
+                self.__write("global_\n")
+                self.__doDefinitionIndent=False
+                self.__write("\n")
+            else:
+                self.__write("data_%s\n" % container.getName())
+                self.__doDefinitionIndent=False
+                self.__write("#\n")
+
+        for nm in container.getObjNameList():
+            obj=container.getObj(nm)
+            objL=obj.getRowList()
+
+            # Skip empty objects
+            if len(objL) == 0:
+                continue
+
+            # Item - value formattting
+            elif len(objL) == 1:
+                self.__writeItemValueFormat(obj)
+
+            # Table formatting -
+            elif len(objL) > 1 and len(obj.getAttributeList()) > 0:
+                self.__writeTableFormat(obj)
+            else:
+                raise PdbxError()
+
+            # Separator; deliberately left without a new-line because the
+            # output of the next object starts with one.
+            if self.__doDefinitionIndent:
+                self.__write(indS+"#")
+            else:
+                self.__write("#")
+
+        # Add a trailing saveframe reserved word.  It must start on a line of
+        # its own: appended directly to the '#' separator above it would be
+        # read back as part of a comment and the save frame would stay open.
+        if isinstance(container, DefinitionContainer):
+            self.__write("\nsave_\n")
+        self.__write("#\n")
+
+    def __write(self, st):
+        """Send the string st to the output handle."""
+        sink = self.__ofh
+        sink.write(st)
+
+    def __writeItemValueFormat(self, myCategory):
+        """Write the first row of myCategory as one '_category.attribute value' line per attribute.
+
+        Item names are padded to a common width so that the values line up.
+        Lines are indented while a definition container is being written.
+        """
+        categoryName = myCategory.getName()
+
+        # Width of the item name column: spacing + category name + longest
+        # attribute name + 2 (the leading '_' and the '.' separator).
+        longestName = max([0] + [len(name) for name in myCategory.getAttributeList()])
+        columnWidth = self.__SPACING + len(categoryName) + longestName + 2
+
+        pieces = []
+        for name, position in myCategory.getAttributeListWithOrder():
+            pieces.append("\n")
+            if self.__doDefinitionIndent:
+                pieces.append(self.__indentSpace)
+            pieces.append(("_%s.%s" % (categoryName, name)).ljust(columnWidth))
+            pieces.append(myCategory.getValueFormatted(name,0))
+
+        pieces.append("\n")
+        self.__write("".join(pieces))
+
+    def __writeTableFormat(self, myCategory):
+        """Write myCategory as a loop_ declaration followed by one line per row.
+
+        Values are justified to the widest value of their column: numbers to
+        the right, quoted, unquoted and null values to the left.  Multi-line
+        strings are written as they are.  A value whose format type is none of
+        these is left out.  Every column is followed by the spacing string.
+        """
+        categoryName = myCategory.getName()
+        if self.__doDefinitionIndent:
+            margin = self.__indentSpace
+        else:
+            margin = ""
+
+        # Declaration of the loop_ : the reserved word, then one item name per line.
+        header = ["\n" + margin + "loop_"]
+        for attributeName in myCategory.getAttributeList():
+            header.append("\n" + margin + "_%s.%s" % (categoryName, attributeName))
+        self.__write("".join(header))
+
+        # The data, in tabular format.
+        formatTypeList,dataTypeList=myCategory.getFormatTypeList()
+        maxLengthList=myCategory.getAttributeValueMaxLengthList()
+        gap = self.__SPACING * " "
+        leftJustified = ('FT_UNQUOTED_STRING', 'FT_NULL_VALUE', 'FT_QUOTED_STRING')
+
+        for rowNo in range(myCategory.getRowCount()):
+            cells = ['\n']
+            if self.__doDefinitionIndent:
+                cells.append(self.__indentSpace + "  ")
+
+            for colNo in range(myCategory.getAttributeCount()):
+                kind = formatTypeList[colNo]
+                width = maxLengthList[colNo]
+
+                if kind in leftJustified:
+                    cells.append(myCategory.getValueFormattedByIndex(colNo,rowNo).ljust(width))
+                elif kind == 'FT_NUMBER':
+                    cells.append(myCategory.getValueFormattedByIndex(colNo,rowNo).rjust(width))
+                elif kind == "FT_MULTI_LINE_STRING":
+                    cells.append(myCategory.getValueFormattedByIndex(colNo,rowNo))
+
+                cells.append(gap)
+
+            self.__write("".join(cells))
+        self.__write("\n")
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReadWriteTests.py
+##
+# File:    PdbxReadWriteTests.py
+# Author:  jdw
+# Date:    9-Oct-2011
+# Version: 0.001
+#
+# Updated:
+#         24-Oct-2012 jdw update path details and reorganize.
+#
+##
+"""  Various test cases for PDBx/mmCIF data file and dictionary reader and writer. 
+"""
+
+__docformat__ = "restructuredtext en"
+__author__    = "John Westbrook"
+__email__     = "jwest@rcsb.rutgers.edu"
+__license__   = "Creative Commons Attribution 3.0 Unported"
+__version__   = "V0.01"
+
+import sys, unittest, traceback
+import sys, time, os, os.path, shutil
+
+from simtk.openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
+from simtk.openmm.app.internal.pdbx.writer.PdbxWriter import PdbxWriter
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+
+
+class PdbxReadWriteTests(unittest.TestCase):
+    def setUp(self):
+        """Set the log handle, the verbosity flag and the file paths used by the tests."""
+        self.verbose = False
+        self.lfh = sys.stdout
+        self.pathOutputFile = "testOutputDataFile.cif"
+        self.pathPdbxDataFile = "../tests/1kip.cif"
+
+    def tearDown(self):
+        """Nothing to clean up after a test."""
+        return None
+
+
+    def testSimpleInitialization(self):
+        """Test case -  Simple initialization of a data category and data block
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            #
+            fn="test-simple.cif"
+            attributeNameList=['aOne','aTwo','aThree','aFour','aFive','aSix','aSeven','aEight','aNine','aTen']
+            # Ten independent rows, each holding the values 1..10.
+            rowList=[list(range(1,11)) for i in range(10)]
+            nameCat='myCategory'
+            #
+            # Build a data block holding the category and write it out.
+            curContainer=DataContainer("myblock")
+            aCat=DataCategory(nameCat,attributeNameList,rowList)
+            aCat.printIt()
+            curContainer.append(aCat)
+            curContainer.printIt()
+            #
+            myContainerList=[]
+            myContainerList.append(curContainer)
+            # 'with' closes the file even when writing fails.
+            with open(fn, "w") as ofh:
+                pdbxW=PdbxWriter(ofh)
+                pdbxW.write(myContainerList)
+
+            # Read the file back and report what was recovered.
+            myContainerList=[]
+            with open(fn, "r") as ifh:
+                pRd=PdbxReader(ifh)
+                pRd.read(myContainerList)
+            for container in myContainerList:
+                for objName in container.getObjNameList():
+                    name,aList,rList=container.getObj(objName).get()
+                    self.lfh.write("Recovered data category  %s\n" % name)
+                    self.lfh.write("Attribute list           %r\n" % repr(aList))
+                    self.lfh.write("Row list                 %r\n" % repr(rList))
+        except Exception:
+            # Any error fails the test; the traceback goes to the log handle.
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+            
+        
+    def testWriteDataFile(self): 
+        """Test case -  write data file 
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            #
+            myDataList=[]
+            curContainer=DataContainer("myblock")
+            aCat=DataCategory("pdbx_seqtool_mapping_ref")
+            for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
+                                  "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
+                aCat.appendAttribute(attributeName)
+            aCat.append([1,2,3,4,5,6,7])
+            aCat.append([1,2,3,4,5,6,7])
+            aCat.append([1,2,3,4,5,6,7])
+            aCat.append([1,2,3,4,5,6,7])
+            aCat.append([7,6,5,4,3,2,1])
+            aCat.printIt()
+            curContainer.append(aCat)
+            curContainer.printIt()
+            #
+            myDataList.append(curContainer)
+            # The file is opened only once the data is ready and 'with' closes
+            # it even when writing fails.
+            with open("test-output.cif", "w") as ofh:
+                pdbxW=PdbxWriter(ofh)
+                pdbxW.write(myDataList)
+        except Exception:
+            # Any error fails the test; the traceback goes to the log handle.
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+
+    def testUpdateDataFile(self): 
+        """Test case -  update data file 
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            # Create a initial data file --
+            #
+            myDataList=[]
+
+            curContainer=DataContainer("myblock")
+            aCat=DataCategory("pdbx_seqtool_mapping_ref")
+            for attributeName in ("ordinal", "entity_id", "auth_mon_id", "auth_mon_num",
+                                  "pdb_chain_id", "ref_mon_id", "ref_mon_num"):
+                aCat.appendAttribute(attributeName)
+            aCat.append([9,2,3,4,5,6,7])
+            aCat.append([10,2,3,4,5,6,7])
+            aCat.append([11,2,3,4,5,6,7])
+            aCat.append([12,2,3,4,5,6,7])
+
+            curContainer.append(aCat)
+            myDataList.append(curContainer)
+            # 'with' closes each file even when reading or writing fails.
+            with open("test-output-1.cif", "w") as ofh:
+                pdbxW=PdbxWriter(ofh)
+                pdbxW.write(myDataList)
+            #
+            #
+            # Read and update the data -
+            # 
+            myDataList=[]
+            with open("test-output-1.cif", "r") as ifh:
+                pRd=PdbxReader(ifh)
+                pRd.read(myDataList)
+            #
+            myBlock=myDataList[0]
+            myBlock.printIt()
+            myCat=myBlock.getObj('pdbx_seqtool_mapping_ref')
+            myCat.printIt()
+            # range() rather than xrange() so the test also runs on Python 3.
+            for iRow in range(0,myCat.getRowCount()):
+                myCat.setValue('some value', 'ref_mon_id',iRow)
+                myCat.setValue(100, 'ref_mon_num',iRow)
+            with open("test-output-2.cif", "w") as ofh:
+                pdbxW=PdbxWriter(ofh)
+                pdbxW.write(myDataList)
+            #
+
+        except Exception:
+            # Any error fails the test; the traceback goes to the log handle.
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+
+    def testReadDataFile(self): 
+        """Test case -  parse the file at self.pathPdbxDataFile into a container list.
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            parsedContainers=[]
+            inFile = open(self.pathPdbxDataFile, "r")
+            PdbxReader(inFile).read(parsedContainers)
+            inFile.close()
+        except:
+            # Any error is logged with its traceback and reported as a failure.
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+
+    def testReadWriteDataFile(self): 
+        """Test case -  read self.pathPdbxDataFile and write the parsed
+        containers back out to self.pathOutputFile.
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            parsedContainers=[]
+            inFile = open(self.pathPdbxDataFile, "r")
+            PdbxReader(inFile).read(parsedContainers)
+            inFile.close()
+
+            outFile = open(self.pathOutputFile, "w")
+            PdbxWriter(outFile).write(parsedContainers)
+            outFile.close()
+        except:
+            # Any error is logged with its traceback and reported as a failure.
+            traceback.print_exc(file=self.lfh)
+            self.fail()
+
+
+def simpleSuite():
+    """Return a TestSuite holding the PdbxReadWriteTests cases selected below."""
+    selected = unittest.TestSuite()
+    for caseName in ("testSimpleInitialization",
+                     "testUpdateDataFile",
+                     "testReadWriteDataFile"):
+        selected.addTest(PdbxReadWriteTests(caseName))
+    return selected
+
+
+if __name__ == '__main__':
+    # When run as a script, execute the selected suite with verbose output.
+    selectedSuite=simpleSuite()
+    verboseRunner=unittest.TextTestRunner(verbosity=2)
+    verboseRunner.run(selectedSuite)
+
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReader.py
+##
+# File:  PdbxReader.py
+# Date:  2012-01-09  Jdw  Adapted from PdbxParser
+#
+# Updates:
+#
+# 2012-01-09 - (jdw) Separate reader and writer classes.
+#
+# 2012-09-02 - (jdw)  Revise tokenizer to better handle embedded quoting.
+#
+##
+"""
+PDBx/mmCIF dictionary and data file parser.
+
+Acknowledgements:
+
+ The tokenizer used in this module is modeled after the clever parser design
+ used in the PyMMLIB package.
+ 
+ PyMMLib Development Group
+ Authors: Ethan Merritt: merritt@u.washington.edu  & Jay Painter: jay.painter@gmail.com
+ See:  http://pymmlib.sourceforge.net/
+
+"""
+
+import re,sys
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+
+class PdbxError(Exception):
+    """ Class for catching general errors.
+    """
+
+class SyntaxError(Exception):
+    """ Class for catching syntax errors found while parsing.
+
+        NOTE: inside this module the name shadows the builtin SyntaxError; it is
+        kept unchanged so existing callers that catch it keep working.
+
+        lineNumber - input line number at which the problem was detected
+        text       - description of the problem
+    """
+    def __init__(self, lineNumber, text):
+        # Hand both values to the base class so that args is populated; with
+        # empty args the exception cannot be rebuilt by copy/pickle.
+        Exception.__init__(self, lineNumber, text)
+        self.lineNumber = lineNumber
+        self.text = text
+
+    def __str__(self):
+        return "%%ERROR - [at line: %d] %s" % (self.lineNumber, self.text)
+
+
+
+class PdbxReader(object):
+    """ PDBx reader for data files and dictionaries.
+    
+    """
+    def __init__(self,ifh):
+        """  ifh - input file handle returned by open(); it is iterated line by
+                   line when read() is called.
+        """
+        self.__ifh=ifh
+        # Count of input lines consumed so far; reported in syntax errors.
+        self.__curLineNumber = 0
+        # Maps the lower-cased text before the first underscore of a reserved
+        # word to the parser state used to process it.
+        self.__stateDict=dict([("data",   "ST_DATA_CONTAINER"),
+                               ("loop",   "ST_TABLE"),
+                               ("global", "ST_GLOBAL_CONTAINER"),
+                               ("save",   "ST_DEFINITION"),
+                               ("stop",   "ST_STOP")])
+        
+    def read(self, containerList):
+        """
+        Parse the input file handle and append the definition and data
+        containers that are found to containerList.
+
+        Parsing normally ends when the tokenizer runs out of input, which
+        surfaces here as StopIteration.  If the parser returns without
+        exhausting the input (it does so on a stop_ reserved word), PdbxError
+        is raised.
+        """
+        self.__curLineNumber = 0
+        tokens = self.__tokenizer(self.__ifh)
+        try:
+            self.__parser(tokens, containerList)
+        except StopIteration:
+            # End of input - the normal way out.
+            return
+        raise PdbxError()
+
+    def __syntaxError(self, errText):
+        """ Raise this module's SyntaxError for errText at the current line number.
+        """
+        lineNumber = self.__curLineNumber
+        raise SyntaxError(lineNumber, errText)
+
+    def __getContainerName(self,inWord):
+        """ Returns the name of the data_ or save_ container, i.e. the text
+            after the five-character reserved prefix with surrounding white
+            space removed.
+        """
+        prefixLength = 5   # len("data_") == len("save_")
+        nameText = str(inWord[prefixLength:])
+        return nameText.strip()
+    
+    def __getState(self, inWord):
+        """Identifies reserved syntax elements and assigns an associated state.  
+
+           Returns: (reserved word, state)
+           where - 
+              reserved word -  is one of CIF syntax elements:
+                               data_, loop_, global_, save_, stop_
+                               (returned lower-cased and without the underscore),
+                               or None when inWord is not a reserved word.
+              state - the parser state required to process this next section,
+                      or "ST_UNKNOWN" when inWord is not a reserved word.
+        """
+        sepIndex = inWord.find("_")
+        if sepIndex == -1:
+            return None,"ST_UNKNOWN"
+
+        # Only the text before the first underscore is compared, so e.g.
+        # "data_1ABC" is recognized as the reserved word "data".
+        rWord=inWord[:sepIndex].lower()
+        try:
+            return rWord, self.__stateDict[rWord]
+        except KeyError:
+            # Not a reserved prefix.  Only the failed lookup is caught so that
+            # genuine programming errors are no longer silently hidden.
+            return None,"ST_UNKNOWN"
+        
+    def __parser(self, tokenizer, containerList):
+        """ Parser for PDBx data files and dictionaries.
+
+            Input - tokenizer() reentrant method recognizing data item names (_category.attribute)
+                    quoted strings (single, double and multi-line semi-colon delimited), and unquoted
+                    strings.
+
+                    containerList -  list-type container for data and definition objects parsed
+                                     from the input file.
+
+            Return:
+                    containerList - is appended with data and definition objects - 
+
+            Raises:
+                    SyntaxError   - (the class defined in this module) on malformed input.
+                    StopIteration - propagated from tokenizer.next() when the input is
+                                    exhausted; this is how parsing of a complete file ends.
+
+            The method returns normally only when a stop_ reserved word is met.
+        """
+        # Working container - data or definition
+        curContainer = None
+        #
+        # Working category container 
+        categoryIndex = {}
+        curCategory = None
+        #
+        curRow = None
+        state =  None
+
+        # Find the first reserved word and begin capturing data.
+        #
+        while True:
+            curCatName, curAttName, curQuotedString, curWord = tokenizer.next()
+            if curWord is None:
+                continue
+            reservedWord, state  = self.__getState(curWord)
+            if reservedWord is not None:
+                break
+        
+        while True:
+            #
+            #  Set the current state  -
+            #
+            #  At this point in the processing cycle we are expecting a token containing
+            #  either a '_category.attribute'  or a reserved word.  
+            #
+            if curCatName is not None:
+                state = "ST_KEY_VALUE_PAIR"
+            elif curWord is not None:
+                reservedWord, state = self.__getState(curWord)
+            else:
+                self.__syntaxError("Miscellaneous syntax error")
+                return            
+
+            #
+            # Process  _category.attribute  value assignments 
+            #
+            if state == "ST_KEY_VALUE_PAIR":
+                try:
+                    curCategory = categoryIndex[curCatName]
+                except KeyError:
+                    # A new category is encountered - create a container and add a row 
+                    curCategory = categoryIndex[curCatName] = DataCategory(curCatName)
+
+                    try:
+                        curContainer.append(curCategory)
+                    except AttributeError:
+                        # curContainer is still None: no data_/save_ section was opened.
+                        self.__syntaxError("Category cannot be added to  data_ block")
+                        return
+
+                    curRow = []                    
+                    curCategory.append(curRow)
+                else:
+                    # Recover the existing row from the category
+                    try:
+                        curRow = curCategory[0] 
+                    except IndexError:
+                        self.__syntaxError("Internal index error accessing category data")
+                        return
+
+                # Check for duplicate attributes and add attribute to table.
+                if curAttName in curCategory.getAttributeList():
+                    self.__syntaxError("Duplicate attribute encountered in category")
+                    return
+                else:
+                    curCategory.appendAttribute(curAttName)
+
+
+                # Get the data for this attribute from the next token
+                tCat, tAtt, curQuotedString, curWord = tokenizer.next()
+
+                if tCat is not None or (curQuotedString is None and curWord is None):
+                    self.__syntaxError("Missing data for item _%s.%s" % (curCatName,curAttName))
+
+                if curWord is not None:
+                    # 
+                    # Validation check token for misplaced reserved words  -  
+                    #
+                    reservedWord, state  = self.__getState(curWord)
+                    if reservedWord is not None:
+                        self.__syntaxError("Unexpected reserved word: %s" % (reservedWord))
+
+                    curRow.append(curWord)
+
+                elif curQuotedString is not None:
+                    curRow.append(curQuotedString)
+
+                else:
+                    self.__syntaxError("Missing value in item-value pair")
+
+                curCatName, curAttName, curQuotedString, curWord = tokenizer.next()
+                continue
+
+            #
+            # Process a loop_ declaration and associated data -
+            #
+            elif state == "ST_TABLE":
+
+                # The category name in the next curCatName,curAttName pair
+                #    defines the name of the category container.
+                curCatName,curAttName,curQuotedString,curWord = tokenizer.next()
+
+                if curCatName is None or curAttName is None:
+                    self.__syntaxError("Unexpected token in loop_ declaration")
+                    return
+
+                # Check for a previous category declaration.
+                if curCatName in categoryIndex:
+                    self.__syntaxError("Duplicate category declaration in loop_")
+                    return
+
+                curCategory = DataCategory(curCatName)
+
+                try:
+                    curContainer.append(curCategory)
+                except AttributeError:
+                    # curContainer is still None: no data_/save_ section was opened.
+                    self.__syntaxError("loop_ declaration outside of data_ block or save_ frame")
+                    return
+
+                curCategory.appendAttribute(curAttName)
+
+                # Read the rest of the loop_ declaration 
+                while True:
+                    curCatName, curAttName, curQuotedString, curWord = tokenizer.next()
+                    
+                    if curCatName is None:
+                        break
+
+                    if curCatName != curCategory.getName():
+                        self.__syntaxError("Changed category name in loop_ declaration")
+                        return
+
+                    curCategory.appendAttribute(curAttName)
+
+
+                # If the next token is a 'word', check it for any reserved words - 
+                if curWord is not None:
+                    reservedWord, state  = self.__getState(curWord)
+                    if reservedWord is not None:
+                        if reservedWord == "stop":
+                            return
+                        else:
+                            self.__syntaxError("Unexpected reserved word after loop declaration: %s" % (reservedWord))
+                    
+                # Read the table of data for this loop_ - 
+                while True:
+                    curRow = []                    
+                    curCategory.append(curRow)
+
+                    # Consume one token per declared attribute; the token in hand
+                    # is stored first and the next one is fetched afterwards.
+                    for tAtt in curCategory.getAttributeList():
+                        if curWord is not None:
+                            curRow.append(curWord)
+                        elif curQuotedString is not None:
+                            curRow.append(curQuotedString)
+
+                        curCatName,curAttName,curQuotedString,curWord = tokenizer.next()
+
+                    # loop_ data processing ends if - 
+
+                    # A new _category.attribute is encountered
+                    if curCatName is not None:
+                        break
+
+                    # A reserved word is encountered
+                    if curWord is not None:
+                        reservedWord, state = self.__getState(curWord)
+                        if reservedWord is not None:
+                            break
+                        
+                continue
+
+
+            elif state == "ST_DEFINITION":
+                # Ignore trailing unnamed saveframe delimiters e.g. 'save_'
+                sName=self.__getContainerName(curWord)
+                if (len(sName) > 0):
+                    curContainer = DefinitionContainer(sName)
+                    containerList.append(curContainer)
+                    categoryIndex = {}
+                    curCategory = None
+
+                curCatName,curAttName,curQuotedString,curWord = tokenizer.next()
+
+            elif state == "ST_DATA_CONTAINER":
+                #
+                dName=self.__getContainerName(curWord)
+                if len(dName) == 0:
+                    dName="unidentified"
+                curContainer = DataContainer(dName)
+                containerList.append(curContainer)
+                categoryIndex = {}
+                curCategory = None
+                curCatName,curAttName,curQuotedString,curWord = tokenizer.next()
+
+            elif state == "ST_STOP":
+                return
+            elif state in ("ST_GLOBAL", "ST_GLOBAL_CONTAINER"):
+                # The state table built in __init__ reports global_ as
+                # "ST_GLOBAL_CONTAINER".  Testing only for "ST_GLOBAL" left that
+                # state unhandled, so the loop repeated forever on the same token.
+                curContainer = DataContainer("blank-global")
+                curContainer.setGlobal()
+                containerList.append(curContainer)
+                categoryIndex = {}
+                curCategory = None
+                curCatName,curAttName,curQuotedString,curWord = tokenizer.next()
+
+            elif state == "ST_UNKNOWN":
+                self.__syntaxError("Unrecogized syntax element: " + str(curWord))
+                return
+
+            else:
+                # Defensive: never go around the loop again without consuming a token.
+                self.__syntaxError("Unhandled parser state: " + str(state))
+                return
+                
+                
+
+    def __tokenizer(self, ifh):
+        """ Tokenizer method for the mmCIF syntax file - 
+
+            Generator over the lines of ifh.  Each yield returns information
+            about the next token in the form of a tuple with the following
+            structure.
+
+            (category name, attribute name, quoted strings, words w/o quotes or white space)
+
+            For an item name the first two slots are set; for a quoted or
+            semi-colon delimited string only the third; for a bare word only
+            the fourth.  Comments produce no token.
+
+            The regular expression uses separate alternatives for single and
+            double quoted strings to better handle embedded quotes.
+
+            self.__curLineNumber is incremented for every line consumed.
+
+        """
+        #
+        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
+        #                                     outside of this regex.
+        # Capture groups, in order: category, attribute, single quoted text,
+        # double quoted text, unquoted word.
+        mmcifRe = re.compile(
+            r"(?:"
+
+             "(?:_(.+?)[.](\S+))"               "|"  # _category.attribute
+
+             "(?:['](.*?)(?:[']\s|[']$))"       "|"  # single quoted strings
+             "(?:[\"](.*?)(?:[\"]\s|[\"]$))"    "|"  # double quoted strings             
+
+             "(?:\s*#.*$)"                      "|"  # comments (dumped)
+
+             "(\S+)"                                 # unquoted words
+
+             ")")
+
+        fileIter = iter(ifh)
+
+        ## Tokenizer loop begins here ---
+        while True:
+            # NOTE(review): .next() is the Python 2 iterator protocol; the
+            # StopIteration it raises at end of input is what ends this generator.
+            line = fileIter.next()
+            self.__curLineNumber += 1
+
+            # Dump comments
+            if line.startswith("#"):
+                continue
+            
+            # Gobble up the entire semi-colon/multi-line delimited string and
+            #    and stuff this into the string slot in the return tuple
+            #
+            if line.startswith(";"):
+                # Text after the opening ';' is the first piece of the string.
+                mlString = [line[1:]]
+                while True:
+                    line = fileIter.next()
+                    self.__curLineNumber += 1
+                    # A line starting with ';' closes the string.
+                    if line.startswith(";"):
+                        break
+                    mlString.append(line)
+
+                # remove trailing new-line that is part of the \n; delimiter
+                # (rstrip() also removes any other trailing white space of the last piece)
+                mlString[-1] = mlString[-1].rstrip()
+                #
+                yield (None, None, "".join(mlString), None)
+                #
+                # Need to process the remainder of the current line -
+                line = line[1:]
+                #continue
+
+            # Apply regex to the current line consolidate the single/double
+            # quoted within the quoted string category
+            for it in mmcifRe.finditer(line):
+                tgroups = it.groups()
+                # A match with no captured group is a comment - skip it.
+                if tgroups != (None, None, None, None, None):
+                    if tgroups[2] is not None:
+                        qs = tgroups[2]
+                    elif tgroups[3] is not None:
+                        qs = tgroups[3]
+                    else:
+                        qs = None
+                    # Collapse the two quoted-string groups into the single slot
+                    # of the four-element token tuple.
+                    groups = (tgroups[0],tgroups[1],qs,tgroups[4])
+                    yield groups
+
+    def __tokenizerOrg(self, ifh):
+        """ Tokenizer method for the mmCIF syntax file - 
+
+            Earlier variant of the tokenizer that matches single and double
+            quoted strings with one shared regex alternative.  read() uses
+            __tokenizer(); no call to this method appears in this class.
+
+            Each return/yield from this method returns information about
+            the next token in the form of a tuple with the following structure.
+
+            (category name, attribute name, quoted strings, words w/o quotes or white space)
+
+        """
+        #
+        # Regex definition for mmCIF syntax - semi-colon delimited strings are handled
+        #                                     outside of this regex.
+        # Capture groups, in order: category, attribute, quoted text, unquoted word.
+        mmcifRe = re.compile(
+            r"(?:"
+
+             "(?:_(.+?)[.](\S+))"               "|"  # _category.attribute
+
+             "(?:['\"](.*?)(?:['\"]\s|['\"]$))" "|"  # quoted strings
+
+             "(?:\s*#.*$)"                      "|"  # comments (dumped)
+
+             "(\S+)"                                 # unquoted words
+
+             ")")
+
+        fileIter = iter(ifh)
+
+        ## Tokenizer loop begins here ---
+        while True:
+            # NOTE(review): .next() is the Python 2 iterator protocol; the
+            # StopIteration it raises at end of input is what ends this generator.
+            line = fileIter.next()
+            self.__curLineNumber += 1
+
+            # Dump comments
+            if line.startswith("#"):
+                continue
+            
+            # Gobble up the entire semi-colon/multi-line delimited string and
+            #    and stuff this into the string slot in the return tuple
+            #
+            if line.startswith(";"):
+                # Text after the opening ';' is the first piece of the string.
+                mlString = [line[1:]]
+                while True:
+                    line = fileIter.next()
+                    self.__curLineNumber += 1
+                    # A line starting with ';' closes the string.
+                    if line.startswith(";"):
+                        break
+                    mlString.append(line)
+
+                # remove trailing new-line that is part of the \n; delimiter
+                mlString[-1] = mlString[-1].rstrip()
+                #
+                yield (None, None, "".join(mlString), None)
+                #
+                # Need to process the remainder of the current line -
+                line = line[1:]
+                #continue
+
+            ## Apply regex to the current line 
+            for it in mmcifRe.finditer(line):
+                groups = it.groups()
+                # A match with no captured group is a comment - skip it.
+                if groups != (None, None, None, None):
+                    yield groups
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/PdbxReaderTests.py
+##
+# File:    PdbxReaderTests.py
+# Author:  jdw
+# Date:    9-Jan-2012
+# Version: 0.001
+#
+# Update:
+#  27-Sep-2012  jdw add test case for reading PDBx structure factor file 
+#
+##
+"""
+Test cases for reading PDBx/mmCIF data files PdbxReader class -
+
+"""
+import sys, unittest, traceback
+import sys, time, os, os.path, shutil
+
+from simtk.openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+
+class PdbxReaderTests(unittest.TestCase):
+    def setUp(self):
+        """Set the log handle, the verbosity flag and the input file paths."""
+        # Input data files used by the individual test cases.
+        self.pathSFDataFile       ="../tests/1kip-sf.cif"
+        self.pathBigPdbxDataFile  ="../tests/1ffk.cif"
+        self.pathPdbxDataFile     ="../tests/1kip.cif"
+        self.verbose=False
+        # Progress and result messages are written to standard error.
+        self.lfh=sys.stderr
+
+    def tearDown(self):
+        """No cleanup is performed after a test."""
+        return None
+
+    def testReadSmallDataFile(self): 
+        """Test case -  parse the file at self.pathPdbxDataFile.
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            parsedContainers=[]
+            inFile = open(self.pathPdbxDataFile, "r")
+            PdbxReader(inFile).read(parsedContainers)
+            inFile.close()
+        except:
+            # Any error is logged with its traceback and reported as a failure.
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+
+    def testReadBigDataFile(self): 
+        """Test case -  parse the file at self.pathBigPdbxDataFile.
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            parsedContainers=[]
+            inFile = open(self.pathBigPdbxDataFile, "r")
+            PdbxReader(inFile).read(parsedContainers)
+            inFile.close()
+        except:
+            # Any error is logged with its traceback and reported as a failure.
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+
+    def testReadSFDataFile(self): 
+        """Test case -  read PDB structure factor data file and log the minimum,
+        maximum and average of a per-row ratio computed from the f_meas_au and
+        f_meas_sigma_au columns of the refln category.
+
+        The test ends early, without failing, when the first container has no
+        refln category.  Any exception is logged and reported as a failure.
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            #
+            myContainerList=[]
+            ifh = open(self.pathSFDataFile, "r")
+            try:
+                pRd=PdbxReader(ifh)
+                pRd.read(myContainerList)
+            finally:
+                # All data is in memory once read() returns, so the handle is
+                # released here on every path, including errors and the early exit.
+                ifh.close()
+            c0=myContainerList[0]
+            #
+            catObj=c0.getObj("refln")
+            if catObj is None:
+                # Nothing to summarize.  (The original `return false` raised
+                # NameError here, which turned the early exit into a failure.)
+                return
+        
+            #
+            # Get column name index.
+            #
+            itDict={}
+            itNameList=catObj.getItemNameList()
+            for idxIt,itName in enumerate(itNameList):
+                itDict[str(itName).lower()]=idxIt
+                #
+            idf=itDict['_refln.f_meas_au']
+            idsigf=itDict['_refln.f_meas_sigma_au']
+            minR=100
+            maxR=-1
+            sumR=0
+            icount=0
+            for row in  catObj.getRowList():
+                try:
+                    f=float(row[idf])
+                    sigf=float(row[idsigf])
+                    # NOTE(review): this is sig(f)/f although the log label below
+                    # reads "f/sig(f)" - confirm which one is intended.
+                    ratio=sigf/f
+                except (ValueError, TypeError, ZeroDivisionError, IndexError):
+                    # Skip rows with non-numeric values, a zero f or missing columns.
+                    continue
+                maxR=max(maxR,ratio)
+                minR=min(minR,ratio)
+                sumR+=ratio
+                icount+=1
+            
+            # With no usable row the division below raises and the test fails.
+            self.lfh.write("f/sig(f) min %f max %f avg %f count %d\n" % (minR, maxR, sumR/icount,icount))
+        except:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+
+
+def simpleSuite():
+    """Return a TestSuite holding the three PdbxReaderTests cases, big file first."""
+    selected = unittest.TestSuite()
+    for caseName in ("testReadBigDataFile",
+                     "testReadSmallDataFile",
+                     "testReadSFDataFile"):
+        selected.addTest(PdbxReaderTests(caseName))
+    return selected
+
+if __name__ == '__main__':
+    # When run as a script, execute the selected suite with verbose output.
+    selectedSuite=simpleSuite()
+    verboseRunner=unittest.TextTestRunner(verbosity=2)
+    verboseRunner.run(selectedSuite)
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/reader/__init__.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/reader/__init__.py
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriter.py
+##
+# File:  PdbxWriter.py
+# Date:  2011-10-09 Jdw  Adapted from PdbxParser.py
+#
+# Updates:
+#    5-Apr-2011 jdw  Using the double quote format preference
+#   23-Oct-2012 jdw  update path details and reorganize.
+#
+###
+"""
+Classes for writing data and dictionary containers in PDBx/mmCIF format.
+
+"""
+__docformat__ = "restructuredtext en"
+__author__    = "John Westbrook"
+__email__     = "jwest@rcsb.rutgers.edu"
+__license__   = "Creative Commons Attribution 3.0 Unported"
+__version__   = "V0.01"
+
+
+import sys
+
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+
+class PdbxError(Exception):
+    """ Class for catching general errors.
+    """
+
+class PdbxWriter(object):
+    """Write PDBx data files or dictionaries using the input container
+       or container list.
+    """
+    def __init__(self,ofh=sys.stdout):
+        """  ofh - output file handle that all text is written to
+        """
+        # Layout constants.
+        self.__MAXIMUM_LINE_LENGTH = 2048
+        self.__SPACING = 2
+        self.__INDENT_DEFINITION = 3
+        self.__indentSpace = self.__INDENT_DEFINITION * " "
+        # Writer state.
+        self.__ofh=ofh
+        self.__containerList=[]
+        # Set while the container being written is a definition container,
+        # whose category text is indented.
+        self.__doDefinitionIndent=False
+        # Maximum number of rows checked for value length and format
+        self.__rowPartition=None
+
+    def setRowPartition(self,numRows):
+        ''' Set the maximum number of rows checked for value length and format
+            when a category is written as a table.
+        '''
+        partition=numRows
+        self.__rowPartition=partition
+        
+    def write(self, containerList):
+        """Remember containerList and write each of its containers in order."""
+        self.__containerList=containerList
+        # Iterate the stored list itself, as writeContainer() is called per item.
+        pending=iter(self.__containerList)
+        for nextContainer in pending:
+            self.writeContainer(nextContainer)
+
+    def writeContainer(self,container):
+        """Write one container: its header lines, every non-empty category it
+           holds, and the closing delimiter lines.
+
+           Raises PdbxError for a category that has more than one row but no
+           attributes.
+        """
+        definitionIndent=" " *  self.__INDENT_DEFINITION
+        isDefinition=isinstance(container, DefinitionContainer)
+
+        # Select the two header lines and the indent mode for this container.
+        if isDefinition:
+            headerLines=("save_%s\n" % container.getName(), definitionIndent+"#\n")
+            indentCategories=True
+        elif isinstance(container, DataContainer):
+            if (container.getGlobal()):
+                headerLines=("global_\n", "\n")
+            else:
+                headerLines=("data_%s\n" % container.getName(), "#\n")
+            indentCategories=False
+        else:
+            # Any other object gets no header and leaves the indent mode as it was.
+            headerLines=None
+
+        if headerLines is not None:
+            self.__write(headerLines[0])
+            self.__doDefinitionIndent=indentCategories
+            self.__write(headerLines[1])
+
+        for objName in container.getObjNameList():
+            category=container.getObj(objName)
+            rowCount=len(category.getRowList())
+
+            # Skip empty objects
+            if rowCount == 0:
+                continue
+
+            if rowCount == 1:
+                # Item - value formatting
+                self.__writeItemValueFormat(category)
+            elif len(category.getAttributeList()) > 0:
+                # Table formatting (more than one row)
+                self.__writeTableFormat(category)
+            else:
+                raise PdbxError()
+
+            # Terminate the category with a comment marker (no newline here).
+            if self.__doDefinitionIndent:
+                marker=definitionIndent+"#"
+            else:
+                marker="#"
+            self.__write(marker)
+
+        # Add a trailing saveframe reserved word 
+        if isDefinition:
+            self.__write("\nsave_\n")
+        self.__write("#\n")
+
+    def __write(self, st):
+        """Write the string st to the output file handle."""
+        outputHandle=self.__ofh
+        outputHandle.write(st)
+
+    def __writeItemValueFormat(self, myCategory):
+        """Write the first row of myCategory as one '_category.attribute value'
+           line per attribute, with the item names padded to a common width.
+        """
+        categoryName = myCategory.getName()
+
+        # Longest attribute name in this category (0 when there are none).
+        longestAttribute = max([0] + [len(name) for name in myCategory.getAttributeList()])
+        # Width of the item name column: '_' + category + '.' + attribute, plus spacing.
+        nameColumnWidth = self.__SPACING + len(categoryName) + longestAttribute + 2
+
+        pieces=["#\n"]
+        for attributeName,iPos in myCategory.getAttributeListWithOrder():
+            if self.__doDefinitionIndent:
+                #        - add indent --
+                pieces.append(self.__indentSpace)
+            paddedName = ("_%s.%s" % (categoryName, attributeName)).ljust(nameColumnWidth)
+            pieces.append(paddedName)
+            pieces.append(myCategory.getValueFormatted(attributeName,0))
+            pieces.append("\n")
+
+        self.__write("".join(pieces))
+
+    def __writeTableFormat(self, myCategory):
+        """Write a multi-row category as a loop_ declaration followed by one
+           line of values per row, each value padded to its column width.
+        """
+        # Write the declaration of the loop_
+        #
+        lineList=[]
+        lineList.append('#\n')
+        if  self.__doDefinitionIndent:            
+            lineList.append(self.__indentSpace)
+        lineList.append("loop_")
+        for attributeName in myCategory.getAttributeList():
+            lineList.append('\n')            
+            if  self.__doDefinitionIndent:            
+                lineList.append(self.__indentSpace)
+            itemName = "_%s.%s" % (myCategory.getName(), attributeName)
+            lineList.append(itemName)
+        self.__write("".join(lineList))
+
+        #
+        # Write the data in tabular format - 
+        #
+
+        #    For speed make the following evaluation on a portion of the table
+        if self.__rowPartition is not None:
+            # Floor division keeps the step an integer even where '/' is true
+            # division; for integer operands it equals the former '/' result.
+            numSteps=max(1,myCategory.getRowCount()//self.__rowPartition)
+        else:
+            numSteps=1
+            
+        formatTypeList,dataTypeList=myCategory.getFormatTypeList(steps=numSteps)
+        maxLengthList=myCategory.getAttributeValueMaxLengthList(steps=numSteps)
+        spacing   = " " * self.__SPACING
+        #
+        for iRow in range(myCategory.getRowCount()):
+            lineList     = []            
+            lineList.append('\n')
+            if  self.__doDefinitionIndent:
+                lineList.append(self.__indentSpace + "  ")
+
+            for iAt in range(myCategory.getAttributeCount()):
+                formatType = formatTypeList[iAt]
+                maxLength  = maxLengthList[iAt]
+
+                if formatType in ('FT_UNQUOTED_STRING', 'FT_NULL_VALUE'):
+                    # Left-justified in the column.
+                    val=myCategory.getValueFormattedByIndex(iAt,iRow)
+                    lineList.append(val.ljust(maxLength))
+
+                elif formatType == 'FT_NUMBER':
+                    # Right-justified in the column.
+                    val=myCategory.getValueFormattedByIndex(iAt,iRow)
+                    lineList.append(val.rjust(maxLength))
+                    
+                elif formatType == 'FT_QUOTED_STRING':
+                    # Two extra columns are allowed for on top of maxLength.
+                    val=myCategory.getValueFormattedByIndex(iAt,iRow)
+                    lineList.append(val.ljust(maxLength+2))
+
+                elif formatType == "FT_MULTI_LINE_STRING":
+                    # Written as returned, without padding.
+                    val=myCategory.getValueFormattedByIndex(iAt,iRow)
+                    lineList.append(val)
+                    
+                # Any other format type contributes only the column spacing.
+                lineList.append(spacing)                    
+
+            self.__write("".join(lineList))
+        self.__write("\n")
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/writer/PdbxWriterTests.py
+##
+# File:    PdbxWriterTests.py
+# Author:  jdw
+# Date:    3-November-2009
+# Version: 0.001
+#
+# Update:
+#  5-Apr-2011 jdw   Using the double quote format preference
+# 24-Oct-2012 jdw   Update path and examples.
+##
+"""
+Test implementing PDBx/mmCIF write and formatting operations.
+
+"""
+__docformat__ = "restructuredtext en"
+__author__    = "John Westbrook"
+__email__     = "jwest@rcsb.rutgers.edu"
+__license__   = "Creative Commons Attribution 3.0 Unported"
+__version__   = "V0.01"
+
+
+
+import sys, unittest, traceback
+import sys, time, os, os.path, shutil
+
+from simtk.openmm.app.internal.pdbx.reader.PdbxReader  import PdbxReader
+from simtk.openmm.app.internal.pdbx.writer.PdbxWriter  import PdbxWriter
+from simtk.openmm.app.internal.pdbx.reader.PdbxContainers import *
+
+class PdbxWriterTests(unittest.TestCase):
+    def setUp(self):
+        self.lfh=sys.stderr
+        self.verbose=False
+        self.pathPdbxDataFile     ="../tests/1kip.cif"
+        self.pathOutputFile       ="testOutputDataFile.cif"
+
+    def tearDown(self):
+        pass
+
+    def testWriteDataFile(self): 
+        """Test case -  write data file 
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            #
+            myDataList=[]
+            ofh = open("test-output.cif", "w")
+            curContainer=DataContainer("myblock")
+            aCat=DataCategory("pdbx_seqtool_mapping_ref")
+            aCat.appendAttribute("ordinal")
+            aCat.appendAttribute("entity_id")
+            aCat.appendAttribute("auth_mon_id")
+            aCat.appendAttribute("auth_mon_num")
+            aCat.appendAttribute("pdb_chain_id")
+            aCat.appendAttribute("ref_mon_id")
+            aCat.appendAttribute("ref_mon_num")                        
+            aCat.append((1,2,3,4,5,6,7))
+            aCat.append((1,2,3,4,5,6,7))
+            aCat.append((1,2,3,4,5,6,7))
+            aCat.append((1,2,3,4,5,6,7))
+            curContainer.append(aCat)
+            myDataList.append(curContainer)
+            pdbxW=PdbxWriter(ofh)
+            pdbxW.write(myDataList)
+            ofh.close()
+        except:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+
+    def testUpdateDataFile(self): 
+        """Test case -  write data file 
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            # Create a initial data file --
+            #
+            myDataList=[]
+            ofh = open("test-output-1.cif", "w")
+            curContainer=DataContainer("myblock")
+            aCat=DataCategory("pdbx_seqtool_mapping_ref")
+            aCat.appendAttribute("ordinal")
+            aCat.appendAttribute("entity_id")
+            aCat.appendAttribute("auth_mon_id")
+            aCat.appendAttribute("auth_mon_num")
+            aCat.appendAttribute("pdb_chain_id")
+            aCat.appendAttribute("ref_mon_id")
+            aCat.appendAttribute("ref_mon_num")                        
+            aCat.append((1,2,3,4,5,6,7))
+            aCat.append((1,2,3,4,5,6,7))
+            aCat.append((1,2,3,4,5,6,7))
+            aCat.append((1,2,3,4,5,6,7))
+            curContainer.append(aCat)
+            myDataList.append(curContainer)
+            pdbxW=PdbxWriter(ofh)
+            pdbxW.write(myDataList)
+            ofh.close()
+            #
+            # Read and update the data -
+            # 
+            myDataList=[]
+            ifh = open("test-output-1.cif", "r")
+            pRd=PdbxReader(ifh)
+            pRd.read(myDataList)
+            ifh.close()
+            #
+            myBlock=myDataList[0]
+            myBlock.printIt()
+            myCat=myBlock.getObj('pdbx_seqtool_mapping_ref')
+            myCat.printIt()
+            for iRow in xrange(0,myCat.getRowCount()):
+                myCat.setValue('some value', 'ref_mon_id',iRow)
+                myCat.setValue(100, 'ref_mon_num',iRow)
+            ofh = open("test-output-2.cif", "w")            
+            pdbxW=PdbxWriter(ofh)
+            pdbxW.write(myDataList)
+            ofh.close()            
+            
+        except:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+
+    def testReadDataFile(self): 
+        """Test case -  read data file 
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            #
+            myDataList=[]
+            ifh = open(self.pathPdbxDataFile, "r")
+            pRd=PdbxReader(ifh)
+            pRd.read(myDataList)
+            ifh.close()            
+        except:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+
+    def testReadWriteDataFile(self): 
+        """Test case -  data file read write test
+        """
+        self.lfh.write("\nStarting %s %s\n" % (self.__class__.__name__,
+                                               sys._getframe().f_code.co_name))
+        try:
+            #
+            myDataList=[]
+            ifh = open(self.pathPdbxDataFile, "r")            
+            pRd=PdbxReader(ifh)
+            pRd.read(myDataList)
+            ifh.close()            
+            
+            ofh = open(self.pathOutputFile, "w")
+            pWr=PdbxWriter(ofh)
+            pWr.write(myDataList)        
+            ofh.close()
+        except:
+            traceback.print_exc(file=sys.stderr)
+            self.fail()
+
+def suite():
+    return unittest.makeSuite(PdbxWriterTests,'test')
+
+# Run every test in this module when the file is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()
--- a/wrappers/python/simtk/openmm/app/internal/pdbx/writer/__init__.py
+++ b/wrappers/python/simtk/openmm/app/internal/pdbx/writer/__init__.py
--- a/wrappers/python/simtk/openmm/app/pdbxfile.py
+++ b/wrappers/python/simtk/openmm/app/pdbxfile.py
+"""
+pdbxfile.py: Used for loading PDBx/mmCIF files.
+
+This is part of the OpenMM molecular simulation toolkit originating from
+Simbios, the NIH National Center for Physics-Based Simulation of
+Biological Structures at Stanford, funded under the NIH Roadmap for
+Medical Research, grant U54 GM072970. See https://simtk.org.
+
+Portions copyright (c) 2014 Stanford University and the Authors.
+Authors: Peter Eastman
+Contributors:
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+__author__ = "Peter Eastman"
+__version__ = "1.0"
+
+import os
+import sys
+from simtk.openmm import Vec3
+from simtk.openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
+from simtk.openmm.app import Topology
+from simtk.unit import nanometers, angstroms, is_quantity, norm, Quantity
+import element as elem
+try:
+    import numpy
+except:
+    pass
+
+class PDBxFile(object):
+    """PDBxFile parses a PDBx/mmCIF file and constructs a Topology and a set of atom positions from it."""
+
+    def __init__(self, file):
+        """Load a PDBx/mmCIF file.
+
+        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().
+
+        Parameters:
+         - file (string) the name of the file to load.  Alternatively you can pass an open file object.
+        """
+        top = Topology()
+        ## The Topology read from the PDBx/mmCIF file
+        self.topology = top
+        self._positions = []
+
+        # Load the file.
+
+        inputFile = file
+        if isinstance(file, str):
+            inputFile = open(file)
+        reader = PdbxReader(inputFile)
+        data = []
+        reader.read(data)
+        block = data[0]
+
+        # Build the topology.
+
+        atomData = block.getObj('atom_site')
+        atomNameCol = atomData.getAttributeIndex('label_atom_id')
+        atomIdCol = atomData.getAttributeIndex('id')
+        resNameCol = atomData.getAttributeIndex('label_comp_id')
+        resIdCol = atomData.getAttributeIndex('label_seq_id')
+        asymIdCol = atomData.getAttributeIndex('label_asym_id')
+        chainIdCol = atomData.getAttributeIndex('label_entity_id')
+        elementCol = atomData.getAttributeIndex('type_symbol')
+        modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
+        xCol = atomData.getAttributeIndex('Cartn_x')
+        yCol = atomData.getAttributeIndex('Cartn_y')
+        zCol = atomData.getAttributeIndex('Cartn_z')
+        lastChainId = None
+        lastResId = None
+        lastAsymId = None
+        atomTable = {}
+        models = []
+        for row in atomData.getRowList():
+            asymId = ('A' if asymIdCol == -1 else row[asymIdCol])
+            atomKey = ((row[resIdCol], asymId, row[atomNameCol]))
+            model = ('1' if modelCol == -1 else row[modelCol])
+            if model not in models:
+                models.append(model)
+                self._positions.append([])
+            modelIndex = models.index(model)
+            if modelIndex == 0:
+                # This row defines a new atom.
+
+                if lastChainId != row[chainIdCol]:
+                    # The start of a new chain.
+                    chain = top.addChain()
+                    lastChainId = row[chainIdCol]
+                    lastResId = None
+                    lastAsymId = None
+                if lastResId != row[resIdCol] or lastAsymId != asymId:
+                    # The start of a new residue.
+                    res = top.addResidue(row[resNameCol], chain)
+                    lastResId = row[resIdCol]
+                    lastAsymId = asymId
+                element = None
+                try:
+                    element = elem.get_by_symbol(row[elementCol])
+                except KeyError:
+                    pass
+                atom = top.addAtom(row[atomNameCol], element, res)
+                atomTable[atomKey] = atom
+            else:
+                # This row defines coordinates for an existing atom in one of the later models.
+
+                try:
+                    atom = atomTable[atomKey]
+                except KeyError:
+                    raise ValueError('Unknown atom %s in residue %s %s for model %s' % (row[atomNameCol], row[resNameCol], row[resIdCol], model))
+                if atom.index != len(self._positions[modelIndex]):
+                    raise ValueError('Atom %s for model %s does not match the order of atoms for model %s' % (row[atomIdCol], model, models[0]))
+            self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1)
+        for i in range(len(self._positions)):
+            self._positions[i] = self._positions[i]*nanometers
+        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
+        self.positions = self._positions[0]
+        self.topology.createStandardBonds()
+        self.topology.createDisulfideBonds(self.positions)
+        self._numpyPositions = None
+
+        # Record unit cell information, if present.
+
+        cell = block.getObj('cell')
+        if cell is not None and cell.getRowCount() > 0:
+            row = cell.getRow(0)
+            cellSize = [float(row[cell.getAttributeIndex(attribute)]) for attribute in ('length_a', 'length_b', 'length_c')]*angstroms
+            self.topology.setUnitCellDimensions(cellSize)
+
+        # Add bonds based on struct_conn records.
+
+        connectData = block.getObj('struct_conn')
+        if connectData is not None:
+            res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
+            res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
+            atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
+            atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
+            asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
+            asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
+            typeCol = connectData.getAttributeIndex('conn_type_id')
+            connectBonds = []
+            for row in connectData.getRowList():
+                type = row[typeCol][:6]
+                if type in ('covale', 'disulf', 'modres'):
+                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
+                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
+                    if key1 in atomTable and key2 in atomTable:
+                        connectBonds.append((atomTable[key1], atomTable[key2]))
+            if len(connectBonds) > 0:
+                # Only add bonds that don't already exist.
+                existingBonds = set(top.bonds())
+                for bond in connectBonds:
+                    if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
+                        top.addBond(bond[0], bond[1])
+                        existingBonds.add(bond)
+
+    def getTopology(self):
+        """Get the Topology of the model."""
+        return self.topology
+
+    def getNumFrames(self):
+        """Get the number of frames stored in the file."""
+        return len(self._positions)
+
+    def getPositions(self, asNumpy=False, frame=0):
+        """Get the atomic positions.
+
+        Parameters:
+         - asNumpy (boolean=False) if true, the values are returned as a numpy array instead of a list of Vec3s
+         - frame (int=0) the index of the frame for which to get positions
+         """
+        if asNumpy:
+            if self._numpyPositions is None:
+                self._numpyPositions = [None]*len(self._positions)
+            if self._numpyPositions[frame] is None:
+                self._numpyPositions[frame] = Quantity(numpy.array(self._positions[frame].value_in_unit(nanometers)), nanometers)
+            return self._numpyPositions[frame]
+        return self._positions[frame]