modeller.py 48.4 KB
Newer Older
1
2
"""
modeller.py: Provides tools for editing molecular models
3
4
5
6
7
8

This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.

9
Portions copyright (c) 2012-2013 Stanford University and the Authors.
10
11
12
Authors: Peter Eastman
Contributors:

Justin MacCallum's avatar
Justin MacCallum committed
13
Permission is hereby granted, free of charge, to any person obtaining a
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
30
"""
Peter Eastman's avatar
Peter Eastman committed
31
32
from __future__ import division

33
34
35
__author__ = "Peter Eastman"
__version__ = "1.0"

36
from simtk.openmm.app import Topology, PDBFile, ForceField
peastman's avatar
peastman committed
37
from simtk.openmm.app.forcefield import HAngles, _createResidueSignature, _matchResidue, DrudeGenerator
38
from simtk.openmm.app.topology import Residue
39
from simtk.openmm.vec3 import Vec3
40
from simtk.openmm import System, Context, NonbondedForce, VerletIntegrator, LocalEnergyMinimizer
41
from simtk.unit import nanometer, molar, elementary_charge, amu, gram, liter, degree, sqrt, acos, is_quantity, dot, norm
42
import element as elem
Peter Eastman's avatar
Peter Eastman committed
43
44
import os
import random
Peter Eastman's avatar
Peter Eastman committed
45
import xml.etree.ElementTree as etree
Peter Eastman's avatar
Peter Eastman committed
46
47
from copy import deepcopy
from math import ceil, floor
48
49
50

class Modeller(object):
    """Modeller provides tools for editing molecular models, such as adding water or missing hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
51

52
53
54
55
56
    To use it, create a Modeller object, specifying the initial Topology and atom positions.  You can
    then call various methods to change the model in different ways.  Each time you do, a new Topology
    and list of coordinates is created to represent the changed model.  Finally, call getTopology()
    and getPositions() to get the results.
    """
Justin MacCallum's avatar
Justin MacCallum committed
57

58
59
    _residueHydrogens = {}
    _hasLoadedStandardHydrogens = False
Peter Eastman's avatar
Peter Eastman committed
60

61
62
    def __init__(self, topology, positions):
        """Create a new Modeller object
Justin MacCallum's avatar
Justin MacCallum committed
63

64
65
66
67
        Parameters:
         - topology (Topology) the initial Topology of the model
         - positions (list) the initial atomic positions
        """
68
        ## The Topology describing the structure of the system
69
        self.topology = topology
Peter Eastman's avatar
Peter Eastman committed
70
        if not is_quantity(positions):
Peter Eastman's avatar
Bug fix  
Peter Eastman committed
71
            positions = positions*nanometer
72
        ## The list of atom positions
73
        self.positions = positions
Justin MacCallum's avatar
Justin MacCallum committed
74

75
76
77
    def getTopology(self):
        """Get the Topology of the model."""
        return self.topology
Justin MacCallum's avatar
Justin MacCallum committed
78

79
80
81
82
    def getPositions(self):
        """Get the atomic positions."""
        return self.positions

83
84
    def add(self, addTopology, addPositions):
        """Add chains, residues, atoms, and bonds to the model.
Justin MacCallum's avatar
Justin MacCallum committed
85

86
87
        Specify what to add by providing a new Topology object and the corresponding atomic positions.
        All chains, residues, atoms, and bonds contained in the Topology are added to the model.
Justin MacCallum's avatar
Justin MacCallum committed
88

89
90
91
92
93
        Parameters:
         - addTopoology (Topology) a Topology whose contents should be added to the model
         - addPositions (list) the positions of the atoms to add
        """
        # Copy over the existing model.
Justin MacCallum's avatar
Justin MacCallum committed
94

95
        newTopology = Topology()
Peter Eastman's avatar
Peter Eastman committed
96
        newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
97
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
98
        newPositions = []*nanometer
99
100
101
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
102
103
104
105
106
107
108
                newResidue = newTopology.addResidue(residue.name, newChain)
                for atom in residue.atoms():
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
109

110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
        # Add the new model

        newAtoms = {}
        for chain in addTopology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain)
                for atom in residue.atoms():
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(addPositions[atom.index]))
        for bond in addTopology.bonds():
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions

    def delete(self, toDelete):
        """Delete chains, residues, atoms, and bonds from the model.
Justin MacCallum's avatar
Justin MacCallum committed
128

129
130
131
132
        You can specify objects to delete at any granularity: atoms, residues, or chains.  Passing
        in an Atom object causes that Atom to be deleted.  Passing in a Residue object causes that
        Residue and all Atoms it contains to be deleted.  Passing in a Chain object causes that
        Chain and all Residues and Atoms it contains to be deleted.
Justin MacCallum's avatar
Justin MacCallum committed
133

134
135
136
        In all cases, when an Atom is deleted, any bonds it participates in are also deleted.
        You also can specify a bond (as a tuple of Atom objects) to delete just that bond without
        deleting the Atoms it connects.
Justin MacCallum's avatar
Justin MacCallum committed
137

138
139
140
141
142
143
144
145
146
147
        Parameters:
         - toDelete (list) a list of Atoms, Residues, Chains, and bonds (specified as tuples of Atoms) to delete
        """
        newTopology = Topology()
        newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
        newAtoms = {}
        newPositions = []*nanometer
        deleteSet = set(toDelete)
        for chain in self.topology.chains():
            if chain not in deleteSet:
148
                needNewChain = True;
149
150
                for residue in chain.residues():
                    if residue not in deleteSet:
151
                        needNewResidue = True
152
153
                        for atom in residue.atoms():
                            if atom not in deleteSet:
154
155
156
157
158
159
                                if needNewChain:
                                    newChain = newTopology.addChain()
                                    needNewChain = False;
                                if needNewResidue:
                                    newResidue = newTopology.addResidue(residue.name, newChain)
                                    needNewResidue = False;
160
161
162
                                newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                                newAtoms[atom] = newAtom
                                newPositions.append(deepcopy(self.positions[atom.index]))
163
164
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
165
166
                if bond not in deleteSet and (bond[1], bond[0]) not in deleteSet:
                    newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
167
168
        self.topology = newTopology
        self.positions = newPositions
Justin MacCallum's avatar
Justin MacCallum committed
169

170
171
172
    def deleteWater(self):
        """Delete all water molecules from the model."""
        self.delete(res for res in self.topology.residues() if res.name == "HOH")
Justin MacCallum's avatar
Justin MacCallum committed
173

174
175
    def convertWater(self, model='tip3p'):
        """Convert all water molecules to a different water model.
Justin MacCallum's avatar
Justin MacCallum committed
176

177
        Parameters:
Peter Eastman's avatar
Peter Eastman committed
178
         - model (string='tip3p') the water model to convert to.  Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
179
180
        
        @deprecated Use addExtraParticles() instead.  It performs the same function but in a more general way.
181
        """
Peter Eastman's avatar
Peter Eastman committed
182
        if model in ('tip3p', 'spce'):
183
184
185
186
187
188
189
190
            sites = 3
        elif model == 'tip4pew':
            sites = 4
        elif model == 'tip5p':
            sites = 5
        else:
            raise ValueError('Unknown water model: %s' % model)
        newTopology = Topology()
Peter Eastman's avatar
Peter Eastman committed
191
        newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
192
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
193
        newPositions = []*nanometer
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain)
                if residue.name == "HOH":
                    # Copy the oxygen and hydrogens
                    oatom = [atom for atom in residue.atoms() if atom.element == elem.oxygen]
                    hatoms = [atom for atom in residue.atoms() if atom.element == elem.hydrogen]
                    if len(oatom) != 1 or len(hatoms) != 2:
                        raise ValueError('Illegal water molecule (residue %d): contains %d oxygen(s) and %d hydrogen(s)' % (residue.index, len(oatom), len(hatoms)))
                    o = newTopology.addAtom(oatom[0].name, oatom[0].element, newResidue)
                    h1 = newTopology.addAtom(hatoms[0].name, hatoms[0].element, newResidue)
                    h2 = newTopology.addAtom(hatoms[1].name, hatoms[1].element, newResidue)
                    newAtoms[oatom[0]] = o
                    newAtoms[hatoms[0]] = h1
                    newAtoms[hatoms[1]] = h2
Peter Eastman's avatar
Peter Eastman committed
210
211
212
                    po = deepcopy(self.positions[oatom[0].index])
                    ph1 = deepcopy(self.positions[hatoms[0].index])
                    ph2 = deepcopy(self.positions[hatoms[1].index])
213
214
215
                    newPositions.append(po)
                    newPositions.append(ph1)
                    newPositions.append(ph2)
Justin MacCallum's avatar
Justin MacCallum committed
216

217
                    # Add virtual sites.
Justin MacCallum's avatar
Justin MacCallum committed
218

219
220
                    if sites == 4:
                        newTopology.addAtom('M', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
221
                        newPositions.append(0.786646558*po + 0.106676721*ph1 + 0.106676721*ph2)
222
223
224
                    elif sites == 5:
                        newTopology.addAtom('M1', None, newResidue)
                        newTopology.addAtom('M2', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
225
226
                        v1 = (ph1-po).value_in_unit(nanometer)
                        v2 = (ph2-po).value_in_unit(nanometer)
227
                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
Peter Eastman's avatar
Peter Eastman committed
228
229
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 - 6.4437903*cross)*nanometer)
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 + 6.4437903*cross)*nanometer)
230
231
232
233
234
                else:
                    # Just copy the residue over.
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
Peter Eastman's avatar
Peter Eastman committed
235
                        newPositions.append(deepcopy(self.positions[atom.index]))
236
237
238
239
240
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions
Peter Eastman's avatar
Peter Eastman committed
241
242
243

    def addSolvent(self, forcefield, model='tip3p', boxSize=None, padding=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*molar):
        """Add solvent (both water and ions) to the model to fill a rectangular box.
Justin MacCallum's avatar
Justin MacCallum committed
244

Peter Eastman's avatar
Peter Eastman committed
245
246
247
248
249
250
        The algorithm works as follows:
        1. Water molecules are added to fill the box.
        2. Water molecules are removed if their distance to any solute atom is less than the sum of their van der Waals radii.
        3. If the solute is charged, enough positive or negative ions are added to neutralize it.  Each ion is added by
           randomly selecting a water molecule and replacing it with the ion.
        4. Ion pairs are added to give the requested total ionic strength.
Justin MacCallum's avatar
Justin MacCallum committed
251

Peter Eastman's avatar
Peter Eastman committed
252
253
254
255
        The box size can be specified in three ways.  First, you can explicitly give a box size to use.  Alternatively, you can
        give a padding distance.  The largest dimension of the solute (along the x, y, or z axis) is determined, and a cubic
        box of size (largest dimension)+2*padding is used.  Finally, if neither a box size nor a padding distance is specified,
        the existing Topology's unit cell dimensions are used.
Justin MacCallum's avatar
Justin MacCallum committed
256

Peter Eastman's avatar
Peter Eastman committed
257
258
        Parameters:
         - forcefield (ForceField) the ForceField to use for determining van der Waals radii and atomic charges
Peter Eastman's avatar
Peter Eastman committed
259
         - model (string='tip3p') the water model to use.  Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
Peter Eastman's avatar
Peter Eastman committed
260
261
262
263
264
265
266
267
268
         - boxSize (Vec3=None) the size of the box to fill with water
         - padding (distance=None) the padding distance to use
         - positiveIon (string='Na+') the type of positive ion to add.  Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'
         - negativeIon (string='Cl-') the type of negative ion to add.  Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware
           that not all force fields support all ion types.
         - ionicString (concentration=0*molar) the total concentration of ions (both positive and negative) to add.  This
           does not include ions that are added to neutralize the system.
        """
        # Pick a unit cell size.
Justin MacCallum's avatar
Justin MacCallum committed
269

Peter Eastman's avatar
Peter Eastman committed
270
        if boxSize is not None:
271
272
273
            if is_quantity(boxSize):
                boxSize = boxSize.value_in_unit(nanometer)
            box = Vec3(boxSize[0], boxSize[1], boxSize[2])*nanometer
Peter Eastman's avatar
Peter Eastman committed
274
275
276
277
        elif padding is not None:
            maxSize = max(max((pos[i] for pos in self.positions))-min((pos[i] for pos in self.positions)) for i in range(3))
            box = (maxSize+2*padding)*Vec3(1, 1, 1)
        else:
278
            box = self.topology.getUnitCellDimensions()
Peter Eastman's avatar
Peter Eastman committed
279
280
281
282
            if box is None:
                raise ValueError('Neither the box size nor padding was specified, and the Topology does not define unit cell dimensions')
        box = box.value_in_unit(nanometer)
        invBox = Vec3(1.0/box[0], 1.0/box[1], 1.0/box[2])
Justin MacCallum's avatar
Justin MacCallum committed
283

Peter Eastman's avatar
Peter Eastman committed
284
        # Identify the ion types.
Justin MacCallum's avatar
Justin MacCallum committed
285

Peter Eastman's avatar
Peter Eastman committed
286
287
288
289
290
291
292
293
        posIonElements = {'Cs+':elem.cesium, 'K+':elem.potassium, 'Li+':elem.lithium, 'Na+':elem.sodium, 'Rb+':elem.rubidium}
        negIonElements = {'Cl-':elem.chlorine, 'Br-':elem.bromine, 'F-':elem.fluorine, 'I-':elem.iodine}
        if positiveIon not in posIonElements:
            raise ValueError('Illegal value for positive ion: %s' % positiveIon)
        if negativeIon not in negIonElements:
            raise ValueError('Illegal value for negative ion: %s' % negativeIon)
        positiveElement = posIonElements[positiveIon]
        negativeElement = negIonElements[negativeIon]
Justin MacCallum's avatar
Justin MacCallum committed
294

Peter Eastman's avatar
Peter Eastman committed
295
        # Load the pre-equilibrated water box.
Justin MacCallum's avatar
Justin MacCallum committed
296

Peter Eastman's avatar
Peter Eastman committed
297
298
299
        vdwRadiusPerSigma = 0.5612310241546864907
        if model == 'tip3p':
            waterRadius = 0.31507524065751241*vdwRadiusPerSigma
Peter Eastman's avatar
Peter Eastman committed
300
301
        elif model == 'spce':
            waterRadius = 0.31657195050398818*vdwRadiusPerSigma
Peter Eastman's avatar
Peter Eastman committed
302
303
304
305
306
307
308
309
310
311
312
        elif model == 'tip4pew':
            waterRadius = 0.315365*vdwRadiusPerSigma
        elif model == 'tip5p':
            waterRadius = 0.312*vdwRadiusPerSigma
        else:
            raise ValueError('Unknown water model: %s' % model)
        pdb = PDBFile(os.path.join(os.path.dirname(__file__), 'data', model+'.pdb'))
        pdbTopology = pdb.getTopology()
        pdbPositions = pdb.getPositions().value_in_unit(nanometer)
        pdbResidues = list(pdbTopology.residues())
        pdbBoxSize = pdbTopology.getUnitCellDimensions().value_in_unit(nanometer)
Justin MacCallum's avatar
Justin MacCallum committed
313

Peter Eastman's avatar
Peter Eastman committed
314
        # Have the ForceField build a System for the solute from which we can determine van der Waals radii.
Justin MacCallum's avatar
Justin MacCallum committed
315

Peter Eastman's avatar
Peter Eastman committed
316
317
318
319
320
321
322
323
        system = forcefield.createSystem(self.topology)
        nonbonded = None
        for i in range(system.getNumForces()):
            if isinstance(system.getForce(i), NonbondedForce):
                nonbonded = system.getForce(i)
        if nonbonded is None:
            raise ValueError('The ForceField does not specify a NonbondedForce')
        cutoff = [nonbonded.getParticleParameters(i)[1].value_in_unit(nanometer)*vdwRadiusPerSigma+waterRadius for i in range(system.getNumParticles())]
324
        waterCutoff = waterRadius
Peter Eastman's avatar
Peter Eastman committed
325
326
327
328
        if len(cutoff) == 0:
            maxCutoff = waterCutoff
        else:
            maxCutoff = max(waterCutoff, max(cutoff))
Justin MacCallum's avatar
Justin MacCallum committed
329

Peter Eastman's avatar
Peter Eastman committed
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
        # Copy the solute over.

        newTopology = Topology()
        newTopology.setUnitCellDimensions(box)
        newAtoms = {}
        newPositions = []*nanometer
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain)
                for atom in residue.atoms():
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
345
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
346

Peter Eastman's avatar
Peter Eastman committed
347
        # Sort the solute atoms into cells for fast lookup.
Justin MacCallum's avatar
Justin MacCallum committed
348

Peter Eastman's avatar
Peter Eastman committed
349
350
351
352
        if len(self.positions) == 0:
            positions = []
        else:
            positions = self.positions.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
353
        cells = {}
Peter Eastman's avatar
Peter Eastman committed
354
        numCells = tuple((max(1, int(floor(box[i]/maxCutoff))) for i in range(3)))
Peter Eastman's avatar
Peter Eastman committed
355
356
357
358
359
360
361
        cellSize = tuple((box[i]/numCells[i] for i in range(3)))
        for i in range(len(positions)):
            cell = tuple((int(floor(positions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
            if cell in cells:
                cells[cell].append(i)
            else:
                cells[cell] = [i]
Justin MacCallum's avatar
Justin MacCallum committed
362

Peter Eastman's avatar
Peter Eastman committed
363
        # Create a generator that loops over atoms close to a position.
Justin MacCallum's avatar
Justin MacCallum committed
364

Peter Eastman's avatar
Peter Eastman committed
365
366
367
368
369
370
371
372
373
374
        def neighbors(pos):
            centralCell = tuple((int(floor(pos[i]/cellSize[i])) for i in range(3)))
            offsets = (-1, 0, 1)
            for i in offsets:
                for j in offsets:
                    for k in offsets:
                        cell = ((centralCell[0]+i+numCells[0])%numCells[0], (centralCell[1]+j+numCells[1])%numCells[1], (centralCell[2]+k+numCells[2])%numCells[2])
                        if cell in cells:
                            for atom in cells[cell]:
                                yield atom
Justin MacCallum's avatar
Justin MacCallum committed
375

Peter Eastman's avatar
Peter Eastman committed
376
        # Define a function to compute the distance between two points, taking periodic boundary conditions into account.
Justin MacCallum's avatar
Justin MacCallum committed
377

Peter Eastman's avatar
Peter Eastman committed
378
379
380
        def periodicDistance(pos1, pos2):
            delta = pos1-pos2
            delta = [delta[i]-floor(delta[i]*invBox[i]+0.5)*box[i] for i in range(3)]
381
            return norm(delta)
Justin MacCallum's avatar
Justin MacCallum committed
382

Peter Eastman's avatar
Peter Eastman committed
383
        # Find the list of water molecules to add.
Justin MacCallum's avatar
Justin MacCallum committed
384

Peter Eastman's avatar
Peter Eastman committed
385
        newChain = newTopology.addChain()
Peter Eastman's avatar
Peter Eastman committed
386
387
388
389
390
        if len(positions) == 0:
            center = Vec3(0, 0, 0)
        else:
            center = [(max((pos[i] for pos in positions))+min((pos[i] for pos in positions)))/2 for i in range(3)]
            center = Vec3(center[0], center[1], center[2])
Peter Eastman's avatar
Peter Eastman committed
391
392
393
394
395
396
397
398
399
400
401
        numBoxes = [int(ceil(box[i]/pdbBoxSize[i])) for i in range(3)]
        addedWaters = []
        for boxx in range(numBoxes[0]):
            for boxy in range(numBoxes[1]):
                for boxz in range(numBoxes[2]):
                    offset = Vec3(boxx*pdbBoxSize[0], boxy*pdbBoxSize[1], boxz*pdbBoxSize[2])
                    for residue in pdbResidues:
                        oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
                        atomPos = pdbPositions[oxygen.index]+offset
                        if not any((atomPos[i] > box[i] for i in range(3))):
                            # This molecule is inside the box, so see how close to it is to the solute.
Justin MacCallum's avatar
Justin MacCallum committed
402

Peter Eastman's avatar
Peter Eastman committed
403
404
405
406
407
408
                            atomPos += center-box/2
                            for i in neighbors(atomPos):
                                if periodicDistance(atomPos, positions[i]) < cutoff[i]:
                                    break
                            else:
                                # Record this water molecule as one to add.
Justin MacCallum's avatar
Justin MacCallum committed
409

Peter Eastman's avatar
Peter Eastman committed
410
                                addedWaters.append((residue.index, atomPos))
Justin MacCallum's avatar
Justin MacCallum committed
411

Peter Eastman's avatar
Peter Eastman committed
412
        # There could be clashes between water molecules at the box edges.  Find ones to remove.
Justin MacCallum's avatar
Justin MacCallum committed
413

Peter Eastman's avatar
Peter Eastman committed
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
        upperCutoff = center+box/2-Vec3(waterCutoff, waterCutoff, waterCutoff)
        lowerCutoff = center-box/2+Vec3(waterCutoff, waterCutoff, waterCutoff)
        lowerSkinPositions = [pos for index, pos in addedWaters if pos[0] < lowerCutoff[0] or pos[1] < lowerCutoff[1] or pos[2] < lowerCutoff[2]]
        filteredWaters = []
        cells = {}
        for i in range(len(lowerSkinPositions)):
            cell = tuple((int(floor(lowerSkinPositions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
            if cell in cells:
                cells[cell].append(i)
            else:
                cells[cell] = [i]
        for entry in addedWaters:
            pos = entry[1]
            if pos[0] < upperCutoff[0] and pos[1] < upperCutoff[1] and pos[2] < upperCutoff[2]:
                filteredWaters.append(entry)
            else:
Peter Eastman's avatar
Peter Eastman committed
430
                if not any((periodicDistance(lowerSkinPositions[i], pos) < waterCutoff and norm(lowerSkinPositions[i]-pos) > waterCutoff for i in neighbors(pos))):
Peter Eastman's avatar
Peter Eastman committed
431
432
                    filteredWaters.append(entry)
        addedWaters = filteredWaters
Justin MacCallum's avatar
Justin MacCallum committed
433

Peter Eastman's avatar
Peter Eastman committed
434
        # Add ions to neutralize the system.
Justin MacCallum's avatar
Justin MacCallum committed
435

436
        totalCharge = int(floor(0.5+sum((nonbonded.getParticleParameters(i)[0].value_in_unit(elementary_charge) for i in range(system.getNumParticles())))))
Peter Eastman's avatar
Peter Eastman committed
437
438
439
440
441
442
443
444
445
446
447
        if abs(totalCharge) > len(addedWaters):
            raise Exception('Cannot neutralize the system because the charge is greater than the number of available positions for ions')
        def addIon(element):
            # Replace a water by an ion.
            index = random.randint(0, len(addedWaters)-1)
            newResidue = newTopology.addResidue(element.symbol.upper(), newChain)
            newTopology.addAtom(element.symbol, element, newResidue)
            newPositions.append(addedWaters[index][1]*nanometer)
            del addedWaters[index]
        for i in range(abs(totalCharge)):
            addIon(positiveElement if totalCharge < 0 else negativeElement)
Justin MacCallum's avatar
Justin MacCallum committed
448

Peter Eastman's avatar
Peter Eastman committed
449
        # Add ions based on the desired ionic strength.
Justin MacCallum's avatar
Justin MacCallum committed
450

Peter Eastman's avatar
Peter Eastman committed
451
452
453
454
455
456
        numIons = len(addedWaters)*ionicStrength/(55.4*molar) # Pure water is about 55.4 molar (depending on temperature)
        numPairs = int(floor(numIons/2+0.5))
        for i in range(numPairs):
            addIon(positiveElement)
        for i in range(numPairs):
            addIon(negativeElement)
Justin MacCallum's avatar
Justin MacCallum committed
457

Peter Eastman's avatar
Peter Eastman committed
458
        # Add the water molecules.
Justin MacCallum's avatar
Justin MacCallum committed
459

Peter Eastman's avatar
Peter Eastman committed
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
        for index, pos in addedWaters:
            newResidue = newTopology.addResidue(residue.name, newChain)
            residue = pdbResidues[index]
            oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
            oPos = pdbPositions[oxygen.index]
            molAtoms = []
            for atom in residue.atoms():
                molAtoms.append(newTopology.addAtom(atom.name, atom.element, newResidue))
                newPositions.append((pos+pdbPositions[atom.index]-oPos)*nanometer)
            for atom1 in molAtoms:
                if atom1.element == elem.oxygen:
                    for atom2 in molAtoms:
                        if atom2.element == elem.hydrogen:
                            newTopology.addBond(atom1, atom2)
        newTopology.setUnitCellDimensions(deepcopy(box)*nanometer)
        self.topology = newTopology
        self.positions = newPositions
Justin MacCallum's avatar
Justin MacCallum committed
477

Peter Eastman's avatar
Peter Eastman committed
478
479
480
481
482
483
    class _ResidueData:
        """Inner class used to encapsulate data about the hydrogens for a residue."""
        def __init__(self, name):
            self.name = name
            self.variants = []
            self.hydrogens = []
Justin MacCallum's avatar
Justin MacCallum committed
484

Peter Eastman's avatar
Peter Eastman committed
485
486
487
488
489
490
491
492
    class _Hydrogen:
        """Inner class used to encapsulate data about a hydrogen atom."""
        def __init__(self, name, parent, maxph, variants, terminal):
            self.name = name
            self.parent = parent
            self.maxph = maxph
            self.variants = variants
            self.terminal = terminal
Justin MacCallum's avatar
Justin MacCallum committed
493

494
495
496
    @staticmethod
    def loadHydrogenDefinitions(file):
        """Load an XML file containing definitions of hydrogens that should be added by addHydrogens().
Justin MacCallum's avatar
Justin MacCallum committed
497

498
499
500
501
        The built in hydrogens.xml file containing definitions for standard amino acids and nucleotides is loaded automatically.
        This method can be used to load additional definitions for other residue types.  They will then be used in subsequent
        calls to addHydrogens().
        """
502
503
504
505
506
        tree = etree.parse(file)
        infinity = float('Inf')
        for residue in tree.getroot().findall('Residue'):
            resName = residue.attrib['name']
            data = Modeller._ResidueData(resName)
507
            Modeller._residueHydrogens[resName] = data
508
509
510
511
512
513
514
515
516
517
518
519
520
            for variant in residue.findall('Variant'):
                data.variants.append(variant.attrib['name'])
            for hydrogen in residue.findall('H'):
                maxph = infinity
                if 'maxph' in hydrogen.attrib:
                    maxph = float(hydrogen.attrib['maxph'])
                atomVariants = None
                if 'variant' in hydrogen.attrib:
                    atomVariants = hydrogen.attrib['variant'].split(',')
                terminal = None
                if 'terminal' in hydrogen.attrib:
                    terminal = hydrogen.attrib['terminal']
                data.hydrogens.append(Modeller._Hydrogen(hydrogen.attrib['name'], hydrogen.attrib['parent'], maxph, atomVariants, terminal))
Justin MacCallum's avatar
Justin MacCallum committed
521

522
    def addHydrogens(self, forcefield, pH=7.0, variants=None, platform=None):
Peter Eastman's avatar
Peter Eastman committed
523
        """Add missing hydrogens to the model.
Justin MacCallum's avatar
Justin MacCallum committed
524

Peter Eastman's avatar
Peter Eastman committed
525
526
527
        Some residues can exist in multiple forms depending on the pH and properties of the local environment.  These
        variants differ in the presence or absence of particular hydrogens.  In particular, the following variants
        are supported:
Justin MacCallum's avatar
Justin MacCallum committed
528

Peter Eastman's avatar
Peter Eastman committed
529
530
531
        Aspartic acid:
            ASH: Neutral form with a hydrogen on one of the delta oxygens
            ASP: Negatively charged form without a hydrogen on either delta oxygen
Justin MacCallum's avatar
Justin MacCallum committed
532

Peter Eastman's avatar
Peter Eastman committed
533
534
535
        Cysteine:
            CYS: Neutral form with a hydrogen on the sulfur
            CYX: No hydrogen on the sulfur (either negatively charged, or part of a disulfide bond)
Justin MacCallum's avatar
Justin MacCallum committed
536

Peter Eastman's avatar
Peter Eastman committed
537
538
539
        Glutamic acid:
            GLH: Neutral form with a hydrogen on one of the epsilon oxygens
            GLU: Negatively charged form without a hydrogen on either epsilon oxygen
Justin MacCallum's avatar
Justin MacCallum committed
540

Peter Eastman's avatar
Peter Eastman committed
541
542
543
544
        Histidine:
            HID: Neutral form with a hydrogen on the ND1 atom
            HIE: Neutral form with a hydrogen on the NE2 atom
            HIP: Positively charged form with hydrogens on both ND1 and NE2
Justin MacCallum's avatar
Justin MacCallum committed
545

Peter Eastman's avatar
Peter Eastman committed
546
547
548
        Lysine:
            LYN: Neutral form with two hydrogens on the zeta nitrogen
            LYS: Positively charged form with three hydrogens on the zeta nitrogen
Justin MacCallum's avatar
Justin MacCallum committed
549

Peter Eastman's avatar
Peter Eastman committed
550
        The variant to use for each residue is determined by the following rules:
Justin MacCallum's avatar
Justin MacCallum committed
551

Peter Eastman's avatar
Peter Eastman committed
552
553
554
        1. The most common variant at the specified pH is selected.
        2. Any Cysteine that participates in a disulfide bond uses the CYX variant regardless of pH.
        3. For a neutral Histidine residue, the HID or HIE variant is selected based on which one forms a better hydrogen bond.
Justin MacCallum's avatar
Justin MacCallum committed
555

Peter Eastman's avatar
Peter Eastman committed
556
557
558
        You can override these rules by explicitly specifying a variant for any residue.  Also keep in mind that this
        function will only add hydrogens.  It will never remove ones that are already present in the model, regardless
        of the specified pH.
Justin MacCallum's avatar
Justin MacCallum committed
559

560
561
562
        Definitions for standard amino acids and nucleotides are built in.  You can call loadHydrogenDefinitions() to load
        additional definitions for other residue types.

Peter Eastman's avatar
Peter Eastman committed
563
564
565
566
567
568
        Parameters:
         - forcefield (ForceField) the ForceField to use for determining the positions of hydrogens
         - pH (float=7.0) the pH based on which to select variants
         - variants (list=None) an optional list of variants to use.  If this is specified, its length must equal the number
           of residues in the model.  variants[i] is the name of the variant to use for residue i (indexed starting at 0).
           If an element is None, the standard rules will be followed to select a variant for that residue.
569
570
         - platform (Platform=None) the Platform to use when computing the hydrogen atom positions.  If this is None,
           the default Platform will be used.
571
        Returns: a list of what variant was actually selected for each residue, in the same format as the variants parameter
Peter Eastman's avatar
Peter Eastman committed
572
573
        """
        # Check the list of variants.
Justin MacCallum's avatar
Justin MacCallum committed
574

Peter Eastman's avatar
Peter Eastman committed
575
576
577
578
579
580
        residues = list(self.topology.residues())
        if variants is not None:
            if len(variants) != len(residues):
                raise ValueError("The length of the variants list must equal the number of residues")
        else:
            variants = [None]*len(residues)
581
        actualVariants = [None]*len(residues)
Justin MacCallum's avatar
Justin MacCallum committed
582

Peter Eastman's avatar
Peter Eastman committed
583
        # Load the residue specifications.
Justin MacCallum's avatar
Justin MacCallum committed
584

585
586
        if not Modeller._hasLoadedStandardHydrogens:
            Modeller.loadHydrogenDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'hydrogens.xml'))
Peter Eastman's avatar
Peter Eastman committed
587
588

        # Make a list of atoms bonded to each atom.
Justin MacCallum's avatar
Justin MacCallum committed
589

Peter Eastman's avatar
Peter Eastman committed
590
        bonded = {}
591
592
        for atom in self.topology.atoms():
            bonded[atom] = []
Peter Eastman's avatar
Peter Eastman committed
593
594
595
        for atom1, atom2 in self.topology.bonds():
            bonded[atom1].append(atom2)
            bonded[atom2].append(atom1)
Justin MacCallum's avatar
Justin MacCallum committed
596

597
        # Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.
Justin MacCallum's avatar
Justin MacCallum committed
598

599
600
601
602
603
604
605
606
        def isHbond(d, h, a):
            if norm(d-a) > 0.35*nanometer:
                return False
            deltaDH = h-d
            deltaHA = a-h
            deltaDH /= norm(deltaDH)
            deltaHA /= norm(deltaHA)
            return acos(dot(deltaDH, deltaHA)) < 50*degree
Justin MacCallum's avatar
Justin MacCallum committed
607

Peter Eastman's avatar
Peter Eastman committed
608
        # Loop over residues.
Justin MacCallum's avatar
Justin MacCallum committed
609

Peter Eastman's avatar
Peter Eastman committed
610
611
612
613
614
615
616
617
618
619
620
621
        newTopology = Topology()
        newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
        newAtoms = {}
        newPositions = []*nanometer
        newIndices = []
        acceptors = [atom for atom in self.topology.atoms() if atom.element in (elem.oxygen, elem.nitrogen)]
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain)
                isNTerminal = (residue == chain._residues[0])
                isCTerminal = (residue == chain._residues[-1])
622
                if residue.name in Modeller._residueHydrogens:
Peter Eastman's avatar
Peter Eastman committed
623
                    # Add hydrogens.  First select which variant to use.
Justin MacCallum's avatar
Justin MacCallum committed
624

625
                    spec = Modeller._residueHydrogens[residue.name]
Peter Eastman's avatar
Peter Eastman committed
626
627
628
629
                    variant = variants[residue.index]
                    if variant is None:
                        if residue.name == 'CYS':
                            # If this is part of a disulfide, use CYX.
Justin MacCallum's avatar
Justin MacCallum committed
630

Peter Eastman's avatar
Peter Eastman committed
631
632
633
634
                            sulfur = [atom for atom in residue.atoms() if atom.element == elem.sulfur]
                            if len(sulfur) == 1 and any((atom.residue != residue for atom in bonded[sulfur[0]])):
                                variant = 'CYX'
                        if residue.name == 'HIS' and pH > 6.5:
635
                            # See if either nitrogen already has a hydrogen attached.
Justin MacCallum's avatar
Justin MacCallum committed
636

637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
                            nd1 = [atom for atom in residue.atoms() if atom.name == 'ND1']
                            ne2 = [atom for atom in residue.atoms() if atom.name == 'NE2']
                            if len(nd1) != 1 or len(ne2) != 1:
                                raise ValueError('HIS residue (%d) has the wrong set of atoms' % residue.index)
                            nd1 = nd1[0]
                            ne2 = ne2[0]
                            nd1HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[nd1]))
                            ne2HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[ne2]))
                            if nd1HasHydrogen and ne2HasHydrogen:
                                variant = 'HIP'
                            elif nd1HasHydrogen:
                                variant = 'HID'
                            elif ne2HasHydrogen:
                                variant = 'HIE'
                            else:
                                # Estimate the hydrogen positions.
Justin MacCallum's avatar
Justin MacCallum committed
653

654
655
656
657
658
659
660
661
662
663
664
665
666
667
                                nd1Pos = self.positions[nd1.index]
                                ne2Pos = self.positions[ne2.index]
                                hd1Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[nd1]:
                                    hd1Delta += nd1Pos-self.positions[other.index]
                                hd1Delta *= 0.1*nanometer/norm(hd1Delta)
                                hd1Pos = nd1Pos+hd1Delta
                                he2Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[ne2]:
                                    he2Delta += ne2Pos-self.positions[other.index]
                                he2Delta *= 0.1*nanometer/norm(he2Delta)
                                he2Pos = ne2Pos+he2Delta

                                # See whether either hydrogen would form a hydrogen bond.
Justin MacCallum's avatar
Justin MacCallum committed
668

669
670
671
672
673
674
675
676
677
678
679
680
681
682
                                nd1IsBonded = False
                                ne2IsBonded = False
                                for acceptor in acceptors:
                                    if acceptor.residue != residue:
                                        acceptorPos = self.positions[acceptor.index]
                                        if isHbond(nd1Pos, hd1Pos, acceptorPos):
                                            nd1IsBonded = True
                                            break
                                        if isHbond(ne2Pos, he2Pos, acceptorPos):
                                            ne2IsBonded = True
                                if ne2IsBonded and not nd1IsBonded:
                                    variant = 'HIE'
                                else:
                                    variant = 'HID'
Peter Eastman's avatar
Peter Eastman committed
683
684
685
686
                        elif residue.name == 'HIS':
                            variant = 'HIP'
                    if variant is not None and variant not in spec.variants:
                        raise ValueError('Illegal variant for %s residue: %s' % (residue.name, variant))
687
                    actualVariants[residue.index] = variant
Justin MacCallum's avatar
Justin MacCallum committed
688

Peter Eastman's avatar
Peter Eastman committed
689
                    # Make a list of hydrogens that should be present in the residue.
Justin MacCallum's avatar
Justin MacCallum committed
690

Peter Eastman's avatar
Peter Eastman committed
691
692
693
694
695
                    parents = [atom for atom in residue.atoms() if atom.element != elem.hydrogen]
                    parentNames = [atom.name for atom in parents]
                    hydrogens = [h for h in spec.hydrogens if (variant is None and pH <= h.maxph) or (h.variants is None and pH <= h.maxph) or (h.variants is not None and variant in h.variants)]
                    hydrogens = [h for h in hydrogens if h.terminal is None or (isNTerminal and h.terminal == 'N') or (isCTerminal and h.terminal == 'C')]
                    hydrogens = [h for h in hydrogens if h.parent in parentNames]
Justin MacCallum's avatar
Justin MacCallum committed
696

Peter Eastman's avatar
Peter Eastman committed
697
                    # Loop over atoms in the residue, adding them to the new topology along with required hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
698

Peter Eastman's avatar
Peter Eastman committed
699
700
                    for parent in residue.atoms():
                        # Add the atom.
Justin MacCallum's avatar
Justin MacCallum committed
701

Peter Eastman's avatar
Peter Eastman committed
702
703
704
705
706
                        newAtom = newTopology.addAtom(parent.name, parent.element, newResidue)
                        newAtoms[parent] = newAtom
                        newPositions.append(deepcopy(self.positions[parent.index]))
                        if parent in parents:
                            # Match expected hydrogens with existing ones and find which ones need to be added.
Justin MacCallum's avatar
Justin MacCallum committed
707

Peter Eastman's avatar
Peter Eastman committed
708
709
710
711
                            existing = [atom for atom in bonded[parent] if atom.element == elem.hydrogen]
                            expected = [h for h in hydrogens if h.parent == parent.name]
                            if len(existing) < len(expected):
                                # Try to match up existing hydrogens to expected ones.
Justin MacCallum's avatar
Justin MacCallum committed
712

Peter Eastman's avatar
Peter Eastman committed
713
714
715
716
717
718
719
720
                                matches = []
                                for e in existing:
                                    match = [h for h in expected if h.name == e.name]
                                    if len(match) > 0:
                                        matches.append(match[0])
                                        expected.remove(match[0])
                                    else:
                                        matches.append(None)
Justin MacCallum's avatar
Justin MacCallum committed
721

Peter Eastman's avatar
Peter Eastman committed
722
                                # If any hydrogens couldn't be matched by name, just match them arbitrarily.
Justin MacCallum's avatar
Justin MacCallum committed
723

Peter Eastman's avatar
Peter Eastman committed
724
725
726
727
                                for i in range(len(matches)):
                                    if matches[i] is None:
                                        matches[i] = expected[-1]
                                        expected.remove(expected[-1])
Justin MacCallum's avatar
Justin MacCallum committed
728

Peter Eastman's avatar
Peter Eastman committed
729
                                # Add the missing hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
730

Peter Eastman's avatar
Peter Eastman committed
731
732
733
                                for h in expected:
                                    newH = newTopology.addAtom(h.name, elem.hydrogen, newResidue)
                                    newIndices.append(newH.index)
734
735
                                    delta = Vec3(0, 0, 0)*nanometer
                                    if len(bonded[parent]) > 0:
Peter Eastman's avatar
Peter Eastman committed
736
737
738
739
                                        for other in bonded[parent]:
                                            delta += self.positions[parent.index]-self.positions[other.index]
                                    else:
                                        delta = Vec3(random.random(), random.random(), random.random())*nanometer
740
                                    delta *= 0.1*nanometer/norm(delta)
741
742
                                    delta += 0.05*Vec3(random.random(), random.random(), random.random())*nanometer
                                    delta *= 0.1*nanometer/norm(delta)
Peter Eastman's avatar
Peter Eastman committed
743
744
745
746
                                    newPositions.append(self.positions[parent.index]+delta)
                                    newTopology.addBond(newAtom, newH)
                else:
                    # Just copy over the residue.
Justin MacCallum's avatar
Justin MacCallum committed
747

Peter Eastman's avatar
Peter Eastman committed
748
749
750
751
752
753
754
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
755

Peter Eastman's avatar
Peter Eastman committed
756
        # The hydrogens were added at random positions.  Now use the ForceField to fix them up.
Justin MacCallum's avatar
Justin MacCallum committed
757

758
        system = forcefield.createSystem(newTopology, rigidWater=False)
Peter Eastman's avatar
Peter Eastman committed
759
        atoms = list(newTopology.atoms())
760
761
762
763
        for i in range(system.getNumParticles()):
            if atoms[i].element != elem.hydrogen:
                # This is a heavy atom, so make it immobile.
                system.setParticleMass(i, 0)
764
765
766
767
        if platform is None:
            context = Context(system, VerletIntegrator(0.0))
        else:
            context = Context(system, VerletIntegrator(0.0), platform)
Peter Eastman's avatar
Peter Eastman committed
768
        context.setPositions(newPositions)
769
        LocalEnergyMinimizer.minimize(context)
Peter Eastman's avatar
Peter Eastman committed
770
771
        self.topology = newTopology
        self.positions = context.getState(getPositions=True).getPositions()
772
        del context
773
        return actualVariants
Justin MacCallum's avatar
Justin MacCallum committed
774

775
776
    def addExtraParticles(self, forcefield):
        """Add missing extra particles to the model that are required by a force field.
Justin MacCallum's avatar
Justin MacCallum committed
777

778
779
780
        Some force fields use "extra particles" that do not represent actual atoms, but still need to be included in
        the System.  Examples include lone pairs, Drude particles, and the virtual sites used in some water models
        to adjust the charge distribution.  Extra particles can be recognized by the fact that their element is None.
Justin MacCallum's avatar
Justin MacCallum committed
781

782
783
784
        This method is primarily used to add extra particles, but it can also remove them.  It tries to match every
        residue in the Topology to a template in the force field.  If there is no match, it will both add and remove
        extra particles as necessary to make it match.
Justin MacCallum's avatar
Justin MacCallum committed
785

786
787
788
789
        Parameters:
         - forcefield (ForceField) the ForceField defining what extra particles should be present
        """
        # Create copies of all residue templates that have had all extra points removed.
Justin MacCallum's avatar
Justin MacCallum committed
790

791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
        templatesNoEP = {}
        for resName, template in forcefield._templates.iteritems():
            if any(atom.element is None for atom in template.atoms):
                index = 0
                newIndex = {}
                newTemplate = ForceField._TemplateData(resName)
                for i, atom in enumerate(template.atoms):
                    if atom.element is not None:
                        newIndex[i] = index
                        index += 1
                        newTemplate.atoms.append(ForceField._TemplateAtomData(atom.name, atom.type, atom.element))
                for b1, b2 in template.bonds:
                    if b1 in newIndex and b2 in newIndex:
                        newTemplate.bonds.append((newIndex[b1], newIndex[b2]))
                        newTemplate.atoms[newIndex[b1]].bondedTo.append(newIndex[b2])
                        newTemplate.atoms[newIndex[b2]].bondedTo.append(newIndex[b1])
                for b in template.externalBonds:
                    if b in newIndex:
                        newTemplate.externalBonds.append(newIndex[b])
                templatesNoEP[template] = newTemplate
Justin MacCallum's avatar
Justin MacCallum committed
811

812
        # Record which atoms are bonded to each other atom, with and without extra particles.
Justin MacCallum's avatar
Justin MacCallum committed
813

814
815
816
817
        bondedToAtom = []
        bondedToAtomNoEP = []
        for atom in self.topology.atoms():
            bondedToAtom.append(set())
peastman's avatar
peastman committed
818
            bondedToAtomNoEP.append(set())
819
820
821
822
823
824
        for atom1, atom2 in self.topology.bonds():
            bondedToAtom[atom1.index].add(atom2.index)
            bondedToAtom[atom2.index].add(atom1.index)
            if atom1.element is not None and atom2.element is not None:
                bondedToAtomNoEP[atom1.index].add(atom2.index)
                bondedToAtomNoEP[atom2.index].add(atom1.index)
Justin MacCallum's avatar
Justin MacCallum committed
825

peastman's avatar
peastman committed
826
827
        # If the force field has a DrudeForce, record the types of Drude particles and their parents since we'll
        # need them for picking particle positions.
Justin MacCallum's avatar
Justin MacCallum committed
828

peastman's avatar
peastman committed
829
830
831
832
833
        drudeTypeMap = {}
        for force in forcefield._forces:
            if isinstance(force, DrudeGenerator):
                for type in force.typeMap:
                    drudeTypeMap[type] = force.typeMap[type][0]
Justin MacCallum's avatar
Justin MacCallum committed
834

835
        # Create the new Topology.
Justin MacCallum's avatar
Justin MacCallum committed
836

837
838
839
840
841
842
843
844
        newTopology = Topology()
        newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
        newAtoms = {}
        newPositions = []*nanometer
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain)
Justin MacCallum's avatar
Justin MacCallum committed
845

846
                # Look for a matching template.
Justin MacCallum's avatar
Justin MacCallum committed
847

848
849
850
851
852
853
854
855
                matchFound = False
                signature = _createResidueSignature([atom.element for atom in residue.atoms()])
                if signature in forcefield._templateSignatures:
                    for t in forcefield._templateSignatures[signature]:
                        if _matchResidue(residue, t, bondedToAtom) is not None:
                            matchFound = True
                if matchFound:
                    # Just copy the residue over.
Justin MacCallum's avatar
Justin MacCallum committed
856

857
858
859
860
861
862
863
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
                else:
                    # There's no matching template.  Try to find one that matches based on everything except
                    # extra points.
Justin MacCallum's avatar
Justin MacCallum committed
864

865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
                    template = None
                    residueNoEP = Residue(residue.name, residue.index, residue.chain)
                    residueNoEP._atoms = [atom for atom in residue.atoms() if atom.element is not None]
                    if signature in forcefield._templateSignatures:
                        for t in forcefield._templateSignatures[signature]:
                            if t in templatesNoEP:
                                matches = _matchResidue(residueNoEP, templatesNoEP[t], bondedToAtomNoEP)
                                if matches is not None:
                                    template = t;
                                    # Record the corresponding atoms.
                                    matchingAtoms = {}
                                    for atom, match in zip(residueNoEP.atoms(), matches):
                                        templateAtomName = t.atoms[match].name
                                        for templateAtom in template.atoms:
                                            if templateAtom.name == templateAtomName:
                                                matchingAtoms[templateAtom] = atom
                                    break
                    if template is None:
                        raise ValueError('Residue %d (%s) does not match any template defined by the ForceField.' % (residue.index+1, residue.name))
Justin MacCallum's avatar
Justin MacCallum committed
884

885
                    # Add the regular atoms.
Justin MacCallum's avatar
Justin MacCallum committed
886

887
888
889
890
                    for atom in residue.atoms():
                        if atom.element is not None:
                            newAtoms[atom] = newTopology.addAtom(atom.name, atom.element, newResidue)
                            newPositions.append(deepcopy(self.positions[atom.index]))
Justin MacCallum's avatar
Justin MacCallum committed
891

892
                    # Add the extra points.
Justin MacCallum's avatar
Justin MacCallum committed
893

894
895
896
                    templateAtomPositions = len(template.atoms)*[None]
                    for index, atom in enumerate(template.atoms):
                        if atom in matchingAtoms:
peastman's avatar
peastman committed
897
                            templateAtomPositions[index] = self.positions[matchingAtoms[atom].index].value_in_unit(nanometer)
898
899
900
901
902
903
904
                    for index, atom in enumerate(template.atoms):
                        if atom.element is None:
                            newTopology.addAtom(atom.name, None, newResidue)
                            position = None
                            for site in template.virtualSites:
                                if site.index == index:
                                    # This is a virtual site.  Compute its position by the correct rule.
Justin MacCallum's avatar
Justin MacCallum committed
905

906
907
908
909
910
                                    if site.type == 'average2':
                                        position = site.weights[0]*templateAtomPositions[index+site.atoms[0]] + site.weights[1]*templateAtomPositions[index+site.atoms[1]]
                                    elif site.type == 'average3':
                                        position = site.weights[0]*templateAtomPositions[index+site.atoms[0]] + site.weights[1]*templateAtomPositions[index+site.atoms[1]] + site.weights[2]*templateAtomPositions[index+site.atoms[2]]
                                    elif site.type == 'outOfPlane':
peastman's avatar
peastman committed
911
912
913
914
                                        v1 = templateAtomPositions[index+site.atoms[1]] - templateAtomPositions[index+site.atoms[0]]
                                        v2 = templateAtomPositions[index+site.atoms[2]] - templateAtomPositions[index+site.atoms[0]]
                                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
                                        position = templateAtomPositions[index+site.atoms[0]] + site.weights[0]*v1 + site.weights[1]*v2 + site.weights[2]*cross
peastman's avatar
peastman committed
915
916
                            if position is None and atom.type in drudeTypeMap:
                                # This is a Drude particle.  Put it on top of its parent atom.
Justin MacCallum's avatar
Justin MacCallum committed
917

peastman's avatar
peastman committed
918
919
920
                                for atom2, pos in zip(template.atoms, templateAtomPositions):
                                    if atom2.type in drudeTypeMap[atom.type]:
                                        position = deepcopy(pos)
921
922
923
                            if position is None:
                                # We couldn't figure out the correct position.  As a wild guess, just put it at the center of the residue
                                # and hope that energy minimization will fix it.
Justin MacCallum's avatar
Justin MacCallum committed
924

925
                                knownPositions = [x for x in templateAtomPositions if x is not None]
peastman's avatar
peastman committed
926
927
                                position = sum(knownPositions)/len(knownPositions)
                            newPositions.append(position*nanometer)
928
929
930
931
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
932
        self.positions = newPositions