modeller.py 54.4 KB
Newer Older
1
2
"""
modeller.py: Provides tools for editing molecular models
3
4
5
6
7
8

This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.

9
Portions copyright (c) 2012-2015 Stanford University and the Authors.
10
11
12
Authors: Peter Eastman
Contributors:

Justin MacCallum's avatar
Justin MacCallum committed
13
Permission is hereby granted, free of charge, to any person obtaining a
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
30
"""
Peter Eastman's avatar
Peter Eastman committed
31
from __future__ import division
32
from __future__ import absolute_import
Peter Eastman's avatar
Peter Eastman committed
33

34
35
36
__author__ = "Peter Eastman"
__version__ = "1.0"

37
from simtk.openmm.app import Topology, PDBFile, ForceField
peastman's avatar
peastman committed
38
from simtk.openmm.app.forcefield import HAngles, _createResidueSignature, _matchResidue, DrudeGenerator
39
from simtk.openmm.app.topology import Residue
40
from simtk.openmm.vec3 import Vec3
41
from simtk.openmm import System, Context, NonbondedForce, CustomNonbondedForce, HarmonicBondForce, HarmonicAngleForce, VerletIntegrator, LocalEnergyMinimizer
42
from simtk.unit import nanometer, molar, elementary_charge, amu, gram, liter, degree, sqrt, acos, is_quantity, dot, norm
43
import simtk.unit as unit
44
from . import element as elem
Peter Eastman's avatar
Peter Eastman committed
45
46
import os
import random
Peter Eastman's avatar
Peter Eastman committed
47
import xml.etree.ElementTree as etree
Peter Eastman's avatar
Peter Eastman committed
48
49
from copy import deepcopy
from math import ceil, floor
50
51
52

class Modeller(object):
    """Modeller provides tools for editing molecular models, such as adding water or missing hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
53

54
55
56
57
58
    To use it, create a Modeller object, specifying the initial Topology and atom positions.  You can
    then call various methods to change the model in different ways.  Each time you do, a new Topology
    and list of coordinates is created to represent the changed model.  Finally, call getTopology()
    and getPositions() to get the results.
    """
Justin MacCallum's avatar
Justin MacCallum committed
59

60
61
    _residueHydrogens = {}
    _hasLoadedStandardHydrogens = False
Peter Eastman's avatar
Peter Eastman committed
62

63
64
    def __init__(self, topology, positions):
        """Create a new Modeller object
Justin MacCallum's avatar
Justin MacCallum committed
65

Robert McGibbon's avatar
Robert McGibbon committed
66
67
68
69
70
71
        Parameters
        ----------
        topology : Topology
            the initial Topology of the model
        positions : list
            the initial atomic positions
72
        """
73
        ## The Topology describing the structure of the system
74
        self.topology = topology
Peter Eastman's avatar
Peter Eastman committed
75
        if not is_quantity(positions):
Peter Eastman's avatar
Bug fix  
Peter Eastman committed
76
            positions = positions*nanometer
77
        ## The list of atom positions
78
        self.positions = positions
Justin MacCallum's avatar
Justin MacCallum committed
79

80
81
82
    def getTopology(self):
        """Get the Topology of the model."""
        return self.topology
Justin MacCallum's avatar
Justin MacCallum committed
83

84
85
86
87
    def getPositions(self):
        """Get the atomic positions."""
        return self.positions

88
89
    def add(self, addTopology, addPositions):
        """Add chains, residues, atoms, and bonds to the model.
Justin MacCallum's avatar
Justin MacCallum committed
90

Robert McGibbon's avatar
Robert McGibbon committed
91
92
93
94
95
96
97
98
99
100
        Specify what to add by providing a new Topology object and the
        corresponding atomic positions. All chains, residues, atoms, and bonds
        contained in the Topology are added to the model.

        Parameters
        ----------
        addTopology : Topology
            a Topology whose contents should be added to the model
        addPositions : list
            the positions of the atoms to add
101
102
        """
        # Copy over the existing model.
Justin MacCallum's avatar
Justin MacCallum committed
103

104
        newTopology = Topology()
105
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
106
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
107
        newPositions = []*nanometer
108
        for chain in self.topology.chains():
109
            newChain = newTopology.addChain(chain.id)
110
            for residue in chain.residues():
111
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
112
                for atom in residue.atoms():
113
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
114
115
116
117
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
118

119
120
121
122
        # Add the new model

        newAtoms = {}
        for chain in addTopology.chains():
123
            newChain = newTopology.addChain(chain.id)
124
            for residue in chain.residues():
125
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
126
                for atom in residue.atoms():
127
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
128
129
130
131
132
133
134
135
136
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(addPositions[atom.index]))
        for bond in addTopology.bonds():
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions

    def delete(self, toDelete):
        """Delete chains, residues, atoms, and bonds from the model.
Justin MacCallum's avatar
Justin MacCallum committed
137

138
139
140
141
        You can specify objects to delete at any granularity: atoms, residues, or chains.  Passing
        in an Atom object causes that Atom to be deleted.  Passing in a Residue object causes that
        Residue and all Atoms it contains to be deleted.  Passing in a Chain object causes that
        Chain and all Residues and Atoms it contains to be deleted.
Justin MacCallum's avatar
Justin MacCallum committed
142

143
144
145
        In all cases, when an Atom is deleted, any bonds it participates in are also deleted.
        You also can specify a bond (as a tuple of Atom objects) to delete just that bond without
        deleting the Atoms it connects.
Justin MacCallum's avatar
Justin MacCallum committed
146

Robert McGibbon's avatar
Robert McGibbon committed
147
148
149
150
151
        Parameters
        ----------
        toDelete : list
            a list of Atoms, Residues, Chains, and bonds (specified as tuples of
            Atoms) to delete
152
153
        """
        newTopology = Topology()
154
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
155
156
157
158
159
        newAtoms = {}
        newPositions = []*nanometer
        deleteSet = set(toDelete)
        for chain in self.topology.chains():
            if chain not in deleteSet:
160
                needNewChain = True;
161
162
                for residue in chain.residues():
                    if residue not in deleteSet:
163
                        needNewResidue = True
164
165
                        for atom in residue.atoms():
                            if atom not in deleteSet:
166
                                if needNewChain:
167
                                    newChain = newTopology.addChain(chain.id)
168
169
                                    needNewChain = False;
                                if needNewResidue:
170
                                    newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
171
                                    needNewResidue = False;
172
                                newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
173
174
                                newAtoms[atom] = newAtom
                                newPositions.append(deepcopy(self.positions[atom.index]))
175
176
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
177
178
                if bond not in deleteSet and (bond[1], bond[0]) not in deleteSet:
                    newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
179
180
        self.topology = newTopology
        self.positions = newPositions
Justin MacCallum's avatar
Justin MacCallum committed
181

182
183
184
    def deleteWater(self):
        """Delete all water molecules from the model."""
        self.delete(res for res in self.topology.residues() if res.name == "HOH")
Justin MacCallum's avatar
Justin MacCallum committed
185

186
187
    def convertWater(self, model='tip3p'):
        """Convert all water molecules to a different water model.
Justin MacCallum's avatar
Justin MacCallum committed
188

189
190
191
192
193
194
        Parameters
        ----------
        model : string='tip3p'
            the water model to convert to.  Supported values are 'tip3p',
            'spce', 'tip4pew', and 'tip5p'.

Robert McGibbon's avatar
Robert McGibbon committed
195

196
197
        @deprecated Use addExtraParticles() instead.  It performs the same
        function but in a more general way.
198
        """
Peter Eastman's avatar
Peter Eastman committed
199
        if model in ('tip3p', 'spce'):
200
201
202
203
204
205
206
207
            sites = 3
        elif model == 'tip4pew':
            sites = 4
        elif model == 'tip5p':
            sites = 5
        else:
            raise ValueError('Unknown water model: %s' % model)
        newTopology = Topology()
208
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
209
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
210
        newPositions = []*nanometer
211
        for chain in self.topology.chains():
212
            newChain = newTopology.addChain(chain.id)
213
            for residue in chain.residues():
214
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
215
216
217
218
219
220
221
222
223
224
225
226
                if residue.name == "HOH":
                    # Copy the oxygen and hydrogens
                    oatom = [atom for atom in residue.atoms() if atom.element == elem.oxygen]
                    hatoms = [atom for atom in residue.atoms() if atom.element == elem.hydrogen]
                    if len(oatom) != 1 or len(hatoms) != 2:
                        raise ValueError('Illegal water molecule (residue %d): contains %d oxygen(s) and %d hydrogen(s)' % (residue.index, len(oatom), len(hatoms)))
                    o = newTopology.addAtom(oatom[0].name, oatom[0].element, newResidue)
                    h1 = newTopology.addAtom(hatoms[0].name, hatoms[0].element, newResidue)
                    h2 = newTopology.addAtom(hatoms[1].name, hatoms[1].element, newResidue)
                    newAtoms[oatom[0]] = o
                    newAtoms[hatoms[0]] = h1
                    newAtoms[hatoms[1]] = h2
Peter Eastman's avatar
Peter Eastman committed
227
228
229
                    po = deepcopy(self.positions[oatom[0].index])
                    ph1 = deepcopy(self.positions[hatoms[0].index])
                    ph2 = deepcopy(self.positions[hatoms[1].index])
230
231
232
                    newPositions.append(po)
                    newPositions.append(ph1)
                    newPositions.append(ph2)
Justin MacCallum's avatar
Justin MacCallum committed
233

234
                    # Add virtual sites.
Justin MacCallum's avatar
Justin MacCallum committed
235

236
237
                    if sites == 4:
                        newTopology.addAtom('M', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
238
                        newPositions.append(0.786646558*po + 0.106676721*ph1 + 0.106676721*ph2)
239
240
241
                    elif sites == 5:
                        newTopology.addAtom('M1', None, newResidue)
                        newTopology.addAtom('M2', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
242
243
                        v1 = (ph1-po).value_in_unit(nanometer)
                        v2 = (ph2-po).value_in_unit(nanometer)
244
                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
Peter Eastman's avatar
Peter Eastman committed
245
246
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 - 6.4437903*cross)*nanometer)
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 + 6.4437903*cross)*nanometer)
247
248
249
                else:
                    # Just copy the residue over.
                    for atom in residue.atoms():
250
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
251
                        newAtoms[atom] = newAtom
Peter Eastman's avatar
Peter Eastman committed
252
                        newPositions.append(deepcopy(self.positions[atom.index]))
253
254
255
256
257
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions
Peter Eastman's avatar
Peter Eastman committed
258

259
    def addSolvent(self, forcefield, model='tip3p', boxSize=None, boxVectors=None, padding=None, numAdded=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*molar, neutralize=True):
Peter Eastman's avatar
Peter Eastman committed
260
        """Add solvent (both water and ions) to the model to fill a rectangular box.
Justin MacCallum's avatar
Justin MacCallum committed
261

Peter Eastman's avatar
Peter Eastman committed
262
        The algorithm works as follows:
Robert McGibbon's avatar
Robert McGibbon committed
263

Peter Eastman's avatar
Peter Eastman committed
264
265
        1. Water molecules are added to fill the box.
        2. Water molecules are removed if their distance to any solute atom is less than the sum of their van der Waals radii.
266
        3. If the solute is charged and neutralize=True, enough positive or negative ions are added to neutralize it.  Each ion is added by
Peter Eastman's avatar
Peter Eastman committed
267
268
           randomly selecting a water molecule and replacing it with the ion.
        4. Ion pairs are added to give the requested total ionic strength.
Justin MacCallum's avatar
Justin MacCallum committed
269

270
        The box size can be specified in any of several ways:
Robert McGibbon's avatar
Robert McGibbon committed
271

272
273
274
        1. You can explicitly give the vectors defining the periodic box to use.
        2. Alternatively, for a rectangular box you can simply give the dimensions of the unit cell.
        3. You can give a padding distance.  The largest dimension of the solute (along the x, y, or z axis) is determined, and a cubic
Robert McGibbon's avatar
Robert McGibbon committed
275
           box of size (largest dimension)+2*padding is used.
276
        4. You can specify the total number of molecules (both waters and ions) to add.  A cubic box is then created whose size is
Robert McGibbon's avatar
Robert McGibbon committed
277
           just large enough hold the specified amount of solvent.
278
        5. Finally, if none of the above options is specified, the existing Topology's box vectors are used.
Justin MacCallum's avatar
Justin MacCallum committed
279

Robert McGibbon's avatar
Robert McGibbon committed
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
        Parameters
        ----------
        forcefield : ForceField
            the ForceField to use for determining van der Waals radii and atomic charges
        model : str='tip3p'
            the water model to use.  Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
        boxSize : Vec3=None
            the size of the box to fill with water
        boxVectors : tuple of Vec3=None
            the vectors defining the periodic box to fill with water
        padding : distance=None
            the padding distance to use
        numAdded : int=None
            the total number of molecules (waters and ions) to add
        positiveIon : string='Na+'
            the type of positive ion to add.  Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'
        negativeIon : string='Cl-'
            the type of negative ion to add.  Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware
            that not all force fields support all ion types.
        ionicStrength : concentration=0*molar
            the total concentration of ions (both positive and negative) to add.  This
            does not include ions that are added to neutralize the system.
Robert McGibbon's avatar
Robert McGibbon committed
302
303
        neutralize : bool=True
            whether to add ions to neutralize the system
Peter Eastman's avatar
Peter Eastman committed
304
        """
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
        if len([x for x in (boxSize, boxVectors, padding, numAdded) if x is not None]) > 1:
            raise ValueError('At most one of the following arguments may be specified: boxSize, boxVectors, padding, numAdded')

        # Load the pre-equilibrated water box.

        vdwRadiusPerSigma = 0.5612310241546864907
        if model == 'tip3p':
            waterRadius = 0.31507524065751241*vdwRadiusPerSigma
        elif model == 'spce':
            waterRadius = 0.31657195050398818*vdwRadiusPerSigma
        elif model == 'tip4pew':
            waterRadius = 0.315365*vdwRadiusPerSigma
        elif model == 'tip5p':
            waterRadius = 0.312*vdwRadiusPerSigma
        else:
            raise ValueError('Unknown water model: %s' % model)
        pdb = PDBFile(os.path.join(os.path.dirname(__file__), 'data', model+'.pdb'))
        pdbTopology = pdb.getTopology()
        pdbPositions = pdb.getPositions().value_in_unit(nanometer)
        pdbResidues = list(pdbTopology.residues())
Robert McGibbon's avatar
Robert McGibbon committed
325
326
        pdbBoxSize = pdbTopology.getUnitCellDimensions().value_in_unit(nanometer)

Peter Eastman's avatar
Peter Eastman committed
327
        # Pick a unit cell size.
Justin MacCallum's avatar
Justin MacCallum committed
328

329
330
        if numAdded is not None:
            # Select a padding distance which is guaranteed to give more than the specified number of molecules.
Robert McGibbon's avatar
Robert McGibbon committed
331

332
333
334
            padding = 1.1*(numAdded/((len(pdbResidues)/pdbBoxSize[0]**3)*8))**(1.0/3.0)
            if padding < 0.5:
                padding = 0.5 # Ensure we have enough when adding very small numbers of molecules
335
336
337
338
339
340
        if boxVectors is not None:
            if is_quantity(boxVectors[0]):
                boxVectors = (boxVectors[0].value_in_unit(nanometer), boxVectors[1].value_in_unit(nanometer), boxVectors[2].value_in_unit(nanometer))
            box = Vec3(boxVectors[0][0], boxVectors[1][1], boxVectors[2][2])
            vectors = boxVectors
        elif boxSize is not None:
341
342
            if is_quantity(boxSize):
                boxSize = boxSize.value_in_unit(nanometer)
343
344
            box = Vec3(boxSize[0], boxSize[1], boxSize[2])
            vectors = (Vec3(boxSize[0], 0, 0), Vec3(0, boxSize[1], 0), Vec3(0, 0, boxSize[2]))
Peter Eastman's avatar
Peter Eastman committed
345
        elif padding is not None:
346
347
            if is_quantity(padding):
                padding = padding.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
348
            maxSize = max(max((pos[i] for pos in self.positions))-min((pos[i] for pos in self.positions)) for i in range(3))
349
            maxSize = maxSize.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
350
            box = (maxSize+2*padding)*Vec3(1, 1, 1)
351
            vectors = (Vec3(maxSize+2*padding, 0, 0), Vec3(0, maxSize+2*padding, 0), Vec3(0, 0, maxSize+2*padding))
Peter Eastman's avatar
Peter Eastman committed
352
        else:
353
354
            box = self.topology.getUnitCellDimensions().value_in_unit(nanometer)
            vectors = self.topology.getPeriodicBoxVectors().value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
355
            if box is None:
356
                raise ValueError('Neither the box size, box vectors, nor padding was specified, and the Topology does not define unit cell dimensions')
Peter Eastman's avatar
Peter Eastman committed
357
        invBox = Vec3(1.0/box[0], 1.0/box[1], 1.0/box[2])
Justin MacCallum's avatar
Justin MacCallum committed
358

Peter Eastman's avatar
Peter Eastman committed
359
        # Identify the ion types.
Justin MacCallum's avatar
Justin MacCallum committed
360

Peter Eastman's avatar
Peter Eastman committed
361
362
363
364
365
366
367
368
        posIonElements = {'Cs+':elem.cesium, 'K+':elem.potassium, 'Li+':elem.lithium, 'Na+':elem.sodium, 'Rb+':elem.rubidium}
        negIonElements = {'Cl-':elem.chlorine, 'Br-':elem.bromine, 'F-':elem.fluorine, 'I-':elem.iodine}
        if positiveIon not in posIonElements:
            raise ValueError('Illegal value for positive ion: %s' % positiveIon)
        if negativeIon not in negIonElements:
            raise ValueError('Illegal value for negative ion: %s' % negativeIon)
        positiveElement = posIonElements[positiveIon]
        negativeElement = negIonElements[negativeIon]
Justin MacCallum's avatar
Justin MacCallum committed
369

Peter Eastman's avatar
Peter Eastman committed
370
        # Have the ForceField build a System for the solute from which we can determine van der Waals radii.
Justin MacCallum's avatar
Justin MacCallum committed
371

Peter Eastman's avatar
Peter Eastman committed
372
373
374
375
376
377
378
379
        system = forcefield.createSystem(self.topology)
        nonbonded = None
        for i in range(system.getNumForces()):
            if isinstance(system.getForce(i), NonbondedForce):
                nonbonded = system.getForce(i)
        if nonbonded is None:
            raise ValueError('The ForceField does not specify a NonbondedForce')
        cutoff = [nonbonded.getParticleParameters(i)[1].value_in_unit(nanometer)*vdwRadiusPerSigma+waterRadius for i in range(system.getNumParticles())]
380
        waterCutoff = waterRadius
Peter Eastman's avatar
Peter Eastman committed
381
382
383
384
        if len(cutoff) == 0:
            maxCutoff = waterCutoff
        else:
            maxCutoff = max(waterCutoff, max(cutoff))
Justin MacCallum's avatar
Justin MacCallum committed
385

Peter Eastman's avatar
Peter Eastman committed
386
387
388
        # Copy the solute over.

        newTopology = Topology()
389
        newTopology.setPeriodicBoxVectors(vectors*nanometer)
Peter Eastman's avatar
Peter Eastman committed
390
391
392
        newAtoms = {}
        newPositions = []*nanometer
        for chain in self.topology.chains():
393
            newChain = newTopology.addChain(chain.id)
Peter Eastman's avatar
Peter Eastman committed
394
            for residue in chain.residues():
395
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Peter Eastman's avatar
Peter Eastman committed
396
                for atom in residue.atoms():
397
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
Peter Eastman's avatar
Peter Eastman committed
398
399
400
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
401
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
402

Peter Eastman's avatar
Peter Eastman committed
403
        # Sort the solute atoms into cells for fast lookup.
Justin MacCallum's avatar
Justin MacCallum committed
404

Peter Eastman's avatar
Peter Eastman committed
405
406
407
408
        if len(self.positions) == 0:
            positions = []
        else:
            positions = self.positions.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
409
        cells = {}
Peter Eastman's avatar
Peter Eastman committed
410
        numCells = tuple((max(1, int(floor(box[i]/maxCutoff))) for i in range(3)))
Peter Eastman's avatar
Peter Eastman committed
411
412
413
414
415
416
417
        cellSize = tuple((box[i]/numCells[i] for i in range(3)))
        for i in range(len(positions)):
            cell = tuple((int(floor(positions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
            if cell in cells:
                cells[cell].append(i)
            else:
                cells[cell] = [i]
Justin MacCallum's avatar
Justin MacCallum committed
418

Peter Eastman's avatar
Peter Eastman committed
419
        # Create a generator that loops over atoms close to a position.
Justin MacCallum's avatar
Justin MacCallum committed
420

Peter Eastman's avatar
Peter Eastman committed
421
422
423
424
425
426
427
428
429
430
        def neighbors(pos):
            centralCell = tuple((int(floor(pos[i]/cellSize[i])) for i in range(3)))
            offsets = (-1, 0, 1)
            for i in offsets:
                for j in offsets:
                    for k in offsets:
                        cell = ((centralCell[0]+i+numCells[0])%numCells[0], (centralCell[1]+j+numCells[1])%numCells[1], (centralCell[2]+k+numCells[2])%numCells[2])
                        if cell in cells:
                            for atom in cells[cell]:
                                yield atom
Justin MacCallum's avatar
Justin MacCallum committed
431

Peter Eastman's avatar
Peter Eastman committed
432
        # Define a function to compute the distance between two points, taking periodic boundary conditions into account.
Justin MacCallum's avatar
Justin MacCallum committed
433

Peter Eastman's avatar
Peter Eastman committed
434
435
        def periodicDistance(pos1, pos2):
            delta = pos1-pos2
436
437
438
            delta -= vectors[2]*floor(delta[2]*invBox[2]+0.5)
            delta -= vectors[1]*floor(delta[1]*invBox[1]+0.5)
            delta -= vectors[0]*floor(delta[0]*invBox[0]+0.5)
439
            return norm(delta)
Justin MacCallum's avatar
Justin MacCallum committed
440

Peter Eastman's avatar
Peter Eastman committed
441
        # Find the list of water molecules to add.
Justin MacCallum's avatar
Justin MacCallum committed
442

Peter Eastman's avatar
Peter Eastman committed
443
        newChain = newTopology.addChain()
Peter Eastman's avatar
Peter Eastman committed
444
445
446
447
448
        if len(positions) == 0:
            center = Vec3(0, 0, 0)
        else:
            center = [(max((pos[i] for pos in positions))+min((pos[i] for pos in positions)))/2 for i in range(3)]
            center = Vec3(center[0], center[1], center[2])
Peter Eastman's avatar
Peter Eastman committed
449
450
451
452
453
454
455
456
457
458
459
        numBoxes = [int(ceil(box[i]/pdbBoxSize[i])) for i in range(3)]
        addedWaters = []
        for boxx in range(numBoxes[0]):
            for boxy in range(numBoxes[1]):
                for boxz in range(numBoxes[2]):
                    offset = Vec3(boxx*pdbBoxSize[0], boxy*pdbBoxSize[1], boxz*pdbBoxSize[2])
                    for residue in pdbResidues:
                        oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
                        atomPos = pdbPositions[oxygen.index]+offset
                        if not any((atomPos[i] > box[i] for i in range(3))):
                            # This molecule is inside the box, so see how close to it is to the solute.
Justin MacCallum's avatar
Justin MacCallum committed
460

Peter Eastman's avatar
Peter Eastman committed
461
462
463
464
465
466
                            atomPos += center-box/2
                            for i in neighbors(atomPos):
                                if periodicDistance(atomPos, positions[i]) < cutoff[i]:
                                    break
                            else:
                                # Record this water molecule as one to add.
Justin MacCallum's avatar
Justin MacCallum committed
467

Peter Eastman's avatar
Peter Eastman committed
468
                                addedWaters.append((residue.index, atomPos))
Justin MacCallum's avatar
Justin MacCallum committed
469

470
471
472
        if numAdded is not None:
            # We added many more waters than we actually want.  Sort them based on distance to the nearest box edge and
            # only keep the ones in the middle.
Robert McGibbon's avatar
Robert McGibbon committed
473

474
475
            lowerBound = center-box/2
            upperBound = center+box/2
peastman's avatar
peastman committed
476
            distToEdge = (min(min(pos-lowerBound), min(upperBound-pos)) for index, pos in addedWaters)
477
478
            sortedIndex = [i[0] for i in sorted(enumerate(distToEdge), key=lambda x: -x[1])]
            addedWaters = [addedWaters[i] for i in sortedIndex[:numAdded]]
Robert McGibbon's avatar
Robert McGibbon committed
479

480
            # Compute a new periodic box size.
Robert McGibbon's avatar
Robert McGibbon committed
481

482
483
484
485
            maxSize = max(max((pos[i] for index, pos in addedWaters))-min((pos[i] for index, pos in addedWaters)) for i in range(3))
            newTopology.setUnitCellDimensions(Vec3(maxSize, maxSize, maxSize))
        else:
            # There could be clashes between water molecules at the box edges.  Find ones to remove.
Robert McGibbon's avatar
Robert McGibbon committed
486

487
488
489
490
491
492
493
494
495
496
497
498
499
500
            upperCutoff = center+box/2-Vec3(waterCutoff, waterCutoff, waterCutoff)
            lowerCutoff = center-box/2+Vec3(waterCutoff, waterCutoff, waterCutoff)
            lowerSkinPositions = [pos for index, pos in addedWaters if pos[0] < lowerCutoff[0] or pos[1] < lowerCutoff[1] or pos[2] < lowerCutoff[2]]
            filteredWaters = []
            cells = {}
            for i in range(len(lowerSkinPositions)):
                cell = tuple((int(floor(lowerSkinPositions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
                if cell in cells:
                    cells[cell].append(i)
                else:
                    cells[cell] = [i]
            for entry in addedWaters:
                pos = entry[1]
                if pos[0] < upperCutoff[0] and pos[1] < upperCutoff[1] and pos[2] < upperCutoff[2]:
Peter Eastman's avatar
Peter Eastman committed
501
                    filteredWaters.append(entry)
502
503
504
505
                else:
                    if not any((periodicDistance(lowerSkinPositions[i], pos) < waterCutoff and norm(lowerSkinPositions[i]-pos) > waterCutoff for i in neighbors(pos))):
                        filteredWaters.append(entry)
            addedWaters = filteredWaters
Justin MacCallum's avatar
Justin MacCallum committed
506

Peter Eastman's avatar
Peter Eastman committed
507
        # Add ions to neutralize the system.
Justin MacCallum's avatar
Justin MacCallum committed
508

Peter Eastman's avatar
Peter Eastman committed
509
510
511
512
513
514
515
        def addIon(element):
            # Replace a water by an ion.
            index = random.randint(0, len(addedWaters)-1)
            newResidue = newTopology.addResidue(element.symbol.upper(), newChain)
            newTopology.addAtom(element.symbol, element, newResidue)
            newPositions.append(addedWaters[index][1]*nanometer)
            del addedWaters[index]
516
517
518
519
520
521
        if neutralize:
            totalCharge = int(floor(0.5+sum((nonbonded.getParticleParameters(i)[0].value_in_unit(elementary_charge) for i in range(system.getNumParticles())))))
            if abs(totalCharge) > len(addedWaters):
                raise Exception('Cannot neutralize the system because the charge is greater than the number of available positions for ions')
            for i in range(abs(totalCharge)):
                addIon(positiveElement if totalCharge < 0 else negativeElement)
Justin MacCallum's avatar
Justin MacCallum committed
522

Peter Eastman's avatar
Peter Eastman committed
523
        # Add ions based on the desired ionic strength.
Justin MacCallum's avatar
Justin MacCallum committed
524

Peter Eastman's avatar
Peter Eastman committed
525
526
527
528
529
530
        numIons = len(addedWaters)*ionicStrength/(55.4*molar) # Pure water is about 55.4 molar (depending on temperature)
        numPairs = int(floor(numIons/2+0.5))
        for i in range(numPairs):
            addIon(positiveElement)
        for i in range(numPairs):
            addIon(negativeElement)
Justin MacCallum's avatar
Justin MacCallum committed
531

Peter Eastman's avatar
Peter Eastman committed
532
        # Add the water molecules.
Justin MacCallum's avatar
Justin MacCallum committed
533

Peter Eastman's avatar
Peter Eastman committed
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
        for index, pos in addedWaters:
            newResidue = newTopology.addResidue(residue.name, newChain)
            residue = pdbResidues[index]
            oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
            oPos = pdbPositions[oxygen.index]
            molAtoms = []
            for atom in residue.atoms():
                molAtoms.append(newTopology.addAtom(atom.name, atom.element, newResidue))
                newPositions.append((pos+pdbPositions[atom.index]-oPos)*nanometer)
            for atom1 in molAtoms:
                if atom1.element == elem.oxygen:
                    for atom2 in molAtoms:
                        if atom2.element == elem.hydrogen:
                            newTopology.addBond(atom1, atom2)
        self.topology = newTopology
        self.positions = newPositions
Justin MacCallum's avatar
Justin MacCallum committed
550

Peter Eastman's avatar
Peter Eastman committed
551
552
553
554
555
556
    class _ResidueData:
        """Inner class used to encapsulate data about the hydrogens for a residue."""
        def __init__(self, name):
            self.name = name
            self.variants = []
            self.hydrogens = []
Justin MacCallum's avatar
Justin MacCallum committed
557

Peter Eastman's avatar
Peter Eastman committed
558
559
560
561
562
563
564
565
    class _Hydrogen:
        """Inner class used to encapsulate data about a hydrogen atom."""
        def __init__(self, name, parent, maxph, variants, terminal):
            self.name = name
            self.parent = parent
            self.maxph = maxph
            self.variants = variants
            self.terminal = terminal
Justin MacCallum's avatar
Justin MacCallum committed
566

567
568
569
    @staticmethod
    def loadHydrogenDefinitions(file):
        """Load an XML file containing definitions of hydrogens that should be added by addHydrogens().
Justin MacCallum's avatar
Justin MacCallum committed
570

571
572
573
574
        The built in hydrogens.xml file containing definitions for standard amino acids and nucleotides is loaded automatically.
        This method can be used to load additional definitions for other residue types.  They will then be used in subsequent
        calls to addHydrogens().
        """
575
576
577
578
579
        tree = etree.parse(file)
        infinity = float('Inf')
        for residue in tree.getroot().findall('Residue'):
            resName = residue.attrib['name']
            data = Modeller._ResidueData(resName)
580
            Modeller._residueHydrogens[resName] = data
581
582
583
584
585
586
587
588
589
590
591
592
593
            for variant in residue.findall('Variant'):
                data.variants.append(variant.attrib['name'])
            for hydrogen in residue.findall('H'):
                maxph = infinity
                if 'maxph' in hydrogen.attrib:
                    maxph = float(hydrogen.attrib['maxph'])
                atomVariants = None
                if 'variant' in hydrogen.attrib:
                    atomVariants = hydrogen.attrib['variant'].split(',')
                terminal = None
                if 'terminal' in hydrogen.attrib:
                    terminal = hydrogen.attrib['terminal']
                data.hydrogens.append(Modeller._Hydrogen(hydrogen.attrib['name'], hydrogen.attrib['parent'], maxph, atomVariants, terminal))
Justin MacCallum's avatar
Justin MacCallum committed
594

595
    def addHydrogens(self, forcefield=None, pH=7.0, variants=None, platform=None):
Peter Eastman's avatar
Peter Eastman committed
596
        """Add missing hydrogens to the model.
Justin MacCallum's avatar
Justin MacCallum committed
597

Peter Eastman's avatar
Peter Eastman committed
598
599
600
        Some residues can exist in multiple forms depending on the pH and properties of the local environment.  These
        variants differ in the presence or absence of particular hydrogens.  In particular, the following variants
        are supported:
Justin MacCallum's avatar
Justin MacCallum committed
601

Peter Eastman's avatar
Peter Eastman committed
602
603
604
        Aspartic acid:
            ASH: Neutral form with a hydrogen on one of the delta oxygens
            ASP: Negatively charged form without a hydrogen on either delta oxygen
Justin MacCallum's avatar
Justin MacCallum committed
605

Peter Eastman's avatar
Peter Eastman committed
606
607
608
        Cysteine:
            CYS: Neutral form with a hydrogen on the sulfur
            CYX: No hydrogen on the sulfur (either negatively charged, or part of a disulfide bond)
Justin MacCallum's avatar
Justin MacCallum committed
609

Peter Eastman's avatar
Peter Eastman committed
610
611
612
        Glutamic acid:
            GLH: Neutral form with a hydrogen on one of the epsilon oxygens
            GLU: Negatively charged form without a hydrogen on either epsilon oxygen
Justin MacCallum's avatar
Justin MacCallum committed
613

Peter Eastman's avatar
Peter Eastman committed
614
615
616
617
        Histidine:
            HID: Neutral form with a hydrogen on the ND1 atom
            HIE: Neutral form with a hydrogen on the NE2 atom
            HIP: Positively charged form with hydrogens on both ND1 and NE2
Justin MacCallum's avatar
Justin MacCallum committed
618

Peter Eastman's avatar
Peter Eastman committed
619
620
621
        Lysine:
            LYN: Neutral form with two hydrogens on the zeta nitrogen
            LYS: Positively charged form with three hydrogens on the zeta nitrogen
Justin MacCallum's avatar
Justin MacCallum committed
622

Peter Eastman's avatar
Peter Eastman committed
623
        The variant to use for each residue is determined by the following rules:
Justin MacCallum's avatar
Justin MacCallum committed
624

Peter Eastman's avatar
Peter Eastman committed
625
626
627
        1. The most common variant at the specified pH is selected.
        2. Any Cysteine that participates in a disulfide bond uses the CYX variant regardless of pH.
        3. For a neutral Histidine residue, the HID or HIE variant is selected based on which one forms a better hydrogen bond.
Justin MacCallum's avatar
Justin MacCallum committed
628

Peter Eastman's avatar
Peter Eastman committed
629
630
631
        You can override these rules by explicitly specifying a variant for any residue.  Also keep in mind that this
        function will only add hydrogens.  It will never remove ones that are already present in the model, regardless
        of the specified pH.
Justin MacCallum's avatar
Justin MacCallum committed
632

633
634
635
        Definitions for standard amino acids and nucleotides are built in.  You can call loadHydrogenDefinitions() to load
        additional definitions for other residue types.

Robert McGibbon's avatar
Robert McGibbon committed
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
        Parameters
        ----------
        forcefield : ForceField=None
            the ForceField to use for determining the positions of hydrogens.
            If this is None, positions will be picked which are generally
            reasonable but not optimized for any particular ForceField.
        pH : float=7.0
            the pH based on which to select variants
        variants : list=None
            an optional list of variants to use.  If this is specified, its
            length must equal the number of residues in the model.  variants[i]
            is the name of the variant to use for residue i (indexed starting at
            0). If an element is None, the standard rules will be followed to
            select a variant for that residue.
        platform : Platform=None
            the Platform to use when computing the hydrogen atom positions.  If
            this is None, the default Platform will be used.

        Returns
        -------
             a list of what variant was actually selected for each residue,
             in the same format as the variants param
Peter Eastman's avatar
Peter Eastman committed
658
659
        """
        # Check the list of variants.
Justin MacCallum's avatar
Justin MacCallum committed
660

Peter Eastman's avatar
Peter Eastman committed
661
662
663
664
665
666
        residues = list(self.topology.residues())
        if variants is not None:
            if len(variants) != len(residues):
                raise ValueError("The length of the variants list must equal the number of residues")
        else:
            variants = [None]*len(residues)
667
        actualVariants = [None]*len(residues)
Justin MacCallum's avatar
Justin MacCallum committed
668

Peter Eastman's avatar
Peter Eastman committed
669
        # Load the residue specifications.
Justin MacCallum's avatar
Justin MacCallum committed
670

671
672
        if not Modeller._hasLoadedStandardHydrogens:
            Modeller.loadHydrogenDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'hydrogens.xml'))
Peter Eastman's avatar
Peter Eastman committed
673
674

        # Make a list of atoms bonded to each atom.
Justin MacCallum's avatar
Justin MacCallum committed
675

Peter Eastman's avatar
Peter Eastman committed
676
        bonded = {}
677
678
        for atom in self.topology.atoms():
            bonded[atom] = []
Peter Eastman's avatar
Peter Eastman committed
679
680
681
        for atom1, atom2 in self.topology.bonds():
            bonded[atom1].append(atom2)
            bonded[atom2].append(atom1)
Justin MacCallum's avatar
Justin MacCallum committed
682

683
        # Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.
Justin MacCallum's avatar
Justin MacCallum committed
684

685
686
687
688
689
690
691
692
        def isHbond(d, h, a):
            if norm(d-a) > 0.35*nanometer:
                return False
            deltaDH = h-d
            deltaHA = a-h
            deltaDH /= norm(deltaDH)
            deltaHA /= norm(deltaHA)
            return acos(dot(deltaDH, deltaHA)) < 50*degree
Justin MacCallum's avatar
Justin MacCallum committed
693

Peter Eastman's avatar
Peter Eastman committed
694
        # Loop over residues.
Justin MacCallum's avatar
Justin MacCallum committed
695

Peter Eastman's avatar
Peter Eastman committed
696
        newTopology = Topology()
697
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
Peter Eastman's avatar
Peter Eastman committed
698
699
700
701
702
        newAtoms = {}
        newPositions = []*nanometer
        newIndices = []
        acceptors = [atom for atom in self.topology.atoms() if atom.element in (elem.oxygen, elem.nitrogen)]
        for chain in self.topology.chains():
703
            newChain = newTopology.addChain(chain.id)
Peter Eastman's avatar
Peter Eastman committed
704
            for residue in chain.residues():
705
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Peter Eastman's avatar
Peter Eastman committed
706
707
                isNTerminal = (residue == chain._residues[0])
                isCTerminal = (residue == chain._residues[-1])
708
                if residue.name in Modeller._residueHydrogens:
Peter Eastman's avatar
Peter Eastman committed
709
                    # Add hydrogens.  First select which variant to use.
Justin MacCallum's avatar
Justin MacCallum committed
710

711
                    spec = Modeller._residueHydrogens[residue.name]
Peter Eastman's avatar
Peter Eastman committed
712
713
714
715
                    variant = variants[residue.index]
                    if variant is None:
                        if residue.name == 'CYS':
                            # If this is part of a disulfide, use CYX.
Justin MacCallum's avatar
Justin MacCallum committed
716

Peter Eastman's avatar
Peter Eastman committed
717
718
719
720
                            sulfur = [atom for atom in residue.atoms() if atom.element == elem.sulfur]
                            if len(sulfur) == 1 and any((atom.residue != residue for atom in bonded[sulfur[0]])):
                                variant = 'CYX'
                        if residue.name == 'HIS' and pH > 6.5:
721
                            # See if either nitrogen already has a hydrogen attached.
Justin MacCallum's avatar
Justin MacCallum committed
722

723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
                            nd1 = [atom for atom in residue.atoms() if atom.name == 'ND1']
                            ne2 = [atom for atom in residue.atoms() if atom.name == 'NE2']
                            if len(nd1) != 1 or len(ne2) != 1:
                                raise ValueError('HIS residue (%d) has the wrong set of atoms' % residue.index)
                            nd1 = nd1[0]
                            ne2 = ne2[0]
                            nd1HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[nd1]))
                            ne2HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[ne2]))
                            if nd1HasHydrogen and ne2HasHydrogen:
                                variant = 'HIP'
                            elif nd1HasHydrogen:
                                variant = 'HID'
                            elif ne2HasHydrogen:
                                variant = 'HIE'
                            else:
                                # Estimate the hydrogen positions.
Justin MacCallum's avatar
Justin MacCallum committed
739

740
741
742
743
744
745
746
747
748
749
750
751
752
753
                                nd1Pos = self.positions[nd1.index]
                                ne2Pos = self.positions[ne2.index]
                                hd1Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[nd1]:
                                    hd1Delta += nd1Pos-self.positions[other.index]
                                hd1Delta *= 0.1*nanometer/norm(hd1Delta)
                                hd1Pos = nd1Pos+hd1Delta
                                he2Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[ne2]:
                                    he2Delta += ne2Pos-self.positions[other.index]
                                he2Delta *= 0.1*nanometer/norm(he2Delta)
                                he2Pos = ne2Pos+he2Delta

                                # See whether either hydrogen would form a hydrogen bond.
Justin MacCallum's avatar
Justin MacCallum committed
754

755
756
757
758
759
760
761
762
763
764
765
766
767
768
                                nd1IsBonded = False
                                ne2IsBonded = False
                                for acceptor in acceptors:
                                    if acceptor.residue != residue:
                                        acceptorPos = self.positions[acceptor.index]
                                        if isHbond(nd1Pos, hd1Pos, acceptorPos):
                                            nd1IsBonded = True
                                            break
                                        if isHbond(ne2Pos, he2Pos, acceptorPos):
                                            ne2IsBonded = True
                                if ne2IsBonded and not nd1IsBonded:
                                    variant = 'HIE'
                                else:
                                    variant = 'HID'
Peter Eastman's avatar
Peter Eastman committed
769
770
771
772
                        elif residue.name == 'HIS':
                            variant = 'HIP'
                    if variant is not None and variant not in spec.variants:
                        raise ValueError('Illegal variant for %s residue: %s' % (residue.name, variant))
773
                    actualVariants[residue.index] = variant
Justin MacCallum's avatar
Justin MacCallum committed
774

Peter Eastman's avatar
Peter Eastman committed
775
                    # Make a list of hydrogens that should be present in the residue.
Justin MacCallum's avatar
Justin MacCallum committed
776

Peter Eastman's avatar
Peter Eastman committed
777
778
779
780
781
                    parents = [atom for atom in residue.atoms() if atom.element != elem.hydrogen]
                    parentNames = [atom.name for atom in parents]
                    hydrogens = [h for h in spec.hydrogens if (variant is None and pH <= h.maxph) or (h.variants is None and pH <= h.maxph) or (h.variants is not None and variant in h.variants)]
                    hydrogens = [h for h in hydrogens if h.terminal is None or (isNTerminal and h.terminal == 'N') or (isCTerminal and h.terminal == 'C')]
                    hydrogens = [h for h in hydrogens if h.parent in parentNames]
Justin MacCallum's avatar
Justin MacCallum committed
782

Peter Eastman's avatar
Peter Eastman committed
783
                    # Loop over atoms in the residue, adding them to the new topology along with required hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
784

Peter Eastman's avatar
Peter Eastman committed
785
786
                    for parent in residue.atoms():
                        # Add the atom.
Justin MacCallum's avatar
Justin MacCallum committed
787

Peter Eastman's avatar
Peter Eastman committed
788
789
790
791
792
                        newAtom = newTopology.addAtom(parent.name, parent.element, newResidue)
                        newAtoms[parent] = newAtom
                        newPositions.append(deepcopy(self.positions[parent.index]))
                        if parent in parents:
                            # Match expected hydrogens with existing ones and find which ones need to be added.
Justin MacCallum's avatar
Justin MacCallum committed
793

Peter Eastman's avatar
Peter Eastman committed
794
795
796
797
                            existing = [atom for atom in bonded[parent] if atom.element == elem.hydrogen]
                            expected = [h for h in hydrogens if h.parent == parent.name]
                            if len(existing) < len(expected):
                                # Try to match up existing hydrogens to expected ones.
Justin MacCallum's avatar
Justin MacCallum committed
798

Peter Eastman's avatar
Peter Eastman committed
799
800
801
802
803
804
805
806
                                matches = []
                                for e in existing:
                                    match = [h for h in expected if h.name == e.name]
                                    if len(match) > 0:
                                        matches.append(match[0])
                                        expected.remove(match[0])
                                    else:
                                        matches.append(None)
Justin MacCallum's avatar
Justin MacCallum committed
807

Peter Eastman's avatar
Peter Eastman committed
808
                                # If any hydrogens couldn't be matched by name, just match them arbitrarily.
Justin MacCallum's avatar
Justin MacCallum committed
809

Peter Eastman's avatar
Peter Eastman committed
810
811
812
813
                                for i in range(len(matches)):
                                    if matches[i] is None:
                                        matches[i] = expected[-1]
                                        expected.remove(expected[-1])
Justin MacCallum's avatar
Justin MacCallum committed
814

Peter Eastman's avatar
Peter Eastman committed
815
                                # Add the missing hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
816

Peter Eastman's avatar
Peter Eastman committed
817
818
819
                                for h in expected:
                                    newH = newTopology.addAtom(h.name, elem.hydrogen, newResidue)
                                    newIndices.append(newH.index)
820
821
                                    delta = Vec3(0, 0, 0)*nanometer
                                    if len(bonded[parent]) > 0:
Peter Eastman's avatar
Peter Eastman committed
822
823
824
825
                                        for other in bonded[parent]:
                                            delta += self.positions[parent.index]-self.positions[other.index]
                                    else:
                                        delta = Vec3(random.random(), random.random(), random.random())*nanometer
826
                                    delta *= 0.1*nanometer/norm(delta)
827
828
                                    delta += 0.05*Vec3(random.random(), random.random(), random.random())*nanometer
                                    delta *= 0.1*nanometer/norm(delta)
Peter Eastman's avatar
Peter Eastman committed
829
830
831
832
                                    newPositions.append(self.positions[parent.index]+delta)
                                    newTopology.addBond(newAtom, newH)
                else:
                    # Just copy over the residue.
Justin MacCallum's avatar
Justin MacCallum committed
833

Peter Eastman's avatar
Peter Eastman committed
834
835
836
837
838
839
840
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
841

842
843
844
845
        # The hydrogens were added at random positions.  Now perform an energy minimization to fix them up.

        if forcefield is not None:
            # Use the ForceField the user specified.
Robert McGibbon's avatar
Robert McGibbon committed
846

847
848
849
850
851
852
853
854
855
            system = forcefield.createSystem(newTopology, rigidWater=False)
            atoms = list(newTopology.atoms())
            for i in range(system.getNumParticles()):
                if atoms[i].element != elem.hydrogen:
                    # This is a heavy atom, so make it immobile.
                    system.setParticleMass(i, 0)
        else:
            # Create a System that restrains the distance of each hydrogen from its parent atom
            # and causes hydrogens to spread out evenly.
Robert McGibbon's avatar
Robert McGibbon committed
856

857
            system = System()
858
            nonbonded = CustomNonbondedForce('100/((r/0.1)^4+1)')
859
860
861
862
863
            bonds = HarmonicBondForce()
            angles = HarmonicAngleForce()
            system.addForce(nonbonded)
            system.addForce(bonds)
            system.addForce(angles)
864
            bondedTo = []
865
866
867
868
869
870
            for atom in newTopology.atoms():
                nonbonded.addParticle([])
                if atom.element != elem.hydrogen:
                    system.addParticle(0.0)
                else:
                    system.addParticle(1.0)
871
                bondedTo.append([])
872
873
874
            for atom1, atom2 in newTopology.bonds():
                if atom1.element == elem.hydrogen or atom2.element == elem.hydrogen:
                    bonds.addBond(atom1.index, atom2.index, 0.1, 100000.0)
875
876
                bondedTo[atom1.index].append(atom2)
                bondedTo[atom2.index].append(atom1)
877
878
            for residue in newTopology.residues():
                if residue.name == 'HOH':
879
                    # Add an angle term to make the water geometry correct.
Robert McGibbon's avatar
Robert McGibbon committed
880

881
882
883
884
885
                    atoms = list(residue.atoms())
                    oindex = [i for i in range(len(atoms)) if atoms[i].element == elem.oxygen]
                    if len(atoms) == 3 and len(oindex) == 1:
                        hindex = list(set([0,1,2])-set(oindex))
                        angles.addAngle(atoms[hindex[0]].index, atoms[oindex[0]].index, atoms[hindex[1]].index, 1.824, 836.8)
886
887
                else:
                    # Add angle terms for any hydroxyls.
Robert McGibbon's avatar
Robert McGibbon committed
888

889
890
891
892
                    for atom in residue.atoms():
                        index = atom.index
                        if atom.element == elem.oxygen and len(bondedTo[index]) == 2 and elem.hydrogen in (a.element for a in bondedTo[index]):
                            angles.addAngle(bondedTo[index][0].index, index, bondedTo[index][1].index, 1.894, 460.24)
Robert McGibbon's avatar
Robert McGibbon committed
893

894
895
896
897
        if platform is None:
            context = Context(system, VerletIntegrator(0.0))
        else:
            context = Context(system, VerletIntegrator(0.0), platform)
Peter Eastman's avatar
Peter Eastman committed
898
        context.setPositions(newPositions)
899
        LocalEnergyMinimizer.minimize(context, 1.0, 50)
Peter Eastman's avatar
Peter Eastman committed
900
901
        self.topology = newTopology
        self.positions = context.getState(getPositions=True).getPositions()
902
        del context
903
        return actualVariants
Justin MacCallum's avatar
Justin MacCallum committed
904

905
    def addExtraParticles(self, forcefield):
Robert McGibbon's avatar
Robert McGibbon committed
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
        """Add missing extra particles to the model that are required by a force
        field.

        Some force fields use "extra particles" that do not represent
        actual atoms, but still need to be included in the System.  Examples
        include lone pairs, Drude particles, and the virtual sites used in some
        water models to adjust the charge distribution.  Extra particles can be
        recognized by the fact that their element is None.

        This method is primarily used to add extra particles, but it can also
        remove them.  It tries to match every residue in the Topology to a
        template in the force field.  If there is no match, it will both add
        and remove extra particles as necessary to make it match.

        Parameters
        ----------
        forcefield : ForceField
            the ForceField defining what extra particles should be present
924
925
        """
        # Create copies of all residue templates that have had all extra points removed.
Justin MacCallum's avatar
Justin MacCallum committed
926

927
        templatesNoEP = {}
928
        for resName, template  in forcefield._templates.items():
929
930
931
932
933
934
935
936
            if any(atom.element is None for atom in template.atoms):
                index = 0
                newIndex = {}
                newTemplate = ForceField._TemplateData(resName)
                for i, atom in enumerate(template.atoms):
                    if atom.element is not None:
                        newIndex[i] = index
                        index += 1
937
938
939
                        newAtom = ForceField._TemplateAtomData(atom.name, atom.type, atom.element)
                        newAtom.externalBonds = atom.externalBonds
                        newTemplate.atoms.append(newAtom)
940
941
942
943
944
945
946
947
948
                for b1, b2 in template.bonds:
                    if b1 in newIndex and b2 in newIndex:
                        newTemplate.bonds.append((newIndex[b1], newIndex[b2]))
                        newTemplate.atoms[newIndex[b1]].bondedTo.append(newIndex[b2])
                        newTemplate.atoms[newIndex[b2]].bondedTo.append(newIndex[b1])
                for b in template.externalBonds:
                    if b in newIndex:
                        newTemplate.externalBonds.append(newIndex[b])
                templatesNoEP[template] = newTemplate
Justin MacCallum's avatar
Justin MacCallum committed
949

950
        # Record which atoms are bonded to each other atom, with and without extra particles.
Justin MacCallum's avatar
Justin MacCallum committed
951

952
953
954
955
        bondedToAtom = []
        bondedToAtomNoEP = []
        for atom in self.topology.atoms():
            bondedToAtom.append(set())
peastman's avatar
peastman committed
956
            bondedToAtomNoEP.append(set())
957
958
959
960
961
962
        for atom1, atom2 in self.topology.bonds():
            bondedToAtom[atom1.index].add(atom2.index)
            bondedToAtom[atom2.index].add(atom1.index)
            if atom1.element is not None and atom2.element is not None:
                bondedToAtomNoEP[atom1.index].add(atom2.index)
                bondedToAtomNoEP[atom2.index].add(atom1.index)
Justin MacCallum's avatar
Justin MacCallum committed
963

peastman's avatar
peastman committed
964
965
        # If the force field has a DrudeForce, record the types of Drude particles and their parents since we'll
        # need them for picking particle positions.
Justin MacCallum's avatar
Justin MacCallum committed
966

peastman's avatar
peastman committed
967
968
969
970
971
        drudeTypeMap = {}
        for force in forcefield._forces:
            if isinstance(force, DrudeGenerator):
                for type in force.typeMap:
                    drudeTypeMap[type] = force.typeMap[type][0]
Justin MacCallum's avatar
Justin MacCallum committed
972

973
        # Create the new Topology.
Justin MacCallum's avatar
Justin MacCallum committed
974

975
        newTopology = Topology()
976
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
977
978
979
        newAtoms = {}
        newPositions = []*nanometer
        for chain in self.topology.chains():
980
            newChain = newTopology.addChain(chain.id)
981
            for residue in chain.residues():
982
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Justin MacCallum's avatar
Justin MacCallum committed
983

984
                # Look for a matching template.
Justin MacCallum's avatar
Justin MacCallum committed
985

986
987
988
989
990
991
992
993
                matchFound = False
                signature = _createResidueSignature([atom.element for atom in residue.atoms()])
                if signature in forcefield._templateSignatures:
                    for t in forcefield._templateSignatures[signature]:
                        if _matchResidue(residue, t, bondedToAtom) is not None:
                            matchFound = True
                if matchFound:
                    # Just copy the residue over.
Justin MacCallum's avatar
Justin MacCallum committed
994

995
996
997
998
999
1000
1001
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
                else:
                    # There's no matching template.  Try to find one that matches based on everything except
                    # extra points.
Justin MacCallum's avatar
Justin MacCallum committed
1002

1003
                    template = None
1004
                    residueNoEP = Residue(residue.name, residue.index, residue.chain, residue.id)
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
                    residueNoEP._atoms = [atom for atom in residue.atoms() if atom.element is not None]
                    if signature in forcefield._templateSignatures:
                        for t in forcefield._templateSignatures[signature]:
                            if t in templatesNoEP:
                                matches = _matchResidue(residueNoEP, templatesNoEP[t], bondedToAtomNoEP)
                                if matches is not None:
                                    template = t;
                                    # Record the corresponding atoms.
                                    matchingAtoms = {}
                                    for atom, match in zip(residueNoEP.atoms(), matches):
peastman's avatar
peastman committed
1015
                                        templateAtomName = templatesNoEP[t].atoms[match].name
1016
1017
1018
1019
1020
1021
                                        for templateAtom in template.atoms:
                                            if templateAtom.name == templateAtomName:
                                                matchingAtoms[templateAtom] = atom
                                    break
                    if template is None:
                        raise ValueError('Residue %d (%s) does not match any template defined by the ForceField.' % (residue.index+1, residue.name))
Justin MacCallum's avatar
Justin MacCallum committed
1022

1023
                    # Add the regular atoms.
Justin MacCallum's avatar
Justin MacCallum committed
1024

1025
1026
1027
1028
                    for atom in residue.atoms():
                        if atom.element is not None:
                            newAtoms[atom] = newTopology.addAtom(atom.name, atom.element, newResidue)
                            newPositions.append(deepcopy(self.positions[atom.index]))
Justin MacCallum's avatar
Justin MacCallum committed
1029

1030
                    # Add the extra points.
Justin MacCallum's avatar
Justin MacCallum committed
1031

1032
1033
1034
                    templateAtomPositions = len(template.atoms)*[None]
                    for index, atom in enumerate(template.atoms):
                        if atom in matchingAtoms:
peastman's avatar
peastman committed
1035
                            templateAtomPositions[index] = self.positions[matchingAtoms[atom].index].value_in_unit(nanometer)
1036
1037
1038
1039
1040
1041
1042
                    for index, atom in enumerate(template.atoms):
                        if atom.element is None:
                            newTopology.addAtom(atom.name, None, newResidue)
                            position = None
                            for site in template.virtualSites:
                                if site.index == index:
                                    # This is a virtual site.  Compute its position by the correct rule.
Justin MacCallum's avatar
Justin MacCallum committed
1043

1044
                                    if site.type == 'average2':
Matthew Harrigan's avatar
Matthew Harrigan committed
1045
                                        position = site.weights[0]*templateAtomPositions[site.atoms[0]] + site.weights[1]*templateAtomPositions[site.atoms[1]]
1046
                                    elif site.type == 'average3':
Matthew Harrigan's avatar
Matthew Harrigan committed
1047
                                        position = site.weights[0]*templateAtomPositions[site.atoms[0]] + site.weights[1]*templateAtomPositions[site.atoms[1]] + site.weights[2]*templateAtomPositions[site.atoms[2]]
1048
                                    elif site.type == 'outOfPlane':
Matthew Harrigan's avatar
Matthew Harrigan committed
1049
1050
                                        v1 = templateAtomPositions[site.atoms[1]] - templateAtomPositions[site.atoms[0]]
                                        v2 = templateAtomPositions[site.atoms[2]] - templateAtomPositions[site.atoms[0]]
peastman's avatar
peastman committed
1051
                                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
Matthew Harrigan's avatar
Matthew Harrigan committed
1052
                                        position = templateAtomPositions[site.atoms[0]] + site.weights[0]*v1 + site.weights[1]*v2 + site.weights[2]*cross
peastman's avatar
peastman committed
1053
1054
                            if position is None and atom.type in drudeTypeMap:
                                # This is a Drude particle.  Put it on top of its parent atom.
Justin MacCallum's avatar
Justin MacCallum committed
1055

peastman's avatar
peastman committed
1056
1057
1058
                                for atom2, pos in zip(template.atoms, templateAtomPositions):
                                    if atom2.type in drudeTypeMap[atom.type]:
                                        position = deepcopy(pos)
1059
1060
1061
                            if position is None:
                                # We couldn't figure out the correct position.  As a wild guess, just put it at the center of the residue
                                # and hope that energy minimization will fix it.
Justin MacCallum's avatar
Justin MacCallum committed
1062

1063
                                knownPositions = [x for x in templateAtomPositions if x is not None]
1064
                                position = unit.sum(knownPositions)/len(knownPositions)
peastman's avatar
peastman committed
1065
                            newPositions.append(position*nanometer)
1066
1067
1068
1069
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
1070
        self.positions = newPositions