"docs-source/vscode:/vscode.git/clone" did not exist on "3fa880603b2a7074fe2bc571e37eaa819b6b6c70"
modeller.py 59.1 KB
Newer Older
1
2
"""
modeller.py: Provides tools for editing molecular models
3
4
5
6
7
8

This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.

Peter Eastman's avatar
Peter Eastman committed
9
Portions copyright (c) 2012-2016 Stanford University and the Authors.
10
11
12
Authors: Peter Eastman
Contributors:

Justin MacCallum's avatar
Justin MacCallum committed
13
Permission is hereby granted, free of charge, to any person obtaining a
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
30
"""
Peter Eastman's avatar
Peter Eastman committed
31
from __future__ import division
32
from __future__ import absolute_import
Peter Eastman's avatar
Peter Eastman committed
33

34
35
36
__author__ = "Peter Eastman"
__version__ = "1.0"

37
from simtk.openmm.app import Topology, PDBFile, ForceField
38
from simtk.openmm.app.forcefield import HAngles, AllBonds, _createResidueSignature, _matchResidue, DrudeGenerator
39
from simtk.openmm.app.topology import Residue
40
from simtk.openmm.vec3 import Vec3
41
from simtk.openmm import System, Context, NonbondedForce, CustomNonbondedForce, HarmonicBondForce, HarmonicAngleForce, VerletIntegrator, LocalEnergyMinimizer
42
from simtk.unit import nanometer, molar, elementary_charge, amu, gram, liter, degree, sqrt, acos, is_quantity, dot, norm
43
import simtk.unit as unit
44
from . import element as elem
Peter Eastman's avatar
Peter Eastman committed
45
46
import os
import random
Peter Eastman's avatar
Peter Eastman committed
47
import xml.etree.ElementTree as etree
Peter Eastman's avatar
Peter Eastman committed
48
49
from copy import deepcopy
from math import ceil, floor
50
51
52

class Modeller(object):
    """Modeller provides tools for editing molecular models, such as adding water or missing hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
53

54
55
56
57
58
    To use it, create a Modeller object, specifying the initial Topology and atom positions.  You can
    then call various methods to change the model in different ways.  Each time you do, a new Topology
    and list of coordinates is created to represent the changed model.  Finally, call getTopology()
    and getPositions() to get the results.
    """
Justin MacCallum's avatar
Justin MacCallum committed
59

60
61
    _residueHydrogens = {}
    _hasLoadedStandardHydrogens = False
Peter Eastman's avatar
Peter Eastman committed
62

63
64
    def __init__(self, topology, positions):
        """Create a new Modeller object
Justin MacCallum's avatar
Justin MacCallum committed
65

Robert McGibbon's avatar
Robert McGibbon committed
66
67
68
69
70
71
        Parameters
        ----------
        topology : Topology
            the initial Topology of the model
        positions : list
            the initial atomic positions
72
        """
73
        ## The Topology describing the structure of the system
74
        self.topology = topology
Peter Eastman's avatar
Peter Eastman committed
75
        if not is_quantity(positions):
Peter Eastman's avatar
Bug fix  
Peter Eastman committed
76
            positions = positions*nanometer
77
        ## The list of atom positions
78
        self.positions = positions
Justin MacCallum's avatar
Justin MacCallum committed
79

80
81
82
    def getTopology(self):
        """Get the Topology of the model."""
        return self.topology
Justin MacCallum's avatar
Justin MacCallum committed
83

84
85
86
87
    def getPositions(self):
        """Get the atomic positions."""
        return self.positions

88
89
    def add(self, addTopology, addPositions):
        """Add chains, residues, atoms, and bonds to the model.
Justin MacCallum's avatar
Justin MacCallum committed
90

Robert McGibbon's avatar
Robert McGibbon committed
91
92
93
        Specify what to add by providing a new Topology object and the
        corresponding atomic positions. All chains, residues, atoms, and bonds
        contained in the Topology are added to the model.
Justin MacCallum's avatar
Justin MacCallum committed
94

Robert McGibbon's avatar
Robert McGibbon committed
95
96
97
98
99
100
        Parameters
        ----------
        addTopology : Topology
            a Topology whose contents should be added to the model
        addPositions : list
            the positions of the atoms to add
101
102
        """
        # Copy over the existing model.
Justin MacCallum's avatar
Justin MacCallum committed
103

104
        newTopology = Topology()
105
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
106
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
107
        newPositions = []*nanometer
108
        for chain in self.topology.chains():
109
            newChain = newTopology.addChain(chain.id)
110
            for residue in chain.residues():
111
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
112
                for atom in residue.atoms():
113
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
114
115
116
117
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
118

119
120
121
122
        # Add the new model

        newAtoms = {}
        for chain in addTopology.chains():
123
            newChain = newTopology.addChain(chain.id)
124
            for residue in chain.residues():
125
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
126
                for atom in residue.atoms():
127
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
128
129
130
131
132
133
134
135
136
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(addPositions[atom.index]))
        for bond in addTopology.bonds():
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions

    def delete(self, toDelete):
        """Delete chains, residues, atoms, and bonds from the model.
Justin MacCallum's avatar
Justin MacCallum committed
137

138
139
140
141
        You can specify objects to delete at any granularity: atoms, residues, or chains.  Passing
        in an Atom object causes that Atom to be deleted.  Passing in a Residue object causes that
        Residue and all Atoms it contains to be deleted.  Passing in a Chain object causes that
        Chain and all Residues and Atoms it contains to be deleted.
Justin MacCallum's avatar
Justin MacCallum committed
142

143
144
145
        In all cases, when an Atom is deleted, any bonds it participates in are also deleted.
        You also can specify a bond (as a tuple of Atom objects) to delete just that bond without
        deleting the Atoms it connects.
Justin MacCallum's avatar
Justin MacCallum committed
146

Robert McGibbon's avatar
Robert McGibbon committed
147
148
149
150
151
        Parameters
        ----------
        toDelete : list
            a list of Atoms, Residues, Chains, and bonds (specified as tuples of
            Atoms) to delete
152
153
        """
        newTopology = Topology()
154
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
155
156
157
158
159
        newAtoms = {}
        newPositions = []*nanometer
        deleteSet = set(toDelete)
        for chain in self.topology.chains():
            if chain not in deleteSet:
160
                needNewChain = True;
161
162
                for residue in chain.residues():
                    if residue not in deleteSet:
163
                        needNewResidue = True
164
165
                        for atom in residue.atoms():
                            if atom not in deleteSet:
166
                                if needNewChain:
167
                                    newChain = newTopology.addChain(chain.id)
168
169
                                    needNewChain = False;
                                if needNewResidue:
170
                                    newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
171
                                    needNewResidue = False;
172
                                newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
173
174
                                newAtoms[atom] = newAtom
                                newPositions.append(deepcopy(self.positions[atom.index]))
175
176
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
177
178
                if bond not in deleteSet and (bond[1], bond[0]) not in deleteSet:
                    newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
179
180
        self.topology = newTopology
        self.positions = newPositions
Justin MacCallum's avatar
Justin MacCallum committed
181

182
183
184
    def deleteWater(self):
        """Delete all water molecules from the model."""
        self.delete(res for res in self.topology.residues() if res.name == "HOH")
Justin MacCallum's avatar
Justin MacCallum committed
185

186
187
    def convertWater(self, model='tip3p'):
        """Convert all water molecules to a different water model.
Justin MacCallum's avatar
Justin MacCallum committed
188

189
190
191
        @deprecated Use addExtraParticles() instead.  It performs the same
        function but in a more general way.

192
193
194
195
196
        Parameters
        ----------
        model : string='tip3p'
            the water model to convert to.  Supported values are 'tip3p',
            'spce', 'tip4pew', and 'tip5p'.
197
        """
Peter Eastman's avatar
Peter Eastman committed
198
        if model in ('tip3p', 'spce'):
199
200
201
202
203
204
205
206
            sites = 3
        elif model == 'tip4pew':
            sites = 4
        elif model == 'tip5p':
            sites = 5
        else:
            raise ValueError('Unknown water model: %s' % model)
        newTopology = Topology()
207
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
208
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
209
        newPositions = []*nanometer
210
        for chain in self.topology.chains():
211
            newChain = newTopology.addChain(chain.id)
212
            for residue in chain.residues():
213
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
214
215
216
217
218
219
220
221
222
223
224
225
                if residue.name == "HOH":
                    # Copy the oxygen and hydrogens
                    oatom = [atom for atom in residue.atoms() if atom.element == elem.oxygen]
                    hatoms = [atom for atom in residue.atoms() if atom.element == elem.hydrogen]
                    if len(oatom) != 1 or len(hatoms) != 2:
                        raise ValueError('Illegal water molecule (residue %d): contains %d oxygen(s) and %d hydrogen(s)' % (residue.index, len(oatom), len(hatoms)))
                    o = newTopology.addAtom(oatom[0].name, oatom[0].element, newResidue)
                    h1 = newTopology.addAtom(hatoms[0].name, hatoms[0].element, newResidue)
                    h2 = newTopology.addAtom(hatoms[1].name, hatoms[1].element, newResidue)
                    newAtoms[oatom[0]] = o
                    newAtoms[hatoms[0]] = h1
                    newAtoms[hatoms[1]] = h2
Peter Eastman's avatar
Peter Eastman committed
226
227
228
                    po = deepcopy(self.positions[oatom[0].index])
                    ph1 = deepcopy(self.positions[hatoms[0].index])
                    ph2 = deepcopy(self.positions[hatoms[1].index])
229
230
231
                    newPositions.append(po)
                    newPositions.append(ph1)
                    newPositions.append(ph2)
Justin MacCallum's avatar
Justin MacCallum committed
232

233
                    # Add virtual sites.
Justin MacCallum's avatar
Justin MacCallum committed
234

235
236
                    if sites == 4:
                        newTopology.addAtom('M', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
237
                        newPositions.append(0.786646558*po + 0.106676721*ph1 + 0.106676721*ph2)
238
239
240
                    elif sites == 5:
                        newTopology.addAtom('M1', None, newResidue)
                        newTopology.addAtom('M2', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
241
242
                        v1 = (ph1-po).value_in_unit(nanometer)
                        v2 = (ph2-po).value_in_unit(nanometer)
243
                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
Peter Eastman's avatar
Peter Eastman committed
244
245
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 - 6.4437903*cross)*nanometer)
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 + 6.4437903*cross)*nanometer)
246
247
248
                else:
                    # Just copy the residue over.
                    for atom in residue.atoms():
249
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
250
                        newAtoms[atom] = newAtom
Peter Eastman's avatar
Peter Eastman committed
251
                        newPositions.append(deepcopy(self.positions[atom.index]))
252
253
254
255
256
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions
Peter Eastman's avatar
Peter Eastman committed
257

258
    def addSolvent(self, forcefield, model='tip3p', boxSize=None, boxVectors=None, padding=None, numAdded=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*molar, neutralize=True):
Peter Eastman's avatar
Peter Eastman committed
259
        """Add solvent (both water and ions) to the model to fill a rectangular box.
Justin MacCallum's avatar
Justin MacCallum committed
260

Peter Eastman's avatar
Peter Eastman committed
261
        The algorithm works as follows:
Robert McGibbon's avatar
Robert McGibbon committed
262

Peter Eastman's avatar
Peter Eastman committed
263
264
        1. Water molecules are added to fill the box.
        2. Water molecules are removed if their distance to any solute atom is less than the sum of their van der Waals radii.
265
        3. If the solute is charged and neutralize=True, enough positive or negative ions are added to neutralize it.  Each ion is added by
Peter Eastman's avatar
Peter Eastman committed
266
           randomly selecting a water molecule and replacing it with the ion.
267
        4. Ion pairs are added to give the requested total ionic strength.  Note that only monovalent ions are currently supported.
Justin MacCallum's avatar
Justin MacCallum committed
268

269
        The box size can be specified in any of several ways:
Robert McGibbon's avatar
Robert McGibbon committed
270

271
272
273
        1. You can explicitly give the vectors defining the periodic box to use.
        2. Alternatively, for a rectangular box you can simply give the dimensions of the unit cell.
        3. You can give a padding distance.  The largest dimension of the solute (along the x, y, or z axis) is determined, and a cubic
274
           box of size (largest dimension)+2*padding is used.
275
        4. You can specify the total number of molecules (both waters and ions) to add.  A cubic box is then created whose size is
Robert McGibbon's avatar
Robert McGibbon committed
276
           just large enough hold the specified amount of solvent.
277
        5. Finally, if none of the above options is specified, the existing Topology's box vectors are used.
Justin MacCallum's avatar
Justin MacCallum committed
278

Robert McGibbon's avatar
Robert McGibbon committed
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
        Parameters
        ----------
        forcefield : ForceField
            the ForceField to use for determining van der Waals radii and atomic charges
        model : str='tip3p'
            the water model to use.  Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
        boxSize : Vec3=None
            the size of the box to fill with water
        boxVectors : tuple of Vec3=None
            the vectors defining the periodic box to fill with water
        padding : distance=None
            the padding distance to use
        numAdded : int=None
            the total number of molecules (waters and ions) to add
        positiveIon : string='Na+'
            the type of positive ion to add.  Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'
        negativeIon : string='Cl-'
            the type of negative ion to add.  Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware
            that not all force fields support all ion types.
        ionicStrength : concentration=0*molar
            the total concentration of ions (both positive and negative) to add.  This
            does not include ions that are added to neutralize the system.
301
            Note that only monovalent ions are currently supported.
Robert McGibbon's avatar
Robert McGibbon committed
302
303
        neutralize : bool=True
            whether to add ions to neutralize the system
Peter Eastman's avatar
Peter Eastman committed
304
        """
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
        if len([x for x in (boxSize, boxVectors, padding, numAdded) if x is not None]) > 1:
            raise ValueError('At most one of the following arguments may be specified: boxSize, boxVectors, padding, numAdded')

        # Load the pre-equilibrated water box.

        vdwRadiusPerSigma = 0.5612310241546864907
        if model == 'tip3p':
            waterRadius = 0.31507524065751241*vdwRadiusPerSigma
        elif model == 'spce':
            waterRadius = 0.31657195050398818*vdwRadiusPerSigma
        elif model == 'tip4pew':
            waterRadius = 0.315365*vdwRadiusPerSigma
        elif model == 'tip5p':
            waterRadius = 0.312*vdwRadiusPerSigma
        else:
            raise ValueError('Unknown water model: %s' % model)
        pdb = PDBFile(os.path.join(os.path.dirname(__file__), 'data', model+'.pdb'))
        pdbTopology = pdb.getTopology()
        pdbPositions = pdb.getPositions().value_in_unit(nanometer)
        pdbResidues = list(pdbTopology.residues())
Robert McGibbon's avatar
Robert McGibbon committed
325
326
        pdbBoxSize = pdbTopology.getUnitCellDimensions().value_in_unit(nanometer)

Peter Eastman's avatar
Peter Eastman committed
327
        # Pick a unit cell size.
Justin MacCallum's avatar
Justin MacCallum committed
328

329
330
        if numAdded is not None:
            # Select a padding distance which is guaranteed to give more than the specified number of molecules.
Robert McGibbon's avatar
Robert McGibbon committed
331

332
333
334
            padding = 1.1*(numAdded/((len(pdbResidues)/pdbBoxSize[0]**3)*8))**(1.0/3.0)
            if padding < 0.5:
                padding = 0.5 # Ensure we have enough when adding very small numbers of molecules
335
336
337
338
339
340
        if boxVectors is not None:
            if is_quantity(boxVectors[0]):
                boxVectors = (boxVectors[0].value_in_unit(nanometer), boxVectors[1].value_in_unit(nanometer), boxVectors[2].value_in_unit(nanometer))
            box = Vec3(boxVectors[0][0], boxVectors[1][1], boxVectors[2][2])
            vectors = boxVectors
        elif boxSize is not None:
341
342
            if is_quantity(boxSize):
                boxSize = boxSize.value_in_unit(nanometer)
343
344
            box = Vec3(boxSize[0], boxSize[1], boxSize[2])
            vectors = (Vec3(boxSize[0], 0, 0), Vec3(0, boxSize[1], 0), Vec3(0, 0, boxSize[2]))
Peter Eastman's avatar
Peter Eastman committed
345
        elif padding is not None:
346
347
            if is_quantity(padding):
                padding = padding.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
348
            maxSize = max(max((pos[i] for pos in self.positions))-min((pos[i] for pos in self.positions)) for i in range(3))
349
            maxSize = maxSize.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
350
            box = (maxSize+2*padding)*Vec3(1, 1, 1)
351
            vectors = (Vec3(maxSize+2*padding, 0, 0), Vec3(0, maxSize+2*padding, 0), Vec3(0, 0, maxSize+2*padding))
Peter Eastman's avatar
Peter Eastman committed
352
        else:
353
354
            box = self.topology.getUnitCellDimensions().value_in_unit(nanometer)
            vectors = self.topology.getPeriodicBoxVectors().value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
355
            if box is None:
356
                raise ValueError('Neither the box size, box vectors, nor padding was specified, and the Topology does not define unit cell dimensions')
Peter Eastman's avatar
Peter Eastman committed
357
        invBox = Vec3(1.0/box[0], 1.0/box[1], 1.0/box[2])
Justin MacCallum's avatar
Justin MacCallum committed
358

Peter Eastman's avatar
Peter Eastman committed
359
        # Identify the ion types.
Justin MacCallum's avatar
Justin MacCallum committed
360

Peter Eastman's avatar
Peter Eastman committed
361
362
363
364
365
366
367
368
        posIonElements = {'Cs+':elem.cesium, 'K+':elem.potassium, 'Li+':elem.lithium, 'Na+':elem.sodium, 'Rb+':elem.rubidium}
        negIonElements = {'Cl-':elem.chlorine, 'Br-':elem.bromine, 'F-':elem.fluorine, 'I-':elem.iodine}
        if positiveIon not in posIonElements:
            raise ValueError('Illegal value for positive ion: %s' % positiveIon)
        if negativeIon not in negIonElements:
            raise ValueError('Illegal value for negative ion: %s' % negativeIon)
        positiveElement = posIonElements[positiveIon]
        negativeElement = negIonElements[negativeIon]
Justin MacCallum's avatar
Justin MacCallum committed
369

Peter Eastman's avatar
Peter Eastman committed
370
        # Have the ForceField build a System for the solute from which we can determine van der Waals radii.
Justin MacCallum's avatar
Justin MacCallum committed
371

Peter Eastman's avatar
Peter Eastman committed
372
373
374
375
376
377
378
379
        system = forcefield.createSystem(self.topology)
        nonbonded = None
        for i in range(system.getNumForces()):
            if isinstance(system.getForce(i), NonbondedForce):
                nonbonded = system.getForce(i)
        if nonbonded is None:
            raise ValueError('The ForceField does not specify a NonbondedForce')
        cutoff = [nonbonded.getParticleParameters(i)[1].value_in_unit(nanometer)*vdwRadiusPerSigma+waterRadius for i in range(system.getNumParticles())]
380
        waterCutoff = waterRadius
Peter Eastman's avatar
Peter Eastman committed
381
382
383
384
        if len(cutoff) == 0:
            maxCutoff = waterCutoff
        else:
            maxCutoff = max(waterCutoff, max(cutoff))
Justin MacCallum's avatar
Justin MacCallum committed
385

Peter Eastman's avatar
Peter Eastman committed
386
387
388
        # Copy the solute over.

        newTopology = Topology()
389
        newTopology.setPeriodicBoxVectors(vectors*nanometer)
Peter Eastman's avatar
Peter Eastman committed
390
391
392
        newAtoms = {}
        newPositions = []*nanometer
        for chain in self.topology.chains():
393
            newChain = newTopology.addChain(chain.id)
Peter Eastman's avatar
Peter Eastman committed
394
            for residue in chain.residues():
395
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Peter Eastman's avatar
Peter Eastman committed
396
                for atom in residue.atoms():
397
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
Peter Eastman's avatar
Peter Eastman committed
398
399
400
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
401
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
402

Peter Eastman's avatar
Peter Eastman committed
403
        # Sort the solute atoms into cells for fast lookup.
Justin MacCallum's avatar
Justin MacCallum committed
404

Peter Eastman's avatar
Peter Eastman committed
405
406
407
408
        if len(self.positions) == 0:
            positions = []
        else:
            positions = self.positions.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
409
        cells = {}
Peter Eastman's avatar
Peter Eastman committed
410
        numCells = tuple((max(1, int(floor(box[i]/maxCutoff))) for i in range(3)))
Peter Eastman's avatar
Peter Eastman committed
411
412
413
414
415
416
417
        cellSize = tuple((box[i]/numCells[i] for i in range(3)))
        for i in range(len(positions)):
            cell = tuple((int(floor(positions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
            if cell in cells:
                cells[cell].append(i)
            else:
                cells[cell] = [i]
Justin MacCallum's avatar
Justin MacCallum committed
418

Peter Eastman's avatar
Peter Eastman committed
419
        # Create a generator that loops over atoms close to a position.
Justin MacCallum's avatar
Justin MacCallum committed
420

Peter Eastman's avatar
Peter Eastman committed
421
422
423
424
425
426
427
428
429
430
        def neighbors(pos):
            centralCell = tuple((int(floor(pos[i]/cellSize[i])) for i in range(3)))
            offsets = (-1, 0, 1)
            for i in offsets:
                for j in offsets:
                    for k in offsets:
                        cell = ((centralCell[0]+i+numCells[0])%numCells[0], (centralCell[1]+j+numCells[1])%numCells[1], (centralCell[2]+k+numCells[2])%numCells[2])
                        if cell in cells:
                            for atom in cells[cell]:
                                yield atom
Justin MacCallum's avatar
Justin MacCallum committed
431

Peter Eastman's avatar
Peter Eastman committed
432
        # Define a function to compute the distance between two points, taking periodic boundary conditions into account.
Justin MacCallum's avatar
Justin MacCallum committed
433

Peter Eastman's avatar
Peter Eastman committed
434
435
        def periodicDistance(pos1, pos2):
            delta = pos1-pos2
436
437
438
            delta -= vectors[2]*floor(delta[2]*invBox[2]+0.5)
            delta -= vectors[1]*floor(delta[1]*invBox[1]+0.5)
            delta -= vectors[0]*floor(delta[0]*invBox[0]+0.5)
439
            return norm(delta)
Justin MacCallum's avatar
Justin MacCallum committed
440

Peter Eastman's avatar
Peter Eastman committed
441
        # Find the list of water molecules to add.
Justin MacCallum's avatar
Justin MacCallum committed
442

Peter Eastman's avatar
Peter Eastman committed
443
        newChain = newTopology.addChain()
Peter Eastman's avatar
Peter Eastman committed
444
445
446
447
448
        if len(positions) == 0:
            center = Vec3(0, 0, 0)
        else:
            center = [(max((pos[i] for pos in positions))+min((pos[i] for pos in positions)))/2 for i in range(3)]
            center = Vec3(center[0], center[1], center[2])
Peter Eastman's avatar
Peter Eastman committed
449
450
451
452
453
454
455
456
457
458
459
        numBoxes = [int(ceil(box[i]/pdbBoxSize[i])) for i in range(3)]
        addedWaters = []
        for boxx in range(numBoxes[0]):
            for boxy in range(numBoxes[1]):
                for boxz in range(numBoxes[2]):
                    offset = Vec3(boxx*pdbBoxSize[0], boxy*pdbBoxSize[1], boxz*pdbBoxSize[2])
                    for residue in pdbResidues:
                        oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
                        atomPos = pdbPositions[oxygen.index]+offset
                        if not any((atomPos[i] > box[i] for i in range(3))):
                            # This molecule is inside the box, so see how close to it is to the solute.
Justin MacCallum's avatar
Justin MacCallum committed
460

Peter Eastman's avatar
Peter Eastman committed
461
462
463
464
465
466
                            atomPos += center-box/2
                            for i in neighbors(atomPos):
                                if periodicDistance(atomPos, positions[i]) < cutoff[i]:
                                    break
                            else:
                                # Record this water molecule as one to add.
Justin MacCallum's avatar
Justin MacCallum committed
467

Peter Eastman's avatar
Peter Eastman committed
468
                                addedWaters.append((residue.index, atomPos))
Justin MacCallum's avatar
Justin MacCallum committed
469

470
471
472
        if numAdded is not None:
            # We added many more waters than we actually want.  Sort them based on distance to the nearest box edge and
            # only keep the ones in the middle.
Robert McGibbon's avatar
Robert McGibbon committed
473

474
475
            lowerBound = center-box/2
            upperBound = center+box/2
peastman's avatar
peastman committed
476
            distToEdge = (min(min(pos-lowerBound), min(upperBound-pos)) for index, pos in addedWaters)
477
478
            sortedIndex = [i[0] for i in sorted(enumerate(distToEdge), key=lambda x: -x[1])]
            addedWaters = [addedWaters[i] for i in sortedIndex[:numAdded]]
Robert McGibbon's avatar
Robert McGibbon committed
479

480
            # Compute a new periodic box size.
Robert McGibbon's avatar
Robert McGibbon committed
481

482
483
484
485
            maxSize = max(max((pos[i] for index, pos in addedWaters))-min((pos[i] for index, pos in addedWaters)) for i in range(3))
            newTopology.setUnitCellDimensions(Vec3(maxSize, maxSize, maxSize))
        else:
            # There could be clashes between water molecules at the box edges.  Find ones to remove.
Robert McGibbon's avatar
Robert McGibbon committed
486

487
488
489
490
491
492
493
494
495
496
497
498
499
500
            upperCutoff = center+box/2-Vec3(waterCutoff, waterCutoff, waterCutoff)
            lowerCutoff = center-box/2+Vec3(waterCutoff, waterCutoff, waterCutoff)
            lowerSkinPositions = [pos for index, pos in addedWaters if pos[0] < lowerCutoff[0] or pos[1] < lowerCutoff[1] or pos[2] < lowerCutoff[2]]
            filteredWaters = []
            cells = {}
            for i in range(len(lowerSkinPositions)):
                cell = tuple((int(floor(lowerSkinPositions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
                if cell in cells:
                    cells[cell].append(i)
                else:
                    cells[cell] = [i]
            for entry in addedWaters:
                pos = entry[1]
                if pos[0] < upperCutoff[0] and pos[1] < upperCutoff[1] and pos[2] < upperCutoff[2]:
Peter Eastman's avatar
Peter Eastman committed
501
                    filteredWaters.append(entry)
502
503
504
505
                else:
                    if not any((periodicDistance(lowerSkinPositions[i], pos) < waterCutoff and norm(lowerSkinPositions[i]-pos) > waterCutoff for i in neighbors(pos))):
                        filteredWaters.append(entry)
            addedWaters = filteredWaters
Justin MacCallum's avatar
Justin MacCallum committed
506

Peter Eastman's avatar
Peter Eastman committed
507
        # Add ions to neutralize the system.
Justin MacCallum's avatar
Justin MacCallum committed
508

Peter Eastman's avatar
Peter Eastman committed
509
510
511
512
513
514
515
        def addIon(element):
            # Replace a water by an ion.
            index = random.randint(0, len(addedWaters)-1)
            newResidue = newTopology.addResidue(element.symbol.upper(), newChain)
            newTopology.addAtom(element.symbol, element, newResidue)
            newPositions.append(addedWaters[index][1]*nanometer)
            del addedWaters[index]
516
517
518
519
520
521
        if neutralize:
            totalCharge = int(floor(0.5+sum((nonbonded.getParticleParameters(i)[0].value_in_unit(elementary_charge) for i in range(system.getNumParticles())))))
            if abs(totalCharge) > len(addedWaters):
                raise Exception('Cannot neutralize the system because the charge is greater than the number of available positions for ions')
            for i in range(abs(totalCharge)):
                addIon(positiveElement if totalCharge < 0 else negativeElement)
Justin MacCallum's avatar
Justin MacCallum committed
522

Peter Eastman's avatar
Peter Eastman committed
523
        # Add ions based on the desired ionic strength.
Justin MacCallum's avatar
Justin MacCallum committed
524

Peter Eastman's avatar
Peter Eastman committed
525
        numIons = len(addedWaters)*ionicStrength/(55.4*molar) # Pure water is about 55.4 molar (depending on temperature)
526
        numPairs = int(floor(numIons+0.5))
Peter Eastman's avatar
Peter Eastman committed
527
528
529
530
        for i in range(numPairs):
            addIon(positiveElement)
        for i in range(numPairs):
            addIon(negativeElement)
Justin MacCallum's avatar
Justin MacCallum committed
531

Peter Eastman's avatar
Peter Eastman committed
532
        # Add the water molecules.
Justin MacCallum's avatar
Justin MacCallum committed
533

Peter Eastman's avatar
Peter Eastman committed
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
        for index, pos in addedWaters:
            newResidue = newTopology.addResidue(residue.name, newChain)
            residue = pdbResidues[index]
            oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
            oPos = pdbPositions[oxygen.index]
            molAtoms = []
            for atom in residue.atoms():
                molAtoms.append(newTopology.addAtom(atom.name, atom.element, newResidue))
                newPositions.append((pos+pdbPositions[atom.index]-oPos)*nanometer)
            for atom1 in molAtoms:
                if atom1.element == elem.oxygen:
                    for atom2 in molAtoms:
                        if atom2.element == elem.hydrogen:
                            newTopology.addBond(atom1, atom2)
        self.topology = newTopology
        self.positions = newPositions
Justin MacCallum's avatar
Justin MacCallum committed
550

Peter Eastman's avatar
Peter Eastman committed
551
552
553
554
555
556
    class _ResidueData:
        """Inner class used to encapsulate data about the hydrogens for a residue."""
        def __init__(self, name):
            self.name = name
            self.variants = []
            self.hydrogens = []
Justin MacCallum's avatar
Justin MacCallum committed
557

Peter Eastman's avatar
Peter Eastman committed
558
559
560
561
562
563
564
565
    class _Hydrogen:
        """Inner class used to encapsulate data about a hydrogen atom."""
        def __init__(self, name, parent, maxph, variants, terminal):
            self.name = name
            self.parent = parent
            self.maxph = maxph
            self.variants = variants
            self.terminal = terminal
Justin MacCallum's avatar
Justin MacCallum committed
566

567
568
569
    @staticmethod
    def loadHydrogenDefinitions(file):
        """Load an XML file containing definitions of hydrogens that should be added by addHydrogens().
Justin MacCallum's avatar
Justin MacCallum committed
570

571
572
573
574
        The built in hydrogens.xml file containing definitions for standard amino acids and nucleotides is loaded automatically.
        This method can be used to load additional definitions for other residue types.  They will then be used in subsequent
        calls to addHydrogens().
        """
575
576
577
578
579
        tree = etree.parse(file)
        infinity = float('Inf')
        for residue in tree.getroot().findall('Residue'):
            resName = residue.attrib['name']
            data = Modeller._ResidueData(resName)
580
            Modeller._residueHydrogens[resName] = data
581
582
583
584
585
586
587
588
589
590
591
592
593
            for variant in residue.findall('Variant'):
                data.variants.append(variant.attrib['name'])
            for hydrogen in residue.findall('H'):
                maxph = infinity
                if 'maxph' in hydrogen.attrib:
                    maxph = float(hydrogen.attrib['maxph'])
                atomVariants = None
                if 'variant' in hydrogen.attrib:
                    atomVariants = hydrogen.attrib['variant'].split(',')
                terminal = None
                if 'terminal' in hydrogen.attrib:
                    terminal = hydrogen.attrib['terminal']
                data.hydrogens.append(Modeller._Hydrogen(hydrogen.attrib['name'], hydrogen.attrib['parent'], maxph, atomVariants, terminal))
Justin MacCallum's avatar
Justin MacCallum committed
594

595
    def addHydrogens(self, forcefield=None, pH=7.0, variants=None, platform=None):
Peter Eastman's avatar
Peter Eastman committed
596
        """Add missing hydrogens to the model.
Justin MacCallum's avatar
Justin MacCallum committed
597

Peter Eastman's avatar
Peter Eastman committed
598
599
600
        Some residues can exist in multiple forms depending on the pH and properties of the local environment.  These
        variants differ in the presence or absence of particular hydrogens.  In particular, the following variants
        are supported:
Justin MacCallum's avatar
Justin MacCallum committed
601

Peter Eastman's avatar
Peter Eastman committed
602
603
604
        Aspartic acid:
            ASH: Neutral form with a hydrogen on one of the delta oxygens
            ASP: Negatively charged form without a hydrogen on either delta oxygen
Justin MacCallum's avatar
Justin MacCallum committed
605

Peter Eastman's avatar
Peter Eastman committed
606
607
608
        Cysteine:
            CYS: Neutral form with a hydrogen on the sulfur
            CYX: No hydrogen on the sulfur (either negatively charged, or part of a disulfide bond)
Justin MacCallum's avatar
Justin MacCallum committed
609

Peter Eastman's avatar
Peter Eastman committed
610
611
612
        Glutamic acid:
            GLH: Neutral form with a hydrogen on one of the epsilon oxygens
            GLU: Negatively charged form without a hydrogen on either epsilon oxygen
Justin MacCallum's avatar
Justin MacCallum committed
613

Peter Eastman's avatar
Peter Eastman committed
614
615
616
617
        Histidine:
            HID: Neutral form with a hydrogen on the ND1 atom
            HIE: Neutral form with a hydrogen on the NE2 atom
            HIP: Positively charged form with hydrogens on both ND1 and NE2
Peter Eastman's avatar
Peter Eastman committed
618
            HIN: Negatively charged form without a hydrogen on either ND1 or NE2
Justin MacCallum's avatar
Justin MacCallum committed
619

Peter Eastman's avatar
Peter Eastman committed
620
621
622
        Lysine:
            LYN: Neutral form with two hydrogens on the zeta nitrogen
            LYS: Positively charged form with three hydrogens on the zeta nitrogen
Justin MacCallum's avatar
Justin MacCallum committed
623

Peter Eastman's avatar
Peter Eastman committed
624
        The variant to use for each residue is determined by the following rules:
Justin MacCallum's avatar
Justin MacCallum committed
625

Peter Eastman's avatar
Peter Eastman committed
626
627
628
        1. The most common variant at the specified pH is selected.
        2. Any Cysteine that participates in a disulfide bond uses the CYX variant regardless of pH.
        3. For a neutral Histidine residue, the HID or HIE variant is selected based on which one forms a better hydrogen bond.
Justin MacCallum's avatar
Justin MacCallum committed
629

Peter Eastman's avatar
Peter Eastman committed
630
631
632
633
634
635
636
637
        You can override these rules by explicitly specifying a variant for any residue.  To do that, provide a list for the
        'variants' parameter, and set the corresponding element to the name of the variant to use.

        A special case is when the model already contains a hydrogen that should not be present in the desired variant.
        If you explicitly specify a variant using the 'variants' parameter, the residue will be modified to match the
        desired variant, removing hydrogens if necessary.  On the other hand, for residues whose variant is selected
        automatically, this function will only add hydrogens.  It will never remove ones that are already present in the
        model, regardless of the specified pH.
Justin MacCallum's avatar
Justin MacCallum committed
638

639
640
641
        Definitions for standard amino acids and nucleotides are built in.  You can call loadHydrogenDefinitions() to load
        additional definitions for other residue types.

Robert McGibbon's avatar
Robert McGibbon committed
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
        Parameters
        ----------
        forcefield : ForceField=None
            the ForceField to use for determining the positions of hydrogens.
            If this is None, positions will be picked which are generally
            reasonable but not optimized for any particular ForceField.
        pH : float=7.0
            the pH based on which to select variants
        variants : list=None
            an optional list of variants to use.  If this is specified, its
            length must equal the number of residues in the model.  variants[i]
            is the name of the variant to use for residue i (indexed starting at
            0). If an element is None, the standard rules will be followed to
            select a variant for that residue.
        platform : Platform=None
            the Platform to use when computing the hydrogen atom positions.  If
            this is None, the default Platform will be used.

        Returns
        -------
662
        list
Robert McGibbon's avatar
Robert McGibbon committed
663
             a list of what variant was actually selected for each residue,
Robert McGibbon's avatar
Robert McGibbon committed
664
             in the same format as the variants parameter
Peter Eastman's avatar
Peter Eastman committed
665
666
        """
        # Check the list of variants.
Justin MacCallum's avatar
Justin MacCallum committed
667

Peter Eastman's avatar
Peter Eastman committed
668
669
670
671
672
673
        residues = list(self.topology.residues())
        if variants is not None:
            if len(variants) != len(residues):
                raise ValueError("The length of the variants list must equal the number of residues")
        else:
            variants = [None]*len(residues)
674
        actualVariants = [None]*len(residues)
Justin MacCallum's avatar
Justin MacCallum committed
675

Peter Eastman's avatar
Peter Eastman committed
676
        # Load the residue specifications.
Justin MacCallum's avatar
Justin MacCallum committed
677

678
679
        if not Modeller._hasLoadedStandardHydrogens:
            Modeller.loadHydrogenDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'hydrogens.xml'))
Peter Eastman's avatar
Peter Eastman committed
680
681

        # Make a list of atoms bonded to each atom.
Justin MacCallum's avatar
Justin MacCallum committed
682

Peter Eastman's avatar
Peter Eastman committed
683
        bonded = {}
684
685
        for atom in self.topology.atoms():
            bonded[atom] = []
Peter Eastman's avatar
Peter Eastman committed
686
687
688
        for atom1, atom2 in self.topology.bonds():
            bonded[atom1].append(atom2)
            bonded[atom2].append(atom1)
Justin MacCallum's avatar
Justin MacCallum committed
689

690
        # Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.
Justin MacCallum's avatar
Justin MacCallum committed
691

692
693
694
695
696
697
698
699
        def isHbond(d, h, a):
            if norm(d-a) > 0.35*nanometer:
                return False
            deltaDH = h-d
            deltaHA = a-h
            deltaDH /= norm(deltaDH)
            deltaHA /= norm(deltaHA)
            return acos(dot(deltaDH, deltaHA)) < 50*degree
Justin MacCallum's avatar
Justin MacCallum committed
700

Peter Eastman's avatar
Peter Eastman committed
701
        # Loop over residues.
Justin MacCallum's avatar
Justin MacCallum committed
702

Peter Eastman's avatar
Peter Eastman committed
703
        newTopology = Topology()
704
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
Peter Eastman's avatar
Peter Eastman committed
705
706
707
708
709
        newAtoms = {}
        newPositions = []*nanometer
        newIndices = []
        acceptors = [atom for atom in self.topology.atoms() if atom.element in (elem.oxygen, elem.nitrogen)]
        for chain in self.topology.chains():
710
            newChain = newTopology.addChain(chain.id)
Peter Eastman's avatar
Peter Eastman committed
711
            for residue in chain.residues():
712
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Peter Eastman's avatar
Peter Eastman committed
713
714
                isNTerminal = (residue == chain._residues[0])
                isCTerminal = (residue == chain._residues[-1])
715
                if residue.name in Modeller._residueHydrogens:
Peter Eastman's avatar
Peter Eastman committed
716
                    # Add hydrogens.  First select which variant to use.
Justin MacCallum's avatar
Justin MacCallum committed
717

718
                    spec = Modeller._residueHydrogens[residue.name]
Peter Eastman's avatar
Peter Eastman committed
719
720
721
722
                    variant = variants[residue.index]
                    if variant is None:
                        if residue.name == 'CYS':
                            # If this is part of a disulfide, use CYX.
Justin MacCallum's avatar
Justin MacCallum committed
723

Peter Eastman's avatar
Peter Eastman committed
724
725
726
727
                            sulfur = [atom for atom in residue.atoms() if atom.element == elem.sulfur]
                            if len(sulfur) == 1 and any((atom.residue != residue for atom in bonded[sulfur[0]])):
                                variant = 'CYX'
                        if residue.name == 'HIS' and pH > 6.5:
728
                            # See if either nitrogen already has a hydrogen attached.
Justin MacCallum's avatar
Justin MacCallum committed
729

730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
                            nd1 = [atom for atom in residue.atoms() if atom.name == 'ND1']
                            ne2 = [atom for atom in residue.atoms() if atom.name == 'NE2']
                            if len(nd1) != 1 or len(ne2) != 1:
                                raise ValueError('HIS residue (%d) has the wrong set of atoms' % residue.index)
                            nd1 = nd1[0]
                            ne2 = ne2[0]
                            nd1HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[nd1]))
                            ne2HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[ne2]))
                            if nd1HasHydrogen and ne2HasHydrogen:
                                variant = 'HIP'
                            elif nd1HasHydrogen:
                                variant = 'HID'
                            elif ne2HasHydrogen:
                                variant = 'HIE'
                            else:
                                # Estimate the hydrogen positions.
Justin MacCallum's avatar
Justin MacCallum committed
746

747
748
749
750
751
752
753
754
755
756
757
758
759
760
                                nd1Pos = self.positions[nd1.index]
                                ne2Pos = self.positions[ne2.index]
                                hd1Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[nd1]:
                                    hd1Delta += nd1Pos-self.positions[other.index]
                                hd1Delta *= 0.1*nanometer/norm(hd1Delta)
                                hd1Pos = nd1Pos+hd1Delta
                                he2Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[ne2]:
                                    he2Delta += ne2Pos-self.positions[other.index]
                                he2Delta *= 0.1*nanometer/norm(he2Delta)
                                he2Pos = ne2Pos+he2Delta

                                # See whether either hydrogen would form a hydrogen bond.
Justin MacCallum's avatar
Justin MacCallum committed
761

762
763
764
765
766
767
768
769
770
771
772
773
774
775
                                nd1IsBonded = False
                                ne2IsBonded = False
                                for acceptor in acceptors:
                                    if acceptor.residue != residue:
                                        acceptorPos = self.positions[acceptor.index]
                                        if isHbond(nd1Pos, hd1Pos, acceptorPos):
                                            nd1IsBonded = True
                                            break
                                        if isHbond(ne2Pos, he2Pos, acceptorPos):
                                            ne2IsBonded = True
                                if ne2IsBonded and not nd1IsBonded:
                                    variant = 'HIE'
                                else:
                                    variant = 'HID'
Peter Eastman's avatar
Peter Eastman committed
776
777
778
779
                        elif residue.name == 'HIS':
                            variant = 'HIP'
                    if variant is not None and variant not in spec.variants:
                        raise ValueError('Illegal variant for %s residue: %s' % (residue.name, variant))
780
                    actualVariants[residue.index] = variant
Peter Eastman's avatar
Peter Eastman committed
781
                    removeExtraHydrogens = (variants[residue.index] is not None)
Justin MacCallum's avatar
Justin MacCallum committed
782

Peter Eastman's avatar
Peter Eastman committed
783
                    # Make a list of hydrogens that should be present in the residue.
Justin MacCallum's avatar
Justin MacCallum committed
784

Peter Eastman's avatar
Peter Eastman committed
785
786
787
788
789
                    parents = [atom for atom in residue.atoms() if atom.element != elem.hydrogen]
                    parentNames = [atom.name for atom in parents]
                    hydrogens = [h for h in spec.hydrogens if (variant is None and pH <= h.maxph) or (h.variants is None and pH <= h.maxph) or (h.variants is not None and variant in h.variants)]
                    hydrogens = [h for h in hydrogens if h.terminal is None or (isNTerminal and h.terminal == 'N') or (isCTerminal and h.terminal == 'C')]
                    hydrogens = [h for h in hydrogens if h.parent in parentNames]
Justin MacCallum's avatar
Justin MacCallum committed
790

Peter Eastman's avatar
Peter Eastman committed
791
                    # Loop over atoms in the residue, adding them to the new topology along with required hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
792

Peter Eastman's avatar
Peter Eastman committed
793
                    for parent in residue.atoms():
Peter Eastman's avatar
Peter Eastman committed
794
795
796
797
798
                        # Check whether this is a hydrogen that should be removed.

                        if removeExtraHydrogens and parent.element == elem.hydrogen and not any(parent.name == h.name for h in hydrogens):
                            continue

Peter Eastman's avatar
Peter Eastman committed
799
                        # Add the atom.
Justin MacCallum's avatar
Justin MacCallum committed
800

Peter Eastman's avatar
Peter Eastman committed
801
802
803
804
805
                        newAtom = newTopology.addAtom(parent.name, parent.element, newResidue)
                        newAtoms[parent] = newAtom
                        newPositions.append(deepcopy(self.positions[parent.index]))
                        if parent in parents:
                            # Match expected hydrogens with existing ones and find which ones need to be added.
Justin MacCallum's avatar
Justin MacCallum committed
806

Peter Eastman's avatar
Peter Eastman committed
807
808
809
810
                            existing = [atom for atom in bonded[parent] if atom.element == elem.hydrogen]
                            expected = [h for h in hydrogens if h.parent == parent.name]
                            if len(existing) < len(expected):
                                # Try to match up existing hydrogens to expected ones.
Justin MacCallum's avatar
Justin MacCallum committed
811

Peter Eastman's avatar
Peter Eastman committed
812
813
814
815
816
817
818
819
                                matches = []
                                for e in existing:
                                    match = [h for h in expected if h.name == e.name]
                                    if len(match) > 0:
                                        matches.append(match[0])
                                        expected.remove(match[0])
                                    else:
                                        matches.append(None)
Justin MacCallum's avatar
Justin MacCallum committed
820

Peter Eastman's avatar
Peter Eastman committed
821
                                # If any hydrogens couldn't be matched by name, just match them arbitrarily.
Justin MacCallum's avatar
Justin MacCallum committed
822

Peter Eastman's avatar
Peter Eastman committed
823
824
825
826
                                for i in range(len(matches)):
                                    if matches[i] is None:
                                        matches[i] = expected[-1]
                                        expected.remove(expected[-1])
Justin MacCallum's avatar
Justin MacCallum committed
827

Peter Eastman's avatar
Peter Eastman committed
828
                                # Add the missing hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
829

Peter Eastman's avatar
Peter Eastman committed
830
831
832
                                for h in expected:
                                    newH = newTopology.addAtom(h.name, elem.hydrogen, newResidue)
                                    newIndices.append(newH.index)
833
834
                                    delta = Vec3(0, 0, 0)*nanometer
                                    if len(bonded[parent]) > 0:
Peter Eastman's avatar
Peter Eastman committed
835
836
837
838
                                        for other in bonded[parent]:
                                            delta += self.positions[parent.index]-self.positions[other.index]
                                    else:
                                        delta = Vec3(random.random(), random.random(), random.random())*nanometer
839
                                    delta *= 0.1*nanometer/norm(delta)
840
841
                                    delta += 0.05*Vec3(random.random(), random.random(), random.random())*nanometer
                                    delta *= 0.1*nanometer/norm(delta)
Peter Eastman's avatar
Peter Eastman committed
842
843
844
845
                                    newPositions.append(self.positions[parent.index]+delta)
                                    newTopology.addBond(newAtom, newH)
                else:
                    # Just copy over the residue.
Justin MacCallum's avatar
Justin MacCallum committed
846

Peter Eastman's avatar
Peter Eastman committed
847
848
849
850
851
852
853
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
854

855
856
857
858
        # The hydrogens were added at random positions.  Now perform an energy minimization to fix them up.

        if forcefield is not None:
            # Use the ForceField the user specified.
Robert McGibbon's avatar
Robert McGibbon committed
859

860
861
862
863
864
865
866
867
868
            system = forcefield.createSystem(newTopology, rigidWater=False)
            atoms = list(newTopology.atoms())
            for i in range(system.getNumParticles()):
                if atoms[i].element != elem.hydrogen:
                    # This is a heavy atom, so make it immobile.
                    system.setParticleMass(i, 0)
        else:
            # Create a System that restrains the distance of each hydrogen from its parent atom
            # and causes hydrogens to spread out evenly.
Robert McGibbon's avatar
Robert McGibbon committed
869

870
            system = System()
871
            nonbonded = CustomNonbondedForce('100/((r/0.1)^4+1)')
872
873
874
875
876
            bonds = HarmonicBondForce()
            angles = HarmonicAngleForce()
            system.addForce(nonbonded)
            system.addForce(bonds)
            system.addForce(angles)
877
            bondedTo = []
878
879
880
881
882
883
            for atom in newTopology.atoms():
                nonbonded.addParticle([])
                if atom.element != elem.hydrogen:
                    system.addParticle(0.0)
                else:
                    system.addParticle(1.0)
884
                bondedTo.append([])
885
886
887
            for atom1, atom2 in newTopology.bonds():
                if atom1.element == elem.hydrogen or atom2.element == elem.hydrogen:
                    bonds.addBond(atom1.index, atom2.index, 0.1, 100000.0)
888
889
                bondedTo[atom1.index].append(atom2)
                bondedTo[atom2.index].append(atom1)
890
891
            for residue in newTopology.residues():
                if residue.name == 'HOH':
892
                    # Add an angle term to make the water geometry correct.
Robert McGibbon's avatar
Robert McGibbon committed
893

894
895
896
897
898
                    atoms = list(residue.atoms())
                    oindex = [i for i in range(len(atoms)) if atoms[i].element == elem.oxygen]
                    if len(atoms) == 3 and len(oindex) == 1:
                        hindex = list(set([0,1,2])-set(oindex))
                        angles.addAngle(atoms[hindex[0]].index, atoms[oindex[0]].index, atoms[hindex[1]].index, 1.824, 836.8)
899
900
                else:
                    # Add angle terms for any hydroxyls.
Robert McGibbon's avatar
Robert McGibbon committed
901

902
903
904
905
                    for atom in residue.atoms():
                        index = atom.index
                        if atom.element == elem.oxygen and len(bondedTo[index]) == 2 and elem.hydrogen in (a.element for a in bondedTo[index]):
                            angles.addAngle(bondedTo[index][0].index, index, bondedTo[index][1].index, 1.894, 460.24)
Robert McGibbon's avatar
Robert McGibbon committed
906

907
908
909
910
        if platform is None:
            context = Context(system, VerletIntegrator(0.0))
        else:
            context = Context(system, VerletIntegrator(0.0), platform)
Peter Eastman's avatar
Peter Eastman committed
911
        context.setPositions(newPositions)
912
        LocalEnergyMinimizer.minimize(context, 1.0, 50)
Peter Eastman's avatar
Peter Eastman committed
913
914
        self.topology = newTopology
        self.positions = context.getState(getPositions=True).getPositions()
915
        del context
916
        return actualVariants
Justin MacCallum's avatar
Justin MacCallum committed
917

918
    def addExtraParticles(self, forcefield):
Robert McGibbon's avatar
Robert McGibbon committed
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
        """Add missing extra particles to the model that are required by a force
        field.

        Some force fields use "extra particles" that do not represent
        actual atoms, but still need to be included in the System.  Examples
        include lone pairs, Drude particles, and the virtual sites used in some
        water models to adjust the charge distribution.  Extra particles can be
        recognized by the fact that their element is None.

        This method is primarily used to add extra particles, but it can also
        remove them.  It tries to match every residue in the Topology to a
        template in the force field.  If there is no match, it will both add
        and remove extra particles as necessary to make it match.

        Parameters
        ----------
        forcefield : ForceField
            the ForceField defining what extra particles should be present
937
938
        """
        # Create copies of all residue templates that have had all extra points removed.
Justin MacCallum's avatar
Justin MacCallum committed
939

940
        templatesNoEP = {}
941
        for resName, template  in forcefield._templates.items():
942
943
944
945
946
947
948
949
            if any(atom.element is None for atom in template.atoms):
                index = 0
                newIndex = {}
                newTemplate = ForceField._TemplateData(resName)
                for i, atom in enumerate(template.atoms):
                    if atom.element is not None:
                        newIndex[i] = index
                        index += 1
950
951
952
                        newAtom = ForceField._TemplateAtomData(atom.name, atom.type, atom.element)
                        newAtom.externalBonds = atom.externalBonds
                        newTemplate.atoms.append(newAtom)
953
954
955
956
957
958
959
960
961
                for b1, b2 in template.bonds:
                    if b1 in newIndex and b2 in newIndex:
                        newTemplate.bonds.append((newIndex[b1], newIndex[b2]))
                        newTemplate.atoms[newIndex[b1]].bondedTo.append(newIndex[b2])
                        newTemplate.atoms[newIndex[b2]].bondedTo.append(newIndex[b1])
                for b in template.externalBonds:
                    if b in newIndex:
                        newTemplate.externalBonds.append(newIndex[b])
                templatesNoEP[template] = newTemplate
Justin MacCallum's avatar
Justin MacCallum committed
962

963
        # Record which atoms are bonded to each other atom, with and without extra particles.
Justin MacCallum's avatar
Justin MacCallum committed
964

965
966
967
968
        bondedToAtom = []
        bondedToAtomNoEP = []
        for atom in self.topology.atoms():
            bondedToAtom.append(set())
peastman's avatar
peastman committed
969
            bondedToAtomNoEP.append(set())
970
971
972
973
974
975
        for atom1, atom2 in self.topology.bonds():
            bondedToAtom[atom1.index].add(atom2.index)
            bondedToAtom[atom2.index].add(atom1.index)
            if atom1.element is not None and atom2.element is not None:
                bondedToAtomNoEP[atom1.index].add(atom2.index)
                bondedToAtomNoEP[atom2.index].add(atom1.index)
Justin MacCallum's avatar
Justin MacCallum committed
976

peastman's avatar
peastman committed
977
978
        # If the force field has a DrudeForce, record the types of Drude particles and their parents since we'll
        # need them for picking particle positions.
Justin MacCallum's avatar
Justin MacCallum committed
979

peastman's avatar
peastman committed
980
981
982
983
984
        drudeTypeMap = {}
        for force in forcefield._forces:
            if isinstance(force, DrudeGenerator):
                for type in force.typeMap:
                    drudeTypeMap[type] = force.typeMap[type][0]
Justin MacCallum's avatar
Justin MacCallum committed
985

986
        # Create the new Topology.
Justin MacCallum's avatar
Justin MacCallum committed
987

988
        newTopology = Topology()
989
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
990
991
        newAtoms = {}
        newPositions = []*nanometer
992
        missingPositions = set()
993
        for chain in self.topology.chains():
994
            newChain = newTopology.addChain(chain.id)
995
            for residue in chain.residues():
996
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Justin MacCallum's avatar
Justin MacCallum committed
997

998
                # Look for a matching template.
Justin MacCallum's avatar
Justin MacCallum committed
999

1000
1001
1002
1003
1004
1005
1006
1007
                matchFound = False
                signature = _createResidueSignature([atom.element for atom in residue.atoms()])
                if signature in forcefield._templateSignatures:
                    for t in forcefield._templateSignatures[signature]:
                        if _matchResidue(residue, t, bondedToAtom) is not None:
                            matchFound = True
                if matchFound:
                    # Just copy the residue over.
Justin MacCallum's avatar
Justin MacCallum committed
1008

1009
1010
1011
1012
1013
1014
1015
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
                else:
                    # There's no matching template.  Try to find one that matches based on everything except
                    # extra points.
Justin MacCallum's avatar
Justin MacCallum committed
1016

1017
                    template = None
1018
                    residueNoEP = Residue(residue.name, residue.index, residue.chain, residue.id)
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
                    residueNoEP._atoms = [atom for atom in residue.atoms() if atom.element is not None]
                    if signature in forcefield._templateSignatures:
                        for t in forcefield._templateSignatures[signature]:
                            if t in templatesNoEP:
                                matches = _matchResidue(residueNoEP, templatesNoEP[t], bondedToAtomNoEP)
                                if matches is not None:
                                    template = t;
                                    # Record the corresponding atoms.
                                    matchingAtoms = {}
                                    for atom, match in zip(residueNoEP.atoms(), matches):
peastman's avatar
peastman committed
1029
                                        templateAtomName = templatesNoEP[t].atoms[match].name
1030
1031
1032
1033
1034
1035
                                        for templateAtom in template.atoms:
                                            if templateAtom.name == templateAtomName:
                                                matchingAtoms[templateAtom] = atom
                                    break
                    if template is None:
                        raise ValueError('Residue %d (%s) does not match any template defined by the ForceField.' % (residue.index+1, residue.name))
Justin MacCallum's avatar
Justin MacCallum committed
1036

1037
                    # Add the regular atoms.
Justin MacCallum's avatar
Justin MacCallum committed
1038

1039
1040
1041
1042
                    for atom in residue.atoms():
                        if atom.element is not None:
                            newAtoms[atom] = newTopology.addAtom(atom.name, atom.element, newResidue)
                            newPositions.append(deepcopy(self.positions[atom.index]))
Justin MacCallum's avatar
Justin MacCallum committed
1043

1044
                    # Add the extra points.
Justin MacCallum's avatar
Justin MacCallum committed
1045

1046
1047
1048
                    templateAtomPositions = len(template.atoms)*[None]
                    for index, atom in enumerate(template.atoms):
                        if atom in matchingAtoms:
peastman's avatar
peastman committed
1049
                            templateAtomPositions[index] = self.positions[matchingAtoms[atom].index].value_in_unit(nanometer)
1050
                    newExtraPoints = {}
1051
1052
                    for index, atom in enumerate(template.atoms):
                        if atom.element is None:
1053
                            newExtraPoints[atom] = newTopology.addAtom(atom.name, None, newResidue)
1054
1055
1056
1057
                            position = None
                            for site in template.virtualSites:
                                if site.index == index:
                                    # This is a virtual site.  Compute its position by the correct rule.
Justin MacCallum's avatar
Justin MacCallum committed
1058

1059
                                    if site.type == 'average2':
Matthew Harrigan's avatar
Matthew Harrigan committed
1060
                                        position = site.weights[0]*templateAtomPositions[site.atoms[0]] + site.weights[1]*templateAtomPositions[site.atoms[1]]
1061
                                    elif site.type == 'average3':
Matthew Harrigan's avatar
Matthew Harrigan committed
1062
                                        position = site.weights[0]*templateAtomPositions[site.atoms[0]] + site.weights[1]*templateAtomPositions[site.atoms[1]] + site.weights[2]*templateAtomPositions[site.atoms[2]]
1063
                                    elif site.type == 'outOfPlane':
Matthew Harrigan's avatar
Matthew Harrigan committed
1064
1065
                                        v1 = templateAtomPositions[site.atoms[1]] - templateAtomPositions[site.atoms[0]]
                                        v2 = templateAtomPositions[site.atoms[2]] - templateAtomPositions[site.atoms[0]]
peastman's avatar
peastman committed
1066
                                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
Matthew Harrigan's avatar
Matthew Harrigan committed
1067
                                        position = templateAtomPositions[site.atoms[0]] + site.weights[0]*v1 + site.weights[1]*v2 + site.weights[2]*cross
1068
1069
1070
1071
1072
1073
1074
1075
1076
                                    elif site.type == 'localCoords':
                                        origin = templateAtomPositions[site.atoms[0]]*site.originWeights[0] + templateAtomPositions[site.atoms[1]]*site.originWeights[1] + templateAtomPositions[site.atoms[2]]*site.originWeights[2];
                                        xdir = templateAtomPositions[site.atoms[0]]*site.xWeights[0] + templateAtomPositions[site.atoms[1]]*site.xWeights[1] + templateAtomPositions[site.atoms[2]]*site.xWeights[2];
                                        ydir = templateAtomPositions[site.atoms[0]]*site.yWeights[0] + templateAtomPositions[site.atoms[1]]*site.yWeights[1] + templateAtomPositions[site.atoms[2]]*site.yWeights[2];
                                        zdir = Vec3(xdir[1]*ydir[2]-xdir[2]*ydir[1], xdir[2]*ydir[0]-xdir[0]*ydir[2], xdir[0]*ydir[1]-xdir[1]*ydir[0])
                                        xdir /= norm(xdir);
                                        zdir /= norm(zdir);
                                        ydir = Vec3(zdir[1]*xdir[2]-zdir[2]*xdir[1], zdir[2]*xdir[0]-zdir[0]*xdir[2], zdir[0]*xdir[1]-zdir[1]*xdir[0])
                                        position = origin + xdir*site.localPos[0] + ydir*site.localPos[1] + zdir*site.localPos[2];
peastman's avatar
peastman committed
1077
1078
                            if position is None and atom.type in drudeTypeMap:
                                # This is a Drude particle.  Put it on top of its parent atom.
Justin MacCallum's avatar
Justin MacCallum committed
1079

peastman's avatar
peastman committed
1080
1081
1082
                                for atom2, pos in zip(template.atoms, templateAtomPositions):
                                    if atom2.type in drudeTypeMap[atom.type]:
                                        position = deepcopy(pos)
1083
                            if position is None:
1084
1085
                                # We couldn't figure out the correct position.  Put it at a random position near the center of the residue,
                                # and we'll try to fix it later based on bonds.
Justin MacCallum's avatar
Justin MacCallum committed
1086

1087
                                knownPositions = [x for x in templateAtomPositions if x is not None]
1088
1089
                                position = Vec3(random.gauss(0, 1), random.gauss(0, 1), random.gauss(0, 1))+(unit.sum(knownPositions)/len(knownPositions))
                                missingPositions.add(len(newPositions))
peastman's avatar
peastman committed
1090
                            newPositions.append(position*nanometer)
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100

                    # Add bonds involving the extra points.

                    for atom1, atom2 in template.bonds:
                        atom1 = template.atoms[atom1]
                        atom2 = template.atoms[atom2]
                        if atom1 in newExtraPoints or atom2 in newExtraPoints:
                            if atom1 in newExtraPoints:
                                a1 = newExtraPoints[atom1]
                            else:
Peter Eastman's avatar
Bug fix  
Peter Eastman committed
1101
                                a1 = newAtoms[matchingAtoms[atom1]]
1102
1103
1104
                            if atom2 in newExtraPoints:
                                a2 = newExtraPoints[atom2]
                            else:
Peter Eastman's avatar
Bug fix  
Peter Eastman committed
1105
                                a2 = newAtoms[matchingAtoms[atom2]]
1106
                            newTopology.addBond(a1, a2)
1107

1108
1109
1110
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124

        if len(missingPositions) > 0:
            # There were particles whose position we couldn't identify before, since they were neither virtual sites nor Drude particles.
            # Try to figure them out based on bonds.  First, use the ForceField to create a list of every bond involving one of them.

            system = forcefield.createSystem(newTopology, constraints=AllBonds)
            bonds = []
            for i in range(system.getNumConstraints()):
                bond = system.getConstraintParameters(i)
                if bond[0] in missingPositions or bond[1] in missingPositions:
                    bonds.append(bond)

            # Now run a few iterations of SHAKE to try to select reasonable positions.

1125
            for iteration in range(15):
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
                for atom1, atom2, distance in bonds:
                    if atom1 in missingPositions:
                        if atom2 in missingPositions:
                            weights = (0.5, 0.5)
                        else:
                            weights = (1.0, 0.0)
                    else:
                        weights = (0.0, 1.0)
                    delta = newPositions[atom2]-newPositions[atom1]
                    length = norm(delta)
                    delta *= (distance-length)/length
                    newPositions[atom1] -= weights[0]*delta
                    newPositions[atom2] += weights[1]*delta
1139

1140
        self.topology = newTopology
1141
        self.positions = newPositions