modeller.py 59.4 KB
Newer Older
1
2
"""
modeller.py: Provides tools for editing molecular models
3
4
5
6
7
8

This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.

Peter Eastman's avatar
Peter Eastman committed
9
Portions copyright (c) 2012-2016 Stanford University and the Authors.
10
11
12
Authors: Peter Eastman
Contributors:

Justin MacCallum's avatar
Justin MacCallum committed
13
Permission is hereby granted, free of charge, to any person obtaining a
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
30
"""
Peter Eastman's avatar
Peter Eastman committed
31
from __future__ import division
32
from __future__ import absolute_import
Peter Eastman's avatar
Peter Eastman committed
33

34
35
36
__author__ = "Peter Eastman"
__version__ = "1.0"

37
from simtk.openmm.app import Topology, PDBFile, ForceField
38
from simtk.openmm.app.forcefield import HAngles, AllBonds, CutoffNonPeriodic, _createResidueSignature, _matchResidue, DrudeGenerator
39
from simtk.openmm.app.topology import Residue
40
from simtk.openmm.vec3 import Vec3
41
from simtk.openmm import System, Context, NonbondedForce, CustomNonbondedForce, HarmonicBondForce, HarmonicAngleForce, VerletIntegrator, LocalEnergyMinimizer
42
from simtk.unit import nanometer, molar, elementary_charge, amu, gram, liter, degree, sqrt, acos, is_quantity, dot, norm
43
import simtk.unit as unit
44
from . import element as elem
Peter Eastman's avatar
Peter Eastman committed
45
46
import os
import random
Peter Eastman's avatar
Peter Eastman committed
47
import xml.etree.ElementTree as etree
Peter Eastman's avatar
Peter Eastman committed
48
49
from copy import deepcopy
from math import ceil, floor
50
51
52

class Modeller(object):
    """Modeller provides tools for editing molecular models, such as adding water or missing hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
53

54
55
56
57
58
    To use it, create a Modeller object, specifying the initial Topology and atom positions.  You can
    then call various methods to change the model in different ways.  Each time you do, a new Topology
    and list of coordinates is created to represent the changed model.  Finally, call getTopology()
    and getPositions() to get the results.
    """
Justin MacCallum's avatar
Justin MacCallum committed
59

60
61
    _residueHydrogens = {}
    _hasLoadedStandardHydrogens = False
Peter Eastman's avatar
Peter Eastman committed
62

63
64
    def __init__(self, topology, positions):
        """Create a new Modeller object
Justin MacCallum's avatar
Justin MacCallum committed
65

Robert McGibbon's avatar
Robert McGibbon committed
66
67
68
69
70
71
        Parameters
        ----------
        topology : Topology
            the initial Topology of the model
        positions : list
            the initial atomic positions
72
        """
73
        ## The Topology describing the structure of the system
74
        self.topology = topology
Peter Eastman's avatar
Peter Eastman committed
75
        if not is_quantity(positions):
Peter Eastman's avatar
Bug fix  
Peter Eastman committed
76
            positions = positions*nanometer
77
        ## The list of atom positions
78
        self.positions = positions
Justin MacCallum's avatar
Justin MacCallum committed
79

80
81
82
    def getTopology(self):
        """Get the Topology of the model."""
        return self.topology
Justin MacCallum's avatar
Justin MacCallum committed
83

84
85
86
87
    def getPositions(self):
        """Get the atomic positions."""
        return self.positions

88
89
    def add(self, addTopology, addPositions):
        """Add chains, residues, atoms, and bonds to the model.
Justin MacCallum's avatar
Justin MacCallum committed
90

Robert McGibbon's avatar
Robert McGibbon committed
91
92
93
        Specify what to add by providing a new Topology object and the
        corresponding atomic positions. All chains, residues, atoms, and bonds
        contained in the Topology are added to the model.
Justin MacCallum's avatar
Justin MacCallum committed
94

Robert McGibbon's avatar
Robert McGibbon committed
95
96
97
98
99
100
        Parameters
        ----------
        addTopology : Topology
            a Topology whose contents should be added to the model
        addPositions : list
            the positions of the atoms to add
101
102
        """
        # Copy over the existing model.
Justin MacCallum's avatar
Justin MacCallum committed
103

104
        newTopology = Topology()
105
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
106
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
107
        newPositions = []*nanometer
108
        for chain in self.topology.chains():
109
            newChain = newTopology.addChain(chain.id)
110
            for residue in chain.residues():
111
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
112
                for atom in residue.atoms():
113
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
114
115
116
117
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
118

119
120
121
122
        # Add the new model

        newAtoms = {}
        for chain in addTopology.chains():
123
            newChain = newTopology.addChain(chain.id)
124
            for residue in chain.residues():
125
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
126
                for atom in residue.atoms():
127
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
128
129
130
131
132
133
134
135
136
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(addPositions[atom.index]))
        for bond in addTopology.bonds():
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions

    def delete(self, toDelete):
        """Delete chains, residues, atoms, and bonds from the model.
Justin MacCallum's avatar
Justin MacCallum committed
137

138
139
140
141
        You can specify objects to delete at any granularity: atoms, residues, or chains.  Passing
        in an Atom object causes that Atom to be deleted.  Passing in a Residue object causes that
        Residue and all Atoms it contains to be deleted.  Passing in a Chain object causes that
        Chain and all Residues and Atoms it contains to be deleted.
Justin MacCallum's avatar
Justin MacCallum committed
142

143
144
145
        In all cases, when an Atom is deleted, any bonds it participates in are also deleted.
        You also can specify a bond (as a tuple of Atom objects) to delete just that bond without
        deleting the Atoms it connects.
Justin MacCallum's avatar
Justin MacCallum committed
146

Robert McGibbon's avatar
Robert McGibbon committed
147
148
149
150
151
        Parameters
        ----------
        toDelete : list
            a list of Atoms, Residues, Chains, and bonds (specified as tuples of
            Atoms) to delete
152
153
        """
        newTopology = Topology()
154
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
155
156
157
158
159
        newAtoms = {}
        newPositions = []*nanometer
        deleteSet = set(toDelete)
        for chain in self.topology.chains():
            if chain not in deleteSet:
160
                needNewChain = True;
161
162
                for residue in chain.residues():
                    if residue not in deleteSet:
163
                        needNewResidue = True
164
165
                        for atom in residue.atoms():
                            if atom not in deleteSet:
166
                                if needNewChain:
167
                                    newChain = newTopology.addChain(chain.id)
168
169
                                    needNewChain = False;
                                if needNewResidue:
170
                                    newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
171
                                    needNewResidue = False;
172
                                newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
173
174
                                newAtoms[atom] = newAtom
                                newPositions.append(deepcopy(self.positions[atom.index]))
175
176
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
177
178
                if bond not in deleteSet and (bond[1], bond[0]) not in deleteSet:
                    newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
179
180
        self.topology = newTopology
        self.positions = newPositions
Justin MacCallum's avatar
Justin MacCallum committed
181

182
183
184
    def deleteWater(self):
        """Delete all water molecules from the model."""
        self.delete(res for res in self.topology.residues() if res.name == "HOH")
Justin MacCallum's avatar
Justin MacCallum committed
185

186
187
    def convertWater(self, model='tip3p'):
        """Convert all water molecules to a different water model.
Justin MacCallum's avatar
Justin MacCallum committed
188

189
190
191
        @deprecated Use addExtraParticles() instead.  It performs the same
        function but in a more general way.

192
193
194
195
196
        Parameters
        ----------
        model : string='tip3p'
            the water model to convert to.  Supported values are 'tip3p',
            'spce', 'tip4pew', and 'tip5p'.
197
        """
Peter Eastman's avatar
Peter Eastman committed
198
        if model in ('tip3p', 'spce'):
199
200
201
202
203
204
205
206
            sites = 3
        elif model == 'tip4pew':
            sites = 4
        elif model == 'tip5p':
            sites = 5
        else:
            raise ValueError('Unknown water model: %s' % model)
        newTopology = Topology()
207
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
208
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
209
        newPositions = []*nanometer
210
        for chain in self.topology.chains():
211
            newChain = newTopology.addChain(chain.id)
212
            for residue in chain.residues():
213
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
214
215
216
217
218
219
220
221
222
223
224
225
                if residue.name == "HOH":
                    # Copy the oxygen and hydrogens
                    oatom = [atom for atom in residue.atoms() if atom.element == elem.oxygen]
                    hatoms = [atom for atom in residue.atoms() if atom.element == elem.hydrogen]
                    if len(oatom) != 1 or len(hatoms) != 2:
                        raise ValueError('Illegal water molecule (residue %d): contains %d oxygen(s) and %d hydrogen(s)' % (residue.index, len(oatom), len(hatoms)))
                    o = newTopology.addAtom(oatom[0].name, oatom[0].element, newResidue)
                    h1 = newTopology.addAtom(hatoms[0].name, hatoms[0].element, newResidue)
                    h2 = newTopology.addAtom(hatoms[1].name, hatoms[1].element, newResidue)
                    newAtoms[oatom[0]] = o
                    newAtoms[hatoms[0]] = h1
                    newAtoms[hatoms[1]] = h2
Peter Eastman's avatar
Peter Eastman committed
226
227
228
                    po = deepcopy(self.positions[oatom[0].index])
                    ph1 = deepcopy(self.positions[hatoms[0].index])
                    ph2 = deepcopy(self.positions[hatoms[1].index])
229
230
231
                    newPositions.append(po)
                    newPositions.append(ph1)
                    newPositions.append(ph2)
Justin MacCallum's avatar
Justin MacCallum committed
232

233
                    # Add virtual sites.
Justin MacCallum's avatar
Justin MacCallum committed
234

235
236
                    if sites == 4:
                        newTopology.addAtom('M', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
237
                        newPositions.append(0.786646558*po + 0.106676721*ph1 + 0.106676721*ph2)
238
239
240
                    elif sites == 5:
                        newTopology.addAtom('M1', None, newResidue)
                        newTopology.addAtom('M2', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
241
242
                        v1 = (ph1-po).value_in_unit(nanometer)
                        v2 = (ph2-po).value_in_unit(nanometer)
243
                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
Peter Eastman's avatar
Peter Eastman committed
244
245
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 - 6.4437903*cross)*nanometer)
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 + 6.4437903*cross)*nanometer)
246
247
248
                else:
                    # Just copy the residue over.
                    for atom in residue.atoms():
249
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
250
                        newAtoms[atom] = newAtom
Peter Eastman's avatar
Peter Eastman committed
251
                        newPositions.append(deepcopy(self.positions[atom.index]))
252
253
254
255
256
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions
Peter Eastman's avatar
Peter Eastman committed
257

258
    def addSolvent(self, forcefield, model='tip3p', boxSize=None, boxVectors=None, padding=None, numAdded=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*molar, neutralize=True):
Peter Eastman's avatar
Peter Eastman committed
259
        """Add solvent (both water and ions) to the model to fill a rectangular box.
Justin MacCallum's avatar
Justin MacCallum committed
260

Peter Eastman's avatar
Peter Eastman committed
261
        The algorithm works as follows:
Robert McGibbon's avatar
Robert McGibbon committed
262

Peter Eastman's avatar
Peter Eastman committed
263
264
        1. Water molecules are added to fill the box.
        2. Water molecules are removed if their distance to any solute atom is less than the sum of their van der Waals radii.
265
        3. If the solute is charged and neutralize=True, enough positive or negative ions are added to neutralize it.  Each ion is added by
Peter Eastman's avatar
Peter Eastman committed
266
           randomly selecting a water molecule and replacing it with the ion.
267
        4. Ion pairs are added to give the requested total ionic strength.  Note that only monovalent ions are currently supported.
Justin MacCallum's avatar
Justin MacCallum committed
268

269
        The box size can be specified in any of several ways:
Robert McGibbon's avatar
Robert McGibbon committed
270

271
272
273
        1. You can explicitly give the vectors defining the periodic box to use.
        2. Alternatively, for a rectangular box you can simply give the dimensions of the unit cell.
        3. You can give a padding distance.  The largest dimension of the solute (along the x, y, or z axis) is determined, and a cubic
274
           box of size (largest dimension)+2*padding is used.
275
        4. You can specify the total number of molecules (both waters and ions) to add.  A cubic box is then created whose size is
Robert McGibbon's avatar
Robert McGibbon committed
276
           just large enough hold the specified amount of solvent.
277
        5. Finally, if none of the above options is specified, the existing Topology's box vectors are used.
Justin MacCallum's avatar
Justin MacCallum committed
278

Robert McGibbon's avatar
Robert McGibbon committed
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
        Parameters
        ----------
        forcefield : ForceField
            the ForceField to use for determining van der Waals radii and atomic charges
        model : str='tip3p'
            the water model to use.  Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
        boxSize : Vec3=None
            the size of the box to fill with water
        boxVectors : tuple of Vec3=None
            the vectors defining the periodic box to fill with water
        padding : distance=None
            the padding distance to use
        numAdded : int=None
            the total number of molecules (waters and ions) to add
        positiveIon : string='Na+'
            the type of positive ion to add.  Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'
        negativeIon : string='Cl-'
            the type of negative ion to add.  Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware
            that not all force fields support all ion types.
        ionicStrength : concentration=0*molar
            the total concentration of ions (both positive and negative) to add.  This
            does not include ions that are added to neutralize the system.
301
            Note that only monovalent ions are currently supported.
Robert McGibbon's avatar
Robert McGibbon committed
302
303
        neutralize : bool=True
            whether to add ions to neutralize the system
Peter Eastman's avatar
Peter Eastman committed
304
        """
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
        if len([x for x in (boxSize, boxVectors, padding, numAdded) if x is not None]) > 1:
            raise ValueError('At most one of the following arguments may be specified: boxSize, boxVectors, padding, numAdded')

        # Load the pre-equilibrated water box.

        vdwRadiusPerSigma = 0.5612310241546864907
        if model == 'tip3p':
            waterRadius = 0.31507524065751241*vdwRadiusPerSigma
        elif model == 'spce':
            waterRadius = 0.31657195050398818*vdwRadiusPerSigma
        elif model == 'tip4pew':
            waterRadius = 0.315365*vdwRadiusPerSigma
        elif model == 'tip5p':
            waterRadius = 0.312*vdwRadiusPerSigma
        else:
            raise ValueError('Unknown water model: %s' % model)
        pdb = PDBFile(os.path.join(os.path.dirname(__file__), 'data', model+'.pdb'))
        pdbTopology = pdb.getTopology()
        pdbPositions = pdb.getPositions().value_in_unit(nanometer)
        pdbResidues = list(pdbTopology.residues())
Robert McGibbon's avatar
Robert McGibbon committed
325
326
        pdbBoxSize = pdbTopology.getUnitCellDimensions().value_in_unit(nanometer)

Peter Eastman's avatar
Peter Eastman committed
327
        # Pick a unit cell size.
Justin MacCallum's avatar
Justin MacCallum committed
328

329
330
        if numAdded is not None:
            # Select a padding distance which is guaranteed to give more than the specified number of molecules.
Robert McGibbon's avatar
Robert McGibbon committed
331

332
333
334
            padding = 1.1*(numAdded/((len(pdbResidues)/pdbBoxSize[0]**3)*8))**(1.0/3.0)
            if padding < 0.5:
                padding = 0.5 # Ensure we have enough when adding very small numbers of molecules
335
336
337
338
339
340
        if boxVectors is not None:
            if is_quantity(boxVectors[0]):
                boxVectors = (boxVectors[0].value_in_unit(nanometer), boxVectors[1].value_in_unit(nanometer), boxVectors[2].value_in_unit(nanometer))
            box = Vec3(boxVectors[0][0], boxVectors[1][1], boxVectors[2][2])
            vectors = boxVectors
        elif boxSize is not None:
341
342
            if is_quantity(boxSize):
                boxSize = boxSize.value_in_unit(nanometer)
343
344
            box = Vec3(boxSize[0], boxSize[1], boxSize[2])
            vectors = (Vec3(boxSize[0], 0, 0), Vec3(0, boxSize[1], 0), Vec3(0, 0, boxSize[2]))
Peter Eastman's avatar
Peter Eastman committed
345
        elif padding is not None:
346
347
            if is_quantity(padding):
                padding = padding.value_in_unit(nanometer)
peastman's avatar
peastman committed
348
349
350
351
352
            if len(self.positions) == 0:
                maxSize = 0
            else:
                maxSize = max(max((pos[i] for pos in self.positions))-min((pos[i] for pos in self.positions)) for i in range(3))
                maxSize = maxSize.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
353
            box = (maxSize+2*padding)*Vec3(1, 1, 1)
354
            vectors = (Vec3(maxSize+2*padding, 0, 0), Vec3(0, maxSize+2*padding, 0), Vec3(0, 0, maxSize+2*padding))
Peter Eastman's avatar
Peter Eastman committed
355
        else:
356
357
            box = self.topology.getUnitCellDimensions().value_in_unit(nanometer)
            vectors = self.topology.getPeriodicBoxVectors().value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
358
            if box is None:
359
                raise ValueError('Neither the box size, box vectors, nor padding was specified, and the Topology does not define unit cell dimensions')
Peter Eastman's avatar
Peter Eastman committed
360
        invBox = Vec3(1.0/box[0], 1.0/box[1], 1.0/box[2])
Justin MacCallum's avatar
Justin MacCallum committed
361

Peter Eastman's avatar
Peter Eastman committed
362
        # Identify the ion types.
Justin MacCallum's avatar
Justin MacCallum committed
363

Peter Eastman's avatar
Peter Eastman committed
364
365
366
367
368
369
370
371
        posIonElements = {'Cs+':elem.cesium, 'K+':elem.potassium, 'Li+':elem.lithium, 'Na+':elem.sodium, 'Rb+':elem.rubidium}
        negIonElements = {'Cl-':elem.chlorine, 'Br-':elem.bromine, 'F-':elem.fluorine, 'I-':elem.iodine}
        if positiveIon not in posIonElements:
            raise ValueError('Illegal value for positive ion: %s' % positiveIon)
        if negativeIon not in negIonElements:
            raise ValueError('Illegal value for negative ion: %s' % negativeIon)
        positiveElement = posIonElements[positiveIon]
        negativeElement = negIonElements[negativeIon]
Justin MacCallum's avatar
Justin MacCallum committed
372

Peter Eastman's avatar
Peter Eastman committed
373
        # Have the ForceField build a System for the solute from which we can determine van der Waals radii.
Justin MacCallum's avatar
Justin MacCallum committed
374

Peter Eastman's avatar
Peter Eastman committed
375
376
377
378
379
380
381
382
        system = forcefield.createSystem(self.topology)
        nonbonded = None
        for i in range(system.getNumForces()):
            if isinstance(system.getForce(i), NonbondedForce):
                nonbonded = system.getForce(i)
        if nonbonded is None:
            raise ValueError('The ForceField does not specify a NonbondedForce')
        cutoff = [nonbonded.getParticleParameters(i)[1].value_in_unit(nanometer)*vdwRadiusPerSigma+waterRadius for i in range(system.getNumParticles())]
383
        waterCutoff = waterRadius
Peter Eastman's avatar
Peter Eastman committed
384
385
386
387
        if len(cutoff) == 0:
            maxCutoff = waterCutoff
        else:
            maxCutoff = max(waterCutoff, max(cutoff))
Justin MacCallum's avatar
Justin MacCallum committed
388

Peter Eastman's avatar
Peter Eastman committed
389
390
391
        # Copy the solute over.

        newTopology = Topology()
392
        newTopology.setPeriodicBoxVectors(vectors*nanometer)
Peter Eastman's avatar
Peter Eastman committed
393
394
395
        newAtoms = {}
        newPositions = []*nanometer
        for chain in self.topology.chains():
396
            newChain = newTopology.addChain(chain.id)
Peter Eastman's avatar
Peter Eastman committed
397
            for residue in chain.residues():
398
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Peter Eastman's avatar
Peter Eastman committed
399
                for atom in residue.atoms():
400
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
Peter Eastman's avatar
Peter Eastman committed
401
402
403
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
404
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
405

Peter Eastman's avatar
Peter Eastman committed
406
        # Sort the solute atoms into cells for fast lookup.
Justin MacCallum's avatar
Justin MacCallum committed
407

Peter Eastman's avatar
Peter Eastman committed
408
409
410
411
        if len(self.positions) == 0:
            positions = []
        else:
            positions = self.positions.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
412
        cells = {}
Peter Eastman's avatar
Peter Eastman committed
413
        numCells = tuple((max(1, int(floor(box[i]/maxCutoff))) for i in range(3)))
Peter Eastman's avatar
Peter Eastman committed
414
415
416
417
418
419
420
        cellSize = tuple((box[i]/numCells[i] for i in range(3)))
        for i in range(len(positions)):
            cell = tuple((int(floor(positions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
            if cell in cells:
                cells[cell].append(i)
            else:
                cells[cell] = [i]
Justin MacCallum's avatar
Justin MacCallum committed
421

Peter Eastman's avatar
Peter Eastman committed
422
        # Create a generator that loops over atoms close to a position.
Justin MacCallum's avatar
Justin MacCallum committed
423

Peter Eastman's avatar
Peter Eastman committed
424
425
426
427
428
429
430
431
432
433
        def neighbors(pos):
            centralCell = tuple((int(floor(pos[i]/cellSize[i])) for i in range(3)))
            offsets = (-1, 0, 1)
            for i in offsets:
                for j in offsets:
                    for k in offsets:
                        cell = ((centralCell[0]+i+numCells[0])%numCells[0], (centralCell[1]+j+numCells[1])%numCells[1], (centralCell[2]+k+numCells[2])%numCells[2])
                        if cell in cells:
                            for atom in cells[cell]:
                                yield atom
Justin MacCallum's avatar
Justin MacCallum committed
434

Peter Eastman's avatar
Peter Eastman committed
435
        # Define a function to compute the distance between two points, taking periodic boundary conditions into account.
Justin MacCallum's avatar
Justin MacCallum committed
436

Peter Eastman's avatar
Peter Eastman committed
437
438
        def periodicDistance(pos1, pos2):
            delta = pos1-pos2
439
440
441
            delta -= vectors[2]*floor(delta[2]*invBox[2]+0.5)
            delta -= vectors[1]*floor(delta[1]*invBox[1]+0.5)
            delta -= vectors[0]*floor(delta[0]*invBox[0]+0.5)
442
            return norm(delta)
Justin MacCallum's avatar
Justin MacCallum committed
443

Peter Eastman's avatar
Peter Eastman committed
444
        # Find the list of water molecules to add.
Justin MacCallum's avatar
Justin MacCallum committed
445

Peter Eastman's avatar
Peter Eastman committed
446
        newChain = newTopology.addChain()
Peter Eastman's avatar
Peter Eastman committed
447
448
449
450
451
        if len(positions) == 0:
            center = Vec3(0, 0, 0)
        else:
            center = [(max((pos[i] for pos in positions))+min((pos[i] for pos in positions)))/2 for i in range(3)]
            center = Vec3(center[0], center[1], center[2])
Peter Eastman's avatar
Peter Eastman committed
452
453
454
455
456
457
458
459
460
461
462
        numBoxes = [int(ceil(box[i]/pdbBoxSize[i])) for i in range(3)]
        addedWaters = []
        for boxx in range(numBoxes[0]):
            for boxy in range(numBoxes[1]):
                for boxz in range(numBoxes[2]):
                    offset = Vec3(boxx*pdbBoxSize[0], boxy*pdbBoxSize[1], boxz*pdbBoxSize[2])
                    for residue in pdbResidues:
                        oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
                        atomPos = pdbPositions[oxygen.index]+offset
                        if not any((atomPos[i] > box[i] for i in range(3))):
                            # This molecule is inside the box, so see how close to it is to the solute.
Justin MacCallum's avatar
Justin MacCallum committed
463

Peter Eastman's avatar
Peter Eastman committed
464
465
466
467
468
469
                            atomPos += center-box/2
                            for i in neighbors(atomPos):
                                if periodicDistance(atomPos, positions[i]) < cutoff[i]:
                                    break
                            else:
                                # Record this water molecule as one to add.
Justin MacCallum's avatar
Justin MacCallum committed
470

Peter Eastman's avatar
Peter Eastman committed
471
                                addedWaters.append((residue.index, atomPos))
Justin MacCallum's avatar
Justin MacCallum committed
472

473
474
475
        if numAdded is not None:
            # We added many more waters than we actually want.  Sort them based on distance to the nearest box edge and
            # only keep the ones in the middle.
Robert McGibbon's avatar
Robert McGibbon committed
476

477
478
            lowerBound = center-box/2
            upperBound = center+box/2
peastman's avatar
peastman committed
479
            distToEdge = (min(min(pos-lowerBound), min(upperBound-pos)) for index, pos in addedWaters)
480
481
            sortedIndex = [i[0] for i in sorted(enumerate(distToEdge), key=lambda x: -x[1])]
            addedWaters = [addedWaters[i] for i in sortedIndex[:numAdded]]
Robert McGibbon's avatar
Robert McGibbon committed
482

483
            # Compute a new periodic box size.
Robert McGibbon's avatar
Robert McGibbon committed
484

485
486
487
488
            maxSize = max(max((pos[i] for index, pos in addedWaters))-min((pos[i] for index, pos in addedWaters)) for i in range(3))
            newTopology.setUnitCellDimensions(Vec3(maxSize, maxSize, maxSize))
        else:
            # There could be clashes between water molecules at the box edges.  Find ones to remove.
Robert McGibbon's avatar
Robert McGibbon committed
489

490
491
492
493
494
495
496
497
498
499
500
501
502
503
            upperCutoff = center+box/2-Vec3(waterCutoff, waterCutoff, waterCutoff)
            lowerCutoff = center-box/2+Vec3(waterCutoff, waterCutoff, waterCutoff)
            lowerSkinPositions = [pos for index, pos in addedWaters if pos[0] < lowerCutoff[0] or pos[1] < lowerCutoff[1] or pos[2] < lowerCutoff[2]]
            filteredWaters = []
            cells = {}
            for i in range(len(lowerSkinPositions)):
                cell = tuple((int(floor(lowerSkinPositions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
                if cell in cells:
                    cells[cell].append(i)
                else:
                    cells[cell] = [i]
            for entry in addedWaters:
                pos = entry[1]
                if pos[0] < upperCutoff[0] and pos[1] < upperCutoff[1] and pos[2] < upperCutoff[2]:
Peter Eastman's avatar
Peter Eastman committed
504
                    filteredWaters.append(entry)
505
506
507
508
                else:
                    if not any((periodicDistance(lowerSkinPositions[i], pos) < waterCutoff and norm(lowerSkinPositions[i]-pos) > waterCutoff for i in neighbors(pos))):
                        filteredWaters.append(entry)
            addedWaters = filteredWaters
Justin MacCallum's avatar
Justin MacCallum committed
509

Peter Eastman's avatar
Peter Eastman committed
510
        # Add ions to neutralize the system.
Justin MacCallum's avatar
Justin MacCallum committed
511

Peter Eastman's avatar
Peter Eastman committed
512
513
514
515
516
517
518
        def addIon(element):
            # Replace a water by an ion.
            index = random.randint(0, len(addedWaters)-1)
            newResidue = newTopology.addResidue(element.symbol.upper(), newChain)
            newTopology.addAtom(element.symbol, element, newResidue)
            newPositions.append(addedWaters[index][1]*nanometer)
            del addedWaters[index]
519
520
521
522
523
524
        if neutralize:
            totalCharge = int(floor(0.5+sum((nonbonded.getParticleParameters(i)[0].value_in_unit(elementary_charge) for i in range(system.getNumParticles())))))
            if abs(totalCharge) > len(addedWaters):
                raise Exception('Cannot neutralize the system because the charge is greater than the number of available positions for ions')
            for i in range(abs(totalCharge)):
                addIon(positiveElement if totalCharge < 0 else negativeElement)
Justin MacCallum's avatar
Justin MacCallum committed
525

Peter Eastman's avatar
Peter Eastman committed
526
        # Add ions based on the desired ionic strength.
Justin MacCallum's avatar
Justin MacCallum committed
527

Peter Eastman's avatar
Peter Eastman committed
528
        numIons = len(addedWaters)*ionicStrength/(55.4*molar) # Pure water is about 55.4 molar (depending on temperature)
529
        numPairs = int(floor(numIons+0.5))
Peter Eastman's avatar
Peter Eastman committed
530
531
532
533
        for i in range(numPairs):
            addIon(positiveElement)
        for i in range(numPairs):
            addIon(negativeElement)
Justin MacCallum's avatar
Justin MacCallum committed
534

Peter Eastman's avatar
Peter Eastman committed
535
        # Add the water molecules.
Justin MacCallum's avatar
Justin MacCallum committed
536

Peter Eastman's avatar
Peter Eastman committed
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
        for index, pos in addedWaters:
            newResidue = newTopology.addResidue(residue.name, newChain)
            residue = pdbResidues[index]
            oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
            oPos = pdbPositions[oxygen.index]
            molAtoms = []
            for atom in residue.atoms():
                molAtoms.append(newTopology.addAtom(atom.name, atom.element, newResidue))
                newPositions.append((pos+pdbPositions[atom.index]-oPos)*nanometer)
            for atom1 in molAtoms:
                if atom1.element == elem.oxygen:
                    for atom2 in molAtoms:
                        if atom2.element == elem.hydrogen:
                            newTopology.addBond(atom1, atom2)
        self.topology = newTopology
        self.positions = newPositions
Justin MacCallum's avatar
Justin MacCallum committed
553

Peter Eastman's avatar
Peter Eastman committed
554
555
556
557
558
559
    class _ResidueData:
        """Inner class used to encapsulate data about the hydrogens for a residue."""
        def __init__(self, name):
            self.name = name
            self.variants = []
            self.hydrogens = []
Justin MacCallum's avatar
Justin MacCallum committed
560

Peter Eastman's avatar
Peter Eastman committed
561
562
563
564
565
566
567
568
    class _Hydrogen:
        """Inner class used to encapsulate data about a hydrogen atom."""
        def __init__(self, name, parent, maxph, variants, terminal):
            self.name = name
            self.parent = parent
            self.maxph = maxph
            self.variants = variants
            self.terminal = terminal
Justin MacCallum's avatar
Justin MacCallum committed
569

570
571
572
    @staticmethod
    def loadHydrogenDefinitions(file):
        """Load an XML file containing definitions of hydrogens that should be added by addHydrogens().
Justin MacCallum's avatar
Justin MacCallum committed
573

574
575
576
577
        The built in hydrogens.xml file containing definitions for standard amino acids and nucleotides is loaded automatically.
        This method can be used to load additional definitions for other residue types.  They will then be used in subsequent
        calls to addHydrogens().
        """
578
579
580
581
582
        tree = etree.parse(file)
        infinity = float('Inf')
        for residue in tree.getroot().findall('Residue'):
            resName = residue.attrib['name']
            data = Modeller._ResidueData(resName)
583
            Modeller._residueHydrogens[resName] = data
584
585
586
587
588
589
590
591
592
593
594
595
596
            for variant in residue.findall('Variant'):
                data.variants.append(variant.attrib['name'])
            for hydrogen in residue.findall('H'):
                maxph = infinity
                if 'maxph' in hydrogen.attrib:
                    maxph = float(hydrogen.attrib['maxph'])
                atomVariants = None
                if 'variant' in hydrogen.attrib:
                    atomVariants = hydrogen.attrib['variant'].split(',')
                terminal = None
                if 'terminal' in hydrogen.attrib:
                    terminal = hydrogen.attrib['terminal']
                data.hydrogens.append(Modeller._Hydrogen(hydrogen.attrib['name'], hydrogen.attrib['parent'], maxph, atomVariants, terminal))
Justin MacCallum's avatar
Justin MacCallum committed
597

598
    def addHydrogens(self, forcefield=None, pH=7.0, variants=None, platform=None):
Peter Eastman's avatar
Peter Eastman committed
599
        """Add missing hydrogens to the model.
Justin MacCallum's avatar
Justin MacCallum committed
600

Peter Eastman's avatar
Peter Eastman committed
601
602
603
        Some residues can exist in multiple forms depending on the pH and properties of the local environment.  These
        variants differ in the presence or absence of particular hydrogens.  In particular, the following variants
        are supported:
Justin MacCallum's avatar
Justin MacCallum committed
604

Peter Eastman's avatar
Peter Eastman committed
605
606
607
        Aspartic acid:
            ASH: Neutral form with a hydrogen on one of the delta oxygens
            ASP: Negatively charged form without a hydrogen on either delta oxygen
Justin MacCallum's avatar
Justin MacCallum committed
608

Peter Eastman's avatar
Peter Eastman committed
609
610
611
        Cysteine:
            CYS: Neutral form with a hydrogen on the sulfur
            CYX: No hydrogen on the sulfur (either negatively charged, or part of a disulfide bond)
Justin MacCallum's avatar
Justin MacCallum committed
612

Peter Eastman's avatar
Peter Eastman committed
613
614
615
        Glutamic acid:
            GLH: Neutral form with a hydrogen on one of the epsilon oxygens
            GLU: Negatively charged form without a hydrogen on either epsilon oxygen
Justin MacCallum's avatar
Justin MacCallum committed
616

Peter Eastman's avatar
Peter Eastman committed
617
618
619
620
        Histidine:
            HID: Neutral form with a hydrogen on the ND1 atom
            HIE: Neutral form with a hydrogen on the NE2 atom
            HIP: Positively charged form with hydrogens on both ND1 and NE2
Peter Eastman's avatar
Peter Eastman committed
621
            HIN: Negatively charged form without a hydrogen on either ND1 or NE2
Justin MacCallum's avatar
Justin MacCallum committed
622

Peter Eastman's avatar
Peter Eastman committed
623
624
625
        Lysine:
            LYN: Neutral form with two hydrogens on the zeta nitrogen
            LYS: Positively charged form with three hydrogens on the zeta nitrogen
Justin MacCallum's avatar
Justin MacCallum committed
626

Peter Eastman's avatar
Peter Eastman committed
627
        The variant to use for each residue is determined by the following rules:
Justin MacCallum's avatar
Justin MacCallum committed
628

Peter Eastman's avatar
Peter Eastman committed
629
630
631
        1. The most common variant at the specified pH is selected.
        2. Any Cysteine that participates in a disulfide bond uses the CYX variant regardless of pH.
        3. For a neutral Histidine residue, the HID or HIE variant is selected based on which one forms a better hydrogen bond.
Justin MacCallum's avatar
Justin MacCallum committed
632

Peter Eastman's avatar
Peter Eastman committed
633
634
635
636
637
638
639
640
        You can override these rules by explicitly specifying a variant for any residue.  To do that, provide a list for the
        'variants' parameter, and set the corresponding element to the name of the variant to use.

        A special case is when the model already contains a hydrogen that should not be present in the desired variant.
        If you explicitly specify a variant using the 'variants' parameter, the residue will be modified to match the
        desired variant, removing hydrogens if necessary.  On the other hand, for residues whose variant is selected
        automatically, this function will only add hydrogens.  It will never remove ones that are already present in the
        model, regardless of the specified pH.
Justin MacCallum's avatar
Justin MacCallum committed
641

642
643
644
        Definitions for standard amino acids and nucleotides are built in.  You can call loadHydrogenDefinitions() to load
        additional definitions for other residue types.

Robert McGibbon's avatar
Robert McGibbon committed
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
        Parameters
        ----------
        forcefield : ForceField=None
            the ForceField to use for determining the positions of hydrogens.
            If this is None, positions will be picked which are generally
            reasonable but not optimized for any particular ForceField.
        pH : float=7.0
            the pH based on which to select variants
        variants : list=None
            an optional list of variants to use.  If this is specified, its
            length must equal the number of residues in the model.  variants[i]
            is the name of the variant to use for residue i (indexed starting at
            0). If an element is None, the standard rules will be followed to
            select a variant for that residue.
        platform : Platform=None
            the Platform to use when computing the hydrogen atom positions.  If
            this is None, the default Platform will be used.

        Returns
        -------
665
        list
Robert McGibbon's avatar
Robert McGibbon committed
666
             a list of what variant was actually selected for each residue,
Robert McGibbon's avatar
Robert McGibbon committed
667
             in the same format as the variants parameter
Peter Eastman's avatar
Peter Eastman committed
668
669
        """
        # Check the list of variants.
Justin MacCallum's avatar
Justin MacCallum committed
670

Peter Eastman's avatar
Peter Eastman committed
671
672
673
674
675
676
        residues = list(self.topology.residues())
        if variants is not None:
            if len(variants) != len(residues):
                raise ValueError("The length of the variants list must equal the number of residues")
        else:
            variants = [None]*len(residues)
677
        actualVariants = [None]*len(residues)
Justin MacCallum's avatar
Justin MacCallum committed
678

Peter Eastman's avatar
Peter Eastman committed
679
        # Load the residue specifications.
Justin MacCallum's avatar
Justin MacCallum committed
680

681
682
        if not Modeller._hasLoadedStandardHydrogens:
            Modeller.loadHydrogenDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'hydrogens.xml'))
Peter Eastman's avatar
Peter Eastman committed
683
684

        # Make a list of atoms bonded to each atom.
Justin MacCallum's avatar
Justin MacCallum committed
685

Peter Eastman's avatar
Peter Eastman committed
686
        bonded = {}
687
688
        for atom in self.topology.atoms():
            bonded[atom] = []
Peter Eastman's avatar
Peter Eastman committed
689
690
691
        for atom1, atom2 in self.topology.bonds():
            bonded[atom1].append(atom2)
            bonded[atom2].append(atom1)
Justin MacCallum's avatar
Justin MacCallum committed
692

693
        # Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.
Justin MacCallum's avatar
Justin MacCallum committed
694

695
696
697
698
699
700
701
702
        def isHbond(d, h, a):
            if norm(d-a) > 0.35*nanometer:
                return False
            deltaDH = h-d
            deltaHA = a-h
            deltaDH /= norm(deltaDH)
            deltaHA /= norm(deltaHA)
            return acos(dot(deltaDH, deltaHA)) < 50*degree
Justin MacCallum's avatar
Justin MacCallum committed
703

Peter Eastman's avatar
Peter Eastman committed
704
        # Loop over residues.
Justin MacCallum's avatar
Justin MacCallum committed
705

Peter Eastman's avatar
Peter Eastman committed
706
        newTopology = Topology()
707
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
Peter Eastman's avatar
Peter Eastman committed
708
709
710
711
712
        newAtoms = {}
        newPositions = []*nanometer
        newIndices = []
        acceptors = [atom for atom in self.topology.atoms() if atom.element in (elem.oxygen, elem.nitrogen)]
        for chain in self.topology.chains():
713
            newChain = newTopology.addChain(chain.id)
Peter Eastman's avatar
Peter Eastman committed
714
            for residue in chain.residues():
715
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Peter Eastman's avatar
Peter Eastman committed
716
717
                isNTerminal = (residue == chain._residues[0])
                isCTerminal = (residue == chain._residues[-1])
718
                if residue.name in Modeller._residueHydrogens:
Peter Eastman's avatar
Peter Eastman committed
719
                    # Add hydrogens.  First select which variant to use.
Justin MacCallum's avatar
Justin MacCallum committed
720

721
                    spec = Modeller._residueHydrogens[residue.name]
Peter Eastman's avatar
Peter Eastman committed
722
723
724
725
                    variant = variants[residue.index]
                    if variant is None:
                        if residue.name == 'CYS':
                            # If this is part of a disulfide, use CYX.
Justin MacCallum's avatar
Justin MacCallum committed
726

Peter Eastman's avatar
Peter Eastman committed
727
728
729
730
                            sulfur = [atom for atom in residue.atoms() if atom.element == elem.sulfur]
                            if len(sulfur) == 1 and any((atom.residue != residue for atom in bonded[sulfur[0]])):
                                variant = 'CYX'
                        if residue.name == 'HIS' and pH > 6.5:
731
                            # See if either nitrogen already has a hydrogen attached.
Justin MacCallum's avatar
Justin MacCallum committed
732

733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
                            nd1 = [atom for atom in residue.atoms() if atom.name == 'ND1']
                            ne2 = [atom for atom in residue.atoms() if atom.name == 'NE2']
                            if len(nd1) != 1 or len(ne2) != 1:
                                raise ValueError('HIS residue (%d) has the wrong set of atoms' % residue.index)
                            nd1 = nd1[0]
                            ne2 = ne2[0]
                            nd1HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[nd1]))
                            ne2HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[ne2]))
                            if nd1HasHydrogen and ne2HasHydrogen:
                                variant = 'HIP'
                            elif nd1HasHydrogen:
                                variant = 'HID'
                            elif ne2HasHydrogen:
                                variant = 'HIE'
                            else:
                                # Estimate the hydrogen positions.
Justin MacCallum's avatar
Justin MacCallum committed
749

750
751
752
753
754
755
756
757
758
759
760
761
762
763
                                nd1Pos = self.positions[nd1.index]
                                ne2Pos = self.positions[ne2.index]
                                hd1Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[nd1]:
                                    hd1Delta += nd1Pos-self.positions[other.index]
                                hd1Delta *= 0.1*nanometer/norm(hd1Delta)
                                hd1Pos = nd1Pos+hd1Delta
                                he2Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[ne2]:
                                    he2Delta += ne2Pos-self.positions[other.index]
                                he2Delta *= 0.1*nanometer/norm(he2Delta)
                                he2Pos = ne2Pos+he2Delta

                                # See whether either hydrogen would form a hydrogen bond.
Justin MacCallum's avatar
Justin MacCallum committed
764

765
766
767
768
769
770
771
772
773
774
775
776
777
778
                                nd1IsBonded = False
                                ne2IsBonded = False
                                for acceptor in acceptors:
                                    if acceptor.residue != residue:
                                        acceptorPos = self.positions[acceptor.index]
                                        if isHbond(nd1Pos, hd1Pos, acceptorPos):
                                            nd1IsBonded = True
                                            break
                                        if isHbond(ne2Pos, he2Pos, acceptorPos):
                                            ne2IsBonded = True
                                if ne2IsBonded and not nd1IsBonded:
                                    variant = 'HIE'
                                else:
                                    variant = 'HID'
Peter Eastman's avatar
Peter Eastman committed
779
780
781
782
                        elif residue.name == 'HIS':
                            variant = 'HIP'
                    if variant is not None and variant not in spec.variants:
                        raise ValueError('Illegal variant for %s residue: %s' % (residue.name, variant))
783
                    actualVariants[residue.index] = variant
Peter Eastman's avatar
Peter Eastman committed
784
                    removeExtraHydrogens = (variants[residue.index] is not None)
Justin MacCallum's avatar
Justin MacCallum committed
785

Peter Eastman's avatar
Peter Eastman committed
786
                    # Make a list of hydrogens that should be present in the residue.
Justin MacCallum's avatar
Justin MacCallum committed
787

Peter Eastman's avatar
Peter Eastman committed
788
789
790
791
792
                    parents = [atom for atom in residue.atoms() if atom.element != elem.hydrogen]
                    parentNames = [atom.name for atom in parents]
                    hydrogens = [h for h in spec.hydrogens if (variant is None and pH <= h.maxph) or (h.variants is None and pH <= h.maxph) or (h.variants is not None and variant in h.variants)]
                    hydrogens = [h for h in hydrogens if h.terminal is None or (isNTerminal and h.terminal == 'N') or (isCTerminal and h.terminal == 'C')]
                    hydrogens = [h for h in hydrogens if h.parent in parentNames]
Justin MacCallum's avatar
Justin MacCallum committed
793

Peter Eastman's avatar
Peter Eastman committed
794
                    # Loop over atoms in the residue, adding them to the new topology along with required hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
795

Peter Eastman's avatar
Peter Eastman committed
796
                    for parent in residue.atoms():
Peter Eastman's avatar
Peter Eastman committed
797
798
799
800
801
                        # Check whether this is a hydrogen that should be removed.

                        if removeExtraHydrogens and parent.element == elem.hydrogen and not any(parent.name == h.name for h in hydrogens):
                            continue

Peter Eastman's avatar
Peter Eastman committed
802
                        # Add the atom.
Justin MacCallum's avatar
Justin MacCallum committed
803

Peter Eastman's avatar
Peter Eastman committed
804
805
806
807
808
                        newAtom = newTopology.addAtom(parent.name, parent.element, newResidue)
                        newAtoms[parent] = newAtom
                        newPositions.append(deepcopy(self.positions[parent.index]))
                        if parent in parents:
                            # Match expected hydrogens with existing ones and find which ones need to be added.
Justin MacCallum's avatar
Justin MacCallum committed
809

Peter Eastman's avatar
Peter Eastman committed
810
811
812
813
                            existing = [atom for atom in bonded[parent] if atom.element == elem.hydrogen]
                            expected = [h for h in hydrogens if h.parent == parent.name]
                            if len(existing) < len(expected):
                                # Try to match up existing hydrogens to expected ones.
Justin MacCallum's avatar
Justin MacCallum committed
814

Peter Eastman's avatar
Peter Eastman committed
815
816
817
818
819
820
821
822
                                matches = []
                                for e in existing:
                                    match = [h for h in expected if h.name == e.name]
                                    if len(match) > 0:
                                        matches.append(match[0])
                                        expected.remove(match[0])
                                    else:
                                        matches.append(None)
Justin MacCallum's avatar
Justin MacCallum committed
823

Peter Eastman's avatar
Peter Eastman committed
824
                                # If any hydrogens couldn't be matched by name, just match them arbitrarily.
Justin MacCallum's avatar
Justin MacCallum committed
825

Peter Eastman's avatar
Peter Eastman committed
826
827
828
829
                                for i in range(len(matches)):
                                    if matches[i] is None:
                                        matches[i] = expected[-1]
                                        expected.remove(expected[-1])
Justin MacCallum's avatar
Justin MacCallum committed
830

Peter Eastman's avatar
Peter Eastman committed
831
                                # Add the missing hydrogens.
Justin MacCallum's avatar
Justin MacCallum committed
832

Peter Eastman's avatar
Peter Eastman committed
833
834
835
                                for h in expected:
                                    newH = newTopology.addAtom(h.name, elem.hydrogen, newResidue)
                                    newIndices.append(newH.index)
836
837
                                    delta = Vec3(0, 0, 0)*nanometer
                                    if len(bonded[parent]) > 0:
Peter Eastman's avatar
Peter Eastman committed
838
839
840
841
                                        for other in bonded[parent]:
                                            delta += self.positions[parent.index]-self.positions[other.index]
                                    else:
                                        delta = Vec3(random.random(), random.random(), random.random())*nanometer
842
                                    delta *= 0.1*nanometer/norm(delta)
843
844
                                    delta += 0.05*Vec3(random.random(), random.random(), random.random())*nanometer
                                    delta *= 0.1*nanometer/norm(delta)
Peter Eastman's avatar
Peter Eastman committed
845
846
847
848
                                    newPositions.append(self.positions[parent.index]+delta)
                                    newTopology.addBond(newAtom, newH)
                else:
                    # Just copy over the residue.
Justin MacCallum's avatar
Justin MacCallum committed
849

Peter Eastman's avatar
Peter Eastman committed
850
851
852
853
854
855
856
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
Justin MacCallum's avatar
Justin MacCallum committed
857

858
859
860
861
        # The hydrogens were added at random positions.  Now perform an energy minimization to fix them up.

        if forcefield is not None:
            # Use the ForceField the user specified.
Robert McGibbon's avatar
Robert McGibbon committed
862

863
            system = forcefield.createSystem(newTopology, rigidWater=False, nonbondedMethod=CutoffNonPeriodic)
864
865
866
867
868
869
870
871
            atoms = list(newTopology.atoms())
            for i in range(system.getNumParticles()):
                if atoms[i].element != elem.hydrogen:
                    # This is a heavy atom, so make it immobile.
                    system.setParticleMass(i, 0)
        else:
            # Create a System that restrains the distance of each hydrogen from its parent atom
            # and causes hydrogens to spread out evenly.
Robert McGibbon's avatar
Robert McGibbon committed
872

873
            system = System()
874
            nonbonded = CustomNonbondedForce('100/((r/0.1)^4+1)')
875
876
            nonbonded.setNonbondedMethod(CustomNonbondedForce.CutoffNonPeriodic);
            nonbonded.setCutoffDistance(1*nanometer)
877
878
879
880
881
            bonds = HarmonicBondForce()
            angles = HarmonicAngleForce()
            system.addForce(nonbonded)
            system.addForce(bonds)
            system.addForce(angles)
882
            bondedTo = []
883
884
885
886
887
888
            for atom in newTopology.atoms():
                nonbonded.addParticle([])
                if atom.element != elem.hydrogen:
                    system.addParticle(0.0)
                else:
                    system.addParticle(1.0)
889
                bondedTo.append([])
890
891
892
            for atom1, atom2 in newTopology.bonds():
                if atom1.element == elem.hydrogen or atom2.element == elem.hydrogen:
                    bonds.addBond(atom1.index, atom2.index, 0.1, 100000.0)
893
894
                bondedTo[atom1.index].append(atom2)
                bondedTo[atom2.index].append(atom1)
895
896
            for residue in newTopology.residues():
                if residue.name == 'HOH':
897
                    # Add an angle term to make the water geometry correct.
Robert McGibbon's avatar
Robert McGibbon committed
898

899
900
901
902
903
                    atoms = list(residue.atoms())
                    oindex = [i for i in range(len(atoms)) if atoms[i].element == elem.oxygen]
                    if len(atoms) == 3 and len(oindex) == 1:
                        hindex = list(set([0,1,2])-set(oindex))
                        angles.addAngle(atoms[hindex[0]].index, atoms[oindex[0]].index, atoms[hindex[1]].index, 1.824, 836.8)
904
905
                else:
                    # Add angle terms for any hydroxyls.
Robert McGibbon's avatar
Robert McGibbon committed
906

907
908
909
910
                    for atom in residue.atoms():
                        index = atom.index
                        if atom.element == elem.oxygen and len(bondedTo[index]) == 2 and elem.hydrogen in (a.element for a in bondedTo[index]):
                            angles.addAngle(bondedTo[index][0].index, index, bondedTo[index][1].index, 1.894, 460.24)
Robert McGibbon's avatar
Robert McGibbon committed
911

912
913
914
915
        if platform is None:
            context = Context(system, VerletIntegrator(0.0))
        else:
            context = Context(system, VerletIntegrator(0.0), platform)
Peter Eastman's avatar
Peter Eastman committed
916
        context.setPositions(newPositions)
917
        LocalEnergyMinimizer.minimize(context, 1.0, 50)
Peter Eastman's avatar
Peter Eastman committed
918
919
        self.topology = newTopology
        self.positions = context.getState(getPositions=True).getPositions()
920
        del context
921
        return actualVariants
Justin MacCallum's avatar
Justin MacCallum committed
922

923
    def addExtraParticles(self, forcefield):
Robert McGibbon's avatar
Robert McGibbon committed
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
        """Add missing extra particles to the model that are required by a force
        field.

        Some force fields use "extra particles" that do not represent
        actual atoms, but still need to be included in the System.  Examples
        include lone pairs, Drude particles, and the virtual sites used in some
        water models to adjust the charge distribution.  Extra particles can be
        recognized by the fact that their element is None.

        This method is primarily used to add extra particles, but it can also
        remove them.  It tries to match every residue in the Topology to a
        template in the force field.  If there is no match, it will both add
        and remove extra particles as necessary to make it match.

        Parameters
        ----------
        forcefield : ForceField
            the ForceField defining what extra particles should be present
942
943
        """
        # Create copies of all residue templates that have had all extra points removed.
Justin MacCallum's avatar
Justin MacCallum committed
944

945
        templatesNoEP = {}
946
        for resName, template  in forcefield._templates.items():
947
948
949
950
951
952
953
954
            if any(atom.element is None for atom in template.atoms):
                index = 0
                newIndex = {}
                newTemplate = ForceField._TemplateData(resName)
                for i, atom in enumerate(template.atoms):
                    if atom.element is not None:
                        newIndex[i] = index
                        index += 1
955
956
957
                        newAtom = ForceField._TemplateAtomData(atom.name, atom.type, atom.element)
                        newAtom.externalBonds = atom.externalBonds
                        newTemplate.atoms.append(newAtom)
958
959
960
961
962
963
964
965
966
                for b1, b2 in template.bonds:
                    if b1 in newIndex and b2 in newIndex:
                        newTemplate.bonds.append((newIndex[b1], newIndex[b2]))
                        newTemplate.atoms[newIndex[b1]].bondedTo.append(newIndex[b2])
                        newTemplate.atoms[newIndex[b2]].bondedTo.append(newIndex[b1])
                for b in template.externalBonds:
                    if b in newIndex:
                        newTemplate.externalBonds.append(newIndex[b])
                templatesNoEP[template] = newTemplate
Justin MacCallum's avatar
Justin MacCallum committed
967

968
        # Record which atoms are bonded to each other atom, with and without extra particles.
Justin MacCallum's avatar
Justin MacCallum committed
969

970
971
972
973
        bondedToAtom = []
        bondedToAtomNoEP = []
        for atom in self.topology.atoms():
            bondedToAtom.append(set())
peastman's avatar
peastman committed
974
            bondedToAtomNoEP.append(set())
975
976
977
978
979
980
        for atom1, atom2 in self.topology.bonds():
            bondedToAtom[atom1.index].add(atom2.index)
            bondedToAtom[atom2.index].add(atom1.index)
            if atom1.element is not None and atom2.element is not None:
                bondedToAtomNoEP[atom1.index].add(atom2.index)
                bondedToAtomNoEP[atom2.index].add(atom1.index)
Justin MacCallum's avatar
Justin MacCallum committed
981

peastman's avatar
peastman committed
982
983
        # If the force field has a DrudeForce, record the types of Drude particles and their parents since we'll
        # need them for picking particle positions.
Justin MacCallum's avatar
Justin MacCallum committed
984

peastman's avatar
peastman committed
985
986
987
988
989
        drudeTypeMap = {}
        for force in forcefield._forces:
            if isinstance(force, DrudeGenerator):
                for type in force.typeMap:
                    drudeTypeMap[type] = force.typeMap[type][0]
Justin MacCallum's avatar
Justin MacCallum committed
990

991
        # Create the new Topology.
Justin MacCallum's avatar
Justin MacCallum committed
992

993
        newTopology = Topology()
994
        newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
995
996
        newAtoms = {}
        newPositions = []*nanometer
997
        missingPositions = set()
998
        for chain in self.topology.chains():
999
            newChain = newTopology.addChain(chain.id)
1000
            for residue in chain.residues():
1001
                newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
Justin MacCallum's avatar
Justin MacCallum committed
1002

1003
                # Look for a matching template.
Justin MacCallum's avatar
Justin MacCallum committed
1004

1005
1006
1007
1008
                matchFound = False
                signature = _createResidueSignature([atom.element for atom in residue.atoms()])
                if signature in forcefield._templateSignatures:
                    for t in forcefield._templateSignatures[signature]:
peastman's avatar
peastman committed
1009
                        if _matchResidue(residue, t, bondedToAtom, False) is not None:
1010
1011
1012
                            matchFound = True
                if matchFound:
                    # Just copy the residue over.
Justin MacCallum's avatar
Justin MacCallum committed
1013

1014
1015
1016
1017
1018
1019
1020
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
                else:
                    # There's no matching template.  Try to find one that matches based on everything except
                    # extra points.
Justin MacCallum's avatar
Justin MacCallum committed
1021

1022
                    template = None
1023
                    residueNoEP = Residue(residue.name, residue.index, residue.chain, residue.id)
1024
1025
1026
1027
                    residueNoEP._atoms = [atom for atom in residue.atoms() if atom.element is not None]
                    if signature in forcefield._templateSignatures:
                        for t in forcefield._templateSignatures[signature]:
                            if t in templatesNoEP:
peastman's avatar
peastman committed
1028
                                matches = _matchResidue(residueNoEP, templatesNoEP[t], bondedToAtomNoEP, False)
1029
1030
1031
1032
1033
                                if matches is not None:
                                    template = t;
                                    # Record the corresponding atoms.
                                    matchingAtoms = {}
                                    for atom, match in zip(residueNoEP.atoms(), matches):
peastman's avatar
peastman committed
1034
                                        templateAtomName = templatesNoEP[t].atoms[match].name
1035
1036
1037
1038
1039
1040
                                        for templateAtom in template.atoms:
                                            if templateAtom.name == templateAtomName:
                                                matchingAtoms[templateAtom] = atom
                                    break
                    if template is None:
                        raise ValueError('Residue %d (%s) does not match any template defined by the ForceField.' % (residue.index+1, residue.name))
Justin MacCallum's avatar
Justin MacCallum committed
1041

1042
                    # Add the regular atoms.
Justin MacCallum's avatar
Justin MacCallum committed
1043

1044
1045
1046
1047
                    for atom in residue.atoms():
                        if atom.element is not None:
                            newAtoms[atom] = newTopology.addAtom(atom.name, atom.element, newResidue)
                            newPositions.append(deepcopy(self.positions[atom.index]))
Justin MacCallum's avatar
Justin MacCallum committed
1048

1049
                    # Add the extra points.
Justin MacCallum's avatar
Justin MacCallum committed
1050

1051
1052
1053
                    templateAtomPositions = len(template.atoms)*[None]
                    for index, atom in enumerate(template.atoms):
                        if atom in matchingAtoms:
peastman's avatar
peastman committed
1054
                            templateAtomPositions[index] = self.positions[matchingAtoms[atom].index].value_in_unit(nanometer)
1055
                    newExtraPoints = {}
1056
1057
                    for index, atom in enumerate(template.atoms):
                        if atom.element is None:
1058
                            newExtraPoints[atom] = newTopology.addAtom(atom.name, None, newResidue)
1059
1060
1061
1062
                            position = None
                            for site in template.virtualSites:
                                if site.index == index:
                                    # This is a virtual site.  Compute its position by the correct rule.
Justin MacCallum's avatar
Justin MacCallum committed
1063

1064
                                    if site.type == 'average2':
Matthew Harrigan's avatar
Matthew Harrigan committed
1065
                                        position = site.weights[0]*templateAtomPositions[site.atoms[0]] + site.weights[1]*templateAtomPositions[site.atoms[1]]
1066
                                    elif site.type == 'average3':
Matthew Harrigan's avatar
Matthew Harrigan committed
1067
                                        position = site.weights[0]*templateAtomPositions[site.atoms[0]] + site.weights[1]*templateAtomPositions[site.atoms[1]] + site.weights[2]*templateAtomPositions[site.atoms[2]]
1068
                                    elif site.type == 'outOfPlane':
Matthew Harrigan's avatar
Matthew Harrigan committed
1069
1070
                                        v1 = templateAtomPositions[site.atoms[1]] - templateAtomPositions[site.atoms[0]]
                                        v2 = templateAtomPositions[site.atoms[2]] - templateAtomPositions[site.atoms[0]]
peastman's avatar
peastman committed
1071
                                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
Matthew Harrigan's avatar
Matthew Harrigan committed
1072
                                        position = templateAtomPositions[site.atoms[0]] + site.weights[0]*v1 + site.weights[1]*v2 + site.weights[2]*cross
1073
1074
1075
1076
1077
1078
1079
1080
1081
                                    elif site.type == 'localCoords':
                                        origin = templateAtomPositions[site.atoms[0]]*site.originWeights[0] + templateAtomPositions[site.atoms[1]]*site.originWeights[1] + templateAtomPositions[site.atoms[2]]*site.originWeights[2];
                                        xdir = templateAtomPositions[site.atoms[0]]*site.xWeights[0] + templateAtomPositions[site.atoms[1]]*site.xWeights[1] + templateAtomPositions[site.atoms[2]]*site.xWeights[2];
                                        ydir = templateAtomPositions[site.atoms[0]]*site.yWeights[0] + templateAtomPositions[site.atoms[1]]*site.yWeights[1] + templateAtomPositions[site.atoms[2]]*site.yWeights[2];
                                        zdir = Vec3(xdir[1]*ydir[2]-xdir[2]*ydir[1], xdir[2]*ydir[0]-xdir[0]*ydir[2], xdir[0]*ydir[1]-xdir[1]*ydir[0])
                                        xdir /= norm(xdir);
                                        zdir /= norm(zdir);
                                        ydir = Vec3(zdir[1]*xdir[2]-zdir[2]*xdir[1], zdir[2]*xdir[0]-zdir[0]*xdir[2], zdir[0]*xdir[1]-zdir[1]*xdir[0])
                                        position = origin + xdir*site.localPos[0] + ydir*site.localPos[1] + zdir*site.localPos[2];
peastman's avatar
peastman committed
1082
1083
                            if position is None and atom.type in drudeTypeMap:
                                # This is a Drude particle.  Put it on top of its parent atom.
Justin MacCallum's avatar
Justin MacCallum committed
1084

peastman's avatar
peastman committed
1085
1086
1087
                                for atom2, pos in zip(template.atoms, templateAtomPositions):
                                    if atom2.type in drudeTypeMap[atom.type]:
                                        position = deepcopy(pos)
1088
                            if position is None:
1089
1090
                                # We couldn't figure out the correct position.  Put it at a random position near the center of the residue,
                                # and we'll try to fix it later based on bonds.
Justin MacCallum's avatar
Justin MacCallum committed
1091

1092
                                knownPositions = [x for x in templateAtomPositions if x is not None]
1093
1094
                                position = Vec3(random.gauss(0, 1), random.gauss(0, 1), random.gauss(0, 1))+(unit.sum(knownPositions)/len(knownPositions))
                                missingPositions.add(len(newPositions))
peastman's avatar
peastman committed
1095
                            newPositions.append(position*nanometer)
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105

                    # Add bonds involving the extra points.

                    for atom1, atom2 in template.bonds:
                        atom1 = template.atoms[atom1]
                        atom2 = template.atoms[atom2]
                        if atom1 in newExtraPoints or atom2 in newExtraPoints:
                            if atom1 in newExtraPoints:
                                a1 = newExtraPoints[atom1]
                            else:
Peter Eastman's avatar
Bug fix  
Peter Eastman committed
1106
                                a1 = newAtoms[matchingAtoms[atom1]]
1107
1108
1109
                            if atom2 in newExtraPoints:
                                a2 = newExtraPoints[atom2]
                            else:
Peter Eastman's avatar
Bug fix  
Peter Eastman committed
1110
                                a2 = newAtoms[matchingAtoms[atom2]]
1111
                            newTopology.addBond(a1, a2)
1112

1113
1114
1115
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129

        if len(missingPositions) > 0:
            # There were particles whose position we couldn't identify before, since they were neither virtual sites nor Drude particles.
            # Try to figure them out based on bonds.  First, use the ForceField to create a list of every bond involving one of them.

            system = forcefield.createSystem(newTopology, constraints=AllBonds)
            bonds = []
            for i in range(system.getNumConstraints()):
                bond = system.getConstraintParameters(i)
                if bond[0] in missingPositions or bond[1] in missingPositions:
                    bonds.append(bond)

            # Now run a few iterations of SHAKE to try to select reasonable positions.

1130
            for iteration in range(15):
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
                for atom1, atom2, distance in bonds:
                    if atom1 in missingPositions:
                        if atom2 in missingPositions:
                            weights = (0.5, 0.5)
                        else:
                            weights = (1.0, 0.0)
                    else:
                        weights = (0.0, 1.0)
                    delta = newPositions[atom2]-newPositions[atom1]
                    length = norm(delta)
                    delta *= (distance-length)/length
                    newPositions[atom1] -= weights[0]*delta
                    newPositions[atom2] += weights[1]*delta
1144

1145
        self.topology = newTopology
1146
        self.positions = newPositions