modeller.py 35.3 KB
Newer Older
1
2
"""
modeller.py: Provides tools for editing molecular models
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.

Portions copyright (c) 2012 Stanford University and the Authors.
Authors: Peter Eastman
Contributors:

Permission is hereby granted, free of charge, to any person obtaining a 
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
30
"""
Peter Eastman's avatar
Peter Eastman committed
31
32
from __future__ import division

33
34
35
__author__ = "Peter Eastman"
__version__ = "1.0"

Peter Eastman's avatar
Peter Eastman committed
36
from simtk.openmm.app import Topology, PDBFile
Peter Eastman's avatar
Peter Eastman committed
37
from simtk.openmm.app.forcefield import HAngles
38
from simtk.openmm.vec3 import Vec3
39
40
from simtk.openmm import System, Context, NonbondedForce, VerletIntegrator, LocalEnergyMinimizer
from simtk.unit import nanometer, molar, elementary_charge, amu, gram, liter, degree, sqrt, acos, is_quantity, dot, norm
41
import element as elem
Peter Eastman's avatar
Peter Eastman committed
42
43
import os
import random
Peter Eastman's avatar
Peter Eastman committed
44
import xml.etree.ElementTree as etree
Peter Eastman's avatar
Peter Eastman committed
45
46
from copy import deepcopy
from math import ceil, floor
47
48
49
50
51
52
53
54
55
56

class Modeller(object):
    """Modeller provides tools for editing molecular models, such as adding water or missing hydrogens.
    
    To use it, create a Modeller object, specifying the initial Topology and atom positions.  You can
    then call various methods to change the model in different ways.  Each time you do, a new Topology
    and list of coordinates is created to represent the changed model.  Finally, call getTopology()
    and getPositions() to get the results.
    """
        
Peter Eastman's avatar
Peter Eastman committed
57
58
    _residueHydrogens = None

59
60
61
62
63
64
65
66
    def __init__(self, topology, positions):
        """Create a new Modeller object
        
        Parameters:
         - topology (Topology) the initial Topology of the model
         - positions (list) the initial atomic positions
        """
        self.topology = topology
Peter Eastman's avatar
Peter Eastman committed
67
68
        if not is_quantity(positions):
            positions = positions*nanometers
69
70
71
72
73
74
75
76
77
78
79
80
81
        self.positions = positions
        
    def getTopology(self):
        """Get the Topology of the model."""
        return self.topology
        
    def getPositions(self):
        """Get the atomic positions."""
        return self.positions

    def deleteWater(self):
        """Delete all water molecules from the model."""
        newTopology = Topology()
Peter Eastman's avatar
Peter Eastman committed
82
        newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
83
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
84
        newPositions = []*nanometer
85
86
87
88
89
90
91
92
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                if residue.name != "HOH":
                    newResidue = newTopology.addResidue(residue.name, newChain)
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
Peter Eastman's avatar
Peter Eastman committed
93
                        newPositions.append(deepcopy(self.positions[atom.index]))
94
95
96
97
98
99
100
101
102
103
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions
    
    def convertWater(self, model='tip3p'):
        """Convert all water molecules to a different water model.
        
        Parameters:
Peter Eastman's avatar
Peter Eastman committed
104
         - model (string='tip3p') the water model to convert to.  Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
105
        """
Peter Eastman's avatar
Peter Eastman committed
106
        if model in ('tip3p', 'spce'):
107
108
109
110
111
112
113
114
            sites = 3
        elif model == 'tip4pew':
            sites = 4
        elif model == 'tip5p':
            sites = 5
        else:
            raise ValueError('Unknown water model: %s' % model)
        newTopology = Topology()
Peter Eastman's avatar
Peter Eastman committed
115
        newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
116
        newAtoms = {}
Peter Eastman's avatar
Peter Eastman committed
117
        newPositions = []*nanometer
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain)
                if residue.name == "HOH":
                    # Copy the oxygen and hydrogens
                    oatom = [atom for atom in residue.atoms() if atom.element == elem.oxygen]
                    hatoms = [atom for atom in residue.atoms() if atom.element == elem.hydrogen]
                    if len(oatom) != 1 or len(hatoms) != 2:
                        raise ValueError('Illegal water molecule (residue %d): contains %d oxygen(s) and %d hydrogen(s)' % (residue.index, len(oatom), len(hatoms)))
                    o = newTopology.addAtom(oatom[0].name, oatom[0].element, newResidue)
                    h1 = newTopology.addAtom(hatoms[0].name, hatoms[0].element, newResidue)
                    h2 = newTopology.addAtom(hatoms[1].name, hatoms[1].element, newResidue)
                    newAtoms[oatom[0]] = o
                    newAtoms[hatoms[0]] = h1
                    newAtoms[hatoms[1]] = h2
Peter Eastman's avatar
Peter Eastman committed
134
135
136
                    po = deepcopy(self.positions[oatom[0].index])
                    ph1 = deepcopy(self.positions[hatoms[0].index])
                    ph2 = deepcopy(self.positions[hatoms[1].index])
137
138
139
140
141
142
143
144
                    newPositions.append(po)
                    newPositions.append(ph1)
                    newPositions.append(ph2)
                    
                    # Add virtual sites.
                    
                    if sites == 4:
                        newTopology.addAtom('M', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
145
                        newPositions.append(0.786646558*po + 0.106676721*ph1 + 0.106676721*ph2)
146
147
148
                    elif sites == 5:
                        newTopology.addAtom('M1', None, newResidue)
                        newTopology.addAtom('M2', None, newResidue)
Peter Eastman's avatar
Peter Eastman committed
149
150
                        v1 = (ph1-po).value_in_unit(nanometer)
                        v2 = (ph2-po).value_in_unit(nanometer)
151
                        cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
Peter Eastman's avatar
Peter Eastman committed
152
153
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 - 6.4437903*cross)*nanometer)
                        newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 + 6.4437903*cross)*nanometer)
154
155
156
157
158
                else:
                    # Just copy the residue over.
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
Peter Eastman's avatar
Peter Eastman committed
159
                        newPositions.append(deepcopy(self.positions[atom.index]))
160
161
162
163
164
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        self.topology = newTopology
        self.positions = newPositions
Peter Eastman's avatar
Peter Eastman committed
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

    def addSolvent(self, forcefield, model='tip3p', boxSize=None, padding=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*molar):
        """Add solvent (both water and ions) to the model to fill a rectangular box.
        
        The algorithm works as follows:
        1. Water molecules are added to fill the box.
        2. Water molecules are removed if their distance to any solute atom is less than the sum of their van der Waals radii.
        3. If the solute is charged, enough positive or negative ions are added to neutralize it.  Each ion is added by
           randomly selecting a water molecule and replacing it with the ion.
        4. Ion pairs are added to give the requested total ionic strength.
        
        The box size can be specified in three ways.  First, you can explicitly give a box size to use.  Alternatively, you can
        give a padding distance.  The largest dimension of the solute (along the x, y, or z axis) is determined, and a cubic
        box of size (largest dimension)+2*padding is used.  Finally, if neither a box size nor a padding distance is specified,
        the existing Topology's unit cell dimensions are used.
        
        Parameters:
         - forcefield (ForceField) the ForceField to use for determining van der Waals radii and atomic charges
Peter Eastman's avatar
Peter Eastman committed
183
         - model (string='tip3p') the water model to use.  Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
Peter Eastman's avatar
Peter Eastman committed
184
185
186
187
188
189
190
191
192
193
194
         - boxSize (Vec3=None) the size of the box to fill with water
         - padding (distance=None) the padding distance to use
         - positiveIon (string='Na+') the type of positive ion to add.  Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'
         - negativeIon (string='Cl-') the type of negative ion to add.  Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware
           that not all force fields support all ion types.
         - ionicString (concentration=0*molar) the total concentration of ions (both positive and negative) to add.  This
           does not include ions that are added to neutralize the system.
        """
        # Pick a unit cell size.
        
        if boxSize is not None:
195
196
197
            if is_quantity(boxSize):
                boxSize = boxSize.value_in_unit(nanometer)
            box = Vec3(boxSize[0], boxSize[1], boxSize[2])*nanometer
Peter Eastman's avatar
Peter Eastman committed
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
        elif padding is not None:
            maxSize = max(max((pos[i] for pos in self.positions))-min((pos[i] for pos in self.positions)) for i in range(3))
            box = (maxSize+2*padding)*Vec3(1, 1, 1)
        else:
            box = topology.getUnitCellDimensions()
            if box is None:
                raise ValueError('Neither the box size nor padding was specified, and the Topology does not define unit cell dimensions')
        box = box.value_in_unit(nanometer)
        invBox = Vec3(1.0/box[0], 1.0/box[1], 1.0/box[2])
        
        # Identify the ion types.
        
        posIonElements = {'Cs+':elem.cesium, 'K+':elem.potassium, 'Li+':elem.lithium, 'Na+':elem.sodium, 'Rb+':elem.rubidium}
        negIonElements = {'Cl-':elem.chlorine, 'Br-':elem.bromine, 'F-':elem.fluorine, 'I-':elem.iodine}
        if positiveIon not in posIonElements:
            raise ValueError('Illegal value for positive ion: %s' % positiveIon)
        if negativeIon not in negIonElements:
            raise ValueError('Illegal value for negative ion: %s' % negativeIon)
        positiveElement = posIonElements[positiveIon]
        negativeElement = negIonElements[negativeIon]
        
        # Load the pre-equilibrated water box.
        
        vdwRadiusPerSigma = 0.5612310241546864907
        if model == 'tip3p':
            waterRadius = 0.31507524065751241*vdwRadiusPerSigma
Peter Eastman's avatar
Peter Eastman committed
224
225
        elif model == 'spce':
            waterRadius = 0.31657195050398818*vdwRadiusPerSigma
Peter Eastman's avatar
Peter Eastman committed
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
        elif model == 'tip4pew':
            waterRadius = 0.315365*vdwRadiusPerSigma
        elif model == 'tip5p':
            waterRadius = 0.312*vdwRadiusPerSigma
        else:
            raise ValueError('Unknown water model: %s' % model)
        pdb = PDBFile(os.path.join(os.path.dirname(__file__), 'data', model+'.pdb'))
        pdbTopology = pdb.getTopology()
        pdbPositions = pdb.getPositions().value_in_unit(nanometer)
        pdbResidues = list(pdbTopology.residues())
        pdbBoxSize = pdbTopology.getUnitCellDimensions().value_in_unit(nanometer)
        
        # Have the ForceField build a System for the solute from which we can determine van der Waals radii.
        
        system = forcefield.createSystem(self.topology)
        nonbonded = None
        for i in range(system.getNumForces()):
            if isinstance(system.getForce(i), NonbondedForce):
                nonbonded = system.getForce(i)
        if nonbonded is None:
            raise ValueError('The ForceField does not specify a NonbondedForce')
        cutoff = [nonbonded.getParticleParameters(i)[1].value_in_unit(nanometer)*vdwRadiusPerSigma+waterRadius for i in range(system.getNumParticles())]
        waterCutoff = 2*waterRadius
Peter Eastman's avatar
Peter Eastman committed
249
250
251
252
        if len(cutoff) == 0:
            maxCutoff = waterCutoff
        else:
            maxCutoff = max(waterCutoff, max(cutoff))
Peter Eastman's avatar
Peter Eastman committed
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
        
        # Copy the solute over.

        newTopology = Topology()
        newTopology.setUnitCellDimensions(box)
        newAtoms = {}
        newPositions = []*nanometer
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain)
                for atom in residue.atoms():
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        
        # Sort the solute atoms into cells for fast lookup.
        
Peter Eastman's avatar
Peter Eastman committed
274
275
276
277
        if len(self.positions) == 0:
            positions = []
        else:
            positions = self.positions.value_in_unit(nanometer)
Peter Eastman's avatar
Peter Eastman committed
278
        cells = {}
Peter Eastman's avatar
Peter Eastman committed
279
        numCells = tuple((max(1, int(floor(box[i]/maxCutoff))) for i in range(3)))
Peter Eastman's avatar
Peter Eastman committed
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
        cellSize = tuple((box[i]/numCells[i] for i in range(3)))
        for i in range(len(positions)):
            cell = tuple((int(floor(positions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
            if cell in cells:
                cells[cell].append(i)
            else:
                cells[cell] = [i]
        
        # Create a generator that loops over atoms close to a position.
        
        def neighbors(pos):
            centralCell = tuple((int(floor(pos[i]/cellSize[i])) for i in range(3)))
            offsets = (-1, 0, 1)
            for i in offsets:
                for j in offsets:
                    for k in offsets:
                        cell = ((centralCell[0]+i+numCells[0])%numCells[0], (centralCell[1]+j+numCells[1])%numCells[1], (centralCell[2]+k+numCells[2])%numCells[2])
                        if cell in cells:
                            for atom in cells[cell]:
                                yield atom
        
        # Define a function to compute the distance between two points, taking periodic boundary conditions into account.
        
        def periodicDistance(pos1, pos2):
            delta = pos1-pos2
            delta = [delta[i]-floor(delta[i]*invBox[i]+0.5)*box[i] for i in range(3)]
306
            return norm(delta)
Peter Eastman's avatar
Peter Eastman committed
307
308
309
310
        
        # Find the list of water molecules to add.
        
        newChain = newTopology.addChain()
Peter Eastman's avatar
Peter Eastman committed
311
312
313
314
315
        if len(positions) == 0:
            center = Vec3(0, 0, 0)
        else:
            center = [(max((pos[i] for pos in positions))+min((pos[i] for pos in positions)))/2 for i in range(3)]
            center = Vec3(center[0], center[1], center[2])
Peter Eastman's avatar
Peter Eastman committed
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
        numBoxes = [int(ceil(box[i]/pdbBoxSize[i])) for i in range(3)]
        addedWaters = []
        for boxx in range(numBoxes[0]):
            for boxy in range(numBoxes[1]):
                for boxz in range(numBoxes[2]):
                    offset = Vec3(boxx*pdbBoxSize[0], boxy*pdbBoxSize[1], boxz*pdbBoxSize[2])
                    for residue in pdbResidues:
                        oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
                        atomPos = pdbPositions[oxygen.index]+offset
                        if not any((atomPos[i] > box[i] for i in range(3))):
                            # This molecule is inside the box, so see how close to it is to the solute.
                            
                            atomPos += center-box/2
                            for i in neighbors(atomPos):
                                if periodicDistance(atomPos, positions[i]) < cutoff[i]:
                                    break
                            else:
                                # Record this water molecule as one to add.
                            
                                addedWaters.append((residue.index, atomPos))
        
        # There could be clashes between water molecules at the box edges.  Find ones to remove.
        
        upperCutoff = center+box/2-Vec3(waterCutoff, waterCutoff, waterCutoff)
        lowerCutoff = center-box/2+Vec3(waterCutoff, waterCutoff, waterCutoff)
        lowerSkinPositions = [pos for index, pos in addedWaters if pos[0] < lowerCutoff[0] or pos[1] < lowerCutoff[1] or pos[2] < lowerCutoff[2]]
        filteredWaters = []
        cells = {}
        for i in range(len(lowerSkinPositions)):
            cell = tuple((int(floor(lowerSkinPositions[i][j]/cellSize[j]))%numCells[j] for j in range(3)))
            if cell in cells:
                cells[cell].append(i)
            else:
                cells[cell] = [i]
        for entry in addedWaters:
            pos = entry[1]
            if pos[0] < upperCutoff[0] and pos[1] < upperCutoff[1] and pos[2] < upperCutoff[2]:
                filteredWaters.append(entry)
            else:
Peter Eastman's avatar
Peter Eastman committed
355
                if not any((periodicDistance(lowerSkinPositions[i], pos) < waterCutoff and norm(lowerSkinPositions[i]-pos) > waterCutoff for i in neighbors(pos))):
Peter Eastman's avatar
Peter Eastman committed
356
357
358
359
360
                    filteredWaters.append(entry)
        addedWaters = filteredWaters
        
        # Add ions to neutralize the system.
        
361
        totalCharge = int(floor(0.5+sum((nonbonded.getParticleParameters(i)[0].value_in_unit(elementary_charge) for i in range(system.getNumParticles())))))
Peter Eastman's avatar
Peter Eastman committed
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
        if abs(totalCharge) > len(addedWaters):
            raise Exception('Cannot neutralize the system because the charge is greater than the number of available positions for ions')
        def addIon(element):
            # Replace a water by an ion.
            index = random.randint(0, len(addedWaters)-1)
            newResidue = newTopology.addResidue(element.symbol.upper(), newChain)
            newTopology.addAtom(element.symbol, element, newResidue)
            newPositions.append(addedWaters[index][1]*nanometer)
            del addedWaters[index]
        for i in range(abs(totalCharge)):
            addIon(positiveElement if totalCharge < 0 else negativeElement)
        
        # Add ions based on the desired ionic strength.
        
        numIons = len(addedWaters)*ionicStrength/(55.4*molar) # Pure water is about 55.4 molar (depending on temperature)
        numPairs = int(floor(numIons/2+0.5))
        for i in range(numPairs):
            addIon(positiveElement)
        for i in range(numPairs):
            addIon(negativeElement)
        
        # Add the water molecules.
        
        for index, pos in addedWaters:
            newResidue = newTopology.addResidue(residue.name, newChain)
            residue = pdbResidues[index]
            oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
            oPos = pdbPositions[oxygen.index]
            molAtoms = []
            for atom in residue.atoms():
                molAtoms.append(newTopology.addAtom(atom.name, atom.element, newResidue))
                newPositions.append((pos+pdbPositions[atom.index]-oPos)*nanometer)
            for atom1 in molAtoms:
                if atom1.element == elem.oxygen:
                    for atom2 in molAtoms:
                        if atom2.element == elem.hydrogen:
                            newTopology.addBond(atom1, atom2)
        newTopology.setUnitCellDimensions(deepcopy(box)*nanometer)
        self.topology = newTopology
        self.positions = newPositions
Peter Eastman's avatar
Peter Eastman committed
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
    
    class _ResidueData:
        """Inner class used to encapsulate data about the hydrogens for a residue."""
        def __init__(self, name):
            self.name = name
            self.variants = []
            self.hydrogens = []
    
    class _Hydrogen:
        """Inner class used to encapsulate data about a hydrogen atom."""
        def __init__(self, name, parent, maxph, variants, terminal):
            self.name = name
            self.parent = parent
            self.maxph = maxph
            self.variants = variants
            self.terminal = terminal
    
    def addHydrogens(self, forcefield, pH=7.0, variants=None):
        """Add missing hydrogens to the model.
        
        Some residues can exist in multiple forms depending on the pH and properties of the local environment.  These
        variants differ in the presence or absence of particular hydrogens.  In particular, the following variants
        are supported:
        
        Aspartic acid:
            ASH: Neutral form with a hydrogen on one of the delta oxygens
            ASP: Negatively charged form without a hydrogen on either delta oxygen
429
            
Peter Eastman's avatar
Peter Eastman committed
430
431
432
        Cysteine:
            CYS: Neutral form with a hydrogen on the sulfur
            CYX: No hydrogen on the sulfur (either negatively charged, or part of a disulfide bond)
433
            
Peter Eastman's avatar
Peter Eastman committed
434
435
436
        Glutamic acid:
            GLH: Neutral form with a hydrogen on one of the epsilon oxygens
            GLU: Negatively charged form without a hydrogen on either epsilon oxygen
437
            
Peter Eastman's avatar
Peter Eastman committed
438
439
440
441
        Histidine:
            HID: Neutral form with a hydrogen on the ND1 atom
            HIE: Neutral form with a hydrogen on the NE2 atom
            HIP: Positively charged form with hydrogens on both ND1 and NE2
442
            
Peter Eastman's avatar
Peter Eastman committed
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
        Lysine:
            LYN: Neutral form with two hydrogens on the zeta nitrogen
            LYS: Positively charged form with three hydrogens on the zeta nitrogen
        
        The variant to use for each residue is determined by the following rules:
        
        1. The most common variant at the specified pH is selected.
        2. Any Cysteine that participates in a disulfide bond uses the CYX variant regardless of pH.
        3. For a neutral Histidine residue, the HID or HIE variant is selected based on which one forms a better hydrogen bond.
        
        You can override these rules by explicitly specifying a variant for any residue.  Also keep in mind that this
        function will only add hydrogens.  It will never remove ones that are already present in the model, regardless
        of the specified pH.
        
        Parameters:
         - forcefield (ForceField) the ForceField to use for determining the positions of hydrogens
         - pH (float=7.0) the pH based on which to select variants
         - variants (list=None) an optional list of variants to use.  If this is specified, its length must equal the number
           of residues in the model.  variants[i] is the name of the variant to use for residue i (indexed starting at 0).
           If an element is None, the standard rules will be followed to select a variant for that residue.
        """
        # Check the list of variants.
        
        residues = list(self.topology.residues())
        if variants is not None:
            if len(variants) != len(residues):
                raise ValueError("The length of the variants list must equal the number of residues")
        else:
            variants = [None]*len(residues)
        
        # Load the residue specifications.
        
        if Modeller._residueHydrogens is None:
            Modeller._residueHydrogens = {}
            tree = etree.parse(os.path.join(os.path.dirname(__file__), 'data', 'hydrogens.xml'))
            infinity = float('Inf')
            for residue in tree.getroot().findall('Residue'):
                resName = residue.attrib['name']
                data = Modeller._ResidueData(resName)
                Modeller._residueHydrogens[resName] = data
                for variant in residue.findall('Variant'):
                    data.variants.append(variant.attrib['name'])
                for hydrogen in residue.findall('H'):
                    maxph = infinity
                    if 'maxph' in hydrogen.attrib:
                        maxph = float(hydrogen.attrib['maxph'])
                    atomVariants = None
                    if 'variant' in hydrogen.attrib:
                        atomVariants = hydrogen.attrib['variant'].split(',')
                    terminal = None
                    if 'terminal' in hydrogen.attrib:
                        terminal = hydrogen.attrib['terminal']
                    data.hydrogens.append(Modeller._Hydrogen(hydrogen.attrib['name'], hydrogen.attrib['parent'], maxph, atomVariants, terminal))

        # Make a list of atoms bonded to each atom.
        
        bonded = {}
500
501
        for atom in self.topology.atoms():
            bonded[atom] = []
Peter Eastman's avatar
Peter Eastman committed
502
503
504
505
        for atom1, atom2 in self.topology.bonds():
            bonded[atom1].append(atom2)
            bonded[atom2].append(atom1)
        
506
507
508
509
510
511
512
513
514
515
516
        # Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.
        
        def isHbond(d, h, a):
            if norm(d-a) > 0.35*nanometer:
                return False
            deltaDH = h-d
            deltaHA = a-h
            deltaDH /= norm(deltaDH)
            deltaHA /= norm(deltaHA)
            return acos(dot(deltaDH, deltaHA)) < 50*degree
        
Peter Eastman's avatar
Peter Eastman committed
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
        # Loop over residues.
        
        newTopology = Topology()
        newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
        newAtoms = {}
        newPositions = []*nanometer
        newIndices = []
        acceptors = [atom for atom in self.topology.atoms() if atom.element in (elem.oxygen, elem.nitrogen)]
        for chain in self.topology.chains():
            newChain = newTopology.addChain()
            for residue in chain.residues():
                newResidue = newTopology.addResidue(residue.name, newChain)
                isNTerminal = (residue == chain._residues[0])
                isCTerminal = (residue == chain._residues[-1])
                if residue.name in Modeller._residueHydrogens:
                    # Add hydrogens.  First select which variant to use.
                    
                    spec = Modeller._residueHydrogens[residue.name]
                    variant = variants[residue.index]
                    if variant is None:
                        if residue.name == 'CYS':
                            # If this is part of a disulfide, use CYX.
                            
                            sulfur = [atom for atom in residue.atoms() if atom.element == elem.sulfur]
                            if len(sulfur) == 1 and any((atom.residue != residue for atom in bonded[sulfur[0]])):
                                variant = 'CYX'
                        if residue.name == 'HIS' and pH > 6.5:
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
                            # See if either nitrogen already has a hydrogen attached.
                            
                            nd1 = [atom for atom in residue.atoms() if atom.name == 'ND1']
                            ne2 = [atom for atom in residue.atoms() if atom.name == 'NE2']
                            if len(nd1) != 1 or len(ne2) != 1:
                                raise ValueError('HIS residue (%d) has the wrong set of atoms' % residue.index)
                            nd1 = nd1[0]
                            ne2 = ne2[0]
                            nd1HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[nd1]))
                            ne2HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[ne2]))
                            if nd1HasHydrogen and ne2HasHydrogen:
                                variant = 'HIP'
                            elif nd1HasHydrogen:
                                variant = 'HID'
                            elif ne2HasHydrogen:
                                variant = 'HIE'
                            else:
                                # Estimate the hydrogen positions.
                                
                                nd1Pos = self.positions[nd1.index]
                                ne2Pos = self.positions[ne2.index]
                                hd1Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[nd1]:
                                    hd1Delta += nd1Pos-self.positions[other.index]
                                hd1Delta *= 0.1*nanometer/norm(hd1Delta)
                                hd1Pos = nd1Pos+hd1Delta
                                he2Delta = Vec3(0, 0, 0)*nanometer
                                for other in bonded[ne2]:
                                    he2Delta += ne2Pos-self.positions[other.index]
                                he2Delta *= 0.1*nanometer/norm(he2Delta)
                                he2Pos = ne2Pos+he2Delta

                                # See whether either hydrogen would form a hydrogen bond.
    
                                nd1IsBonded = False
                                ne2IsBonded = False
                                for acceptor in acceptors:
                                    if acceptor.residue != residue:
                                        acceptorPos = self.positions[acceptor.index]
                                        if isHbond(nd1Pos, hd1Pos, acceptorPos):
                                            nd1IsBonded = True
                                            break
                                        if isHbond(ne2Pos, he2Pos, acceptorPos):
                                            ne2IsBonded = True
                                if ne2IsBonded and not nd1IsBonded:
                                    variant = 'HIE'
                                else:
                                    variant = 'HID'
Peter Eastman's avatar
Peter Eastman committed
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
                        elif residue.name == 'HIS':
                            variant = 'HIP'
                    if variant is not None and variant not in spec.variants:
                        raise ValueError('Illegal variant for %s residue: %s' % (residue.name, variant))
                    
                    # Make a list of hydrogens that should be present in the residue.
                    
                    parents = [atom for atom in residue.atoms() if atom.element != elem.hydrogen]
                    parentNames = [atom.name for atom in parents]
                    hydrogens = [h for h in spec.hydrogens if (variant is None and pH <= h.maxph) or (h.variants is None and pH <= h.maxph) or (h.variants is not None and variant in h.variants)]
                    hydrogens = [h for h in hydrogens if h.terminal is None or (isNTerminal and h.terminal == 'N') or (isCTerminal and h.terminal == 'C')]
                    hydrogens = [h for h in hydrogens if h.parent in parentNames]
                    
                    # Loop over atoms in the residue, adding them to the new topology along with required hydrogens.
                    
                    for parent in residue.atoms():
                        # Add the atom.
                        
                        newAtom = newTopology.addAtom(parent.name, parent.element, newResidue)
                        newAtoms[parent] = newAtom
                        newPositions.append(deepcopy(self.positions[parent.index]))
                        if parent in parents:
                            # Match expected hydrogens with existing ones and find which ones need to be added.
                            
                            existing = [atom for atom in bonded[parent] if atom.element == elem.hydrogen]
                            expected = [h for h in hydrogens if h.parent == parent.name]
                            if len(existing) < len(expected):
                                # Try to match up existing hydrogens to expected ones.
                                
                                matches = []
                                for e in existing:
                                    match = [h for h in expected if h.name == e.name]
                                    if len(match) > 0:
                                        matches.append(match[0])
                                        expected.remove(match[0])
                                    else:
                                        matches.append(None)
                                
                                # If any hydrogens couldn't be matched by name, just match them arbitrarily.
                                
                                for i in range(len(matches)):
                                    if matches[i] is None:
                                        matches[i] = expected[-1]
                                        expected.remove(expected[-1])
                                
                                # Add the missing hydrogens.
                                
                                for h in expected:
                                    newH = newTopology.addAtom(h.name, elem.hydrogen, newResidue)
                                    newIndices.append(newH.index)
642
643
                                    delta = Vec3(0, 0, 0)*nanometer
                                    if len(bonded[parent]) > 0:
Peter Eastman's avatar
Peter Eastman committed
644
645
646
647
                                        for other in bonded[parent]:
                                            delta += self.positions[parent.index]-self.positions[other.index]
                                    else:
                                        delta = Vec3(random.random(), random.random(), random.random())*nanometer
648
649
650
651
                                    delta *= 0.1*nanometer/norm(delta)
                                    if len(expected) > 1:
                                        delta += 0.05*Vec3(random.random(), random.random(), random.random())*nanometer
                                        delta *= 0.1*nanometer/norm(delta)
Peter Eastman's avatar
Peter Eastman committed
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
                                    newPositions.append(self.positions[parent.index]+delta)
                                    newTopology.addBond(newAtom, newH)
                else:
                    # Just copy over the residue.
                    
                    for atom in residue.atoms():
                        newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                        newAtoms[atom] = newAtom
                        newPositions.append(deepcopy(self.positions[atom.index]))
        for bond in self.topology.bonds():
            if bond[0] in newAtoms and bond[1] in newAtoms:
                newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        
        # The hydrogens were added at random positions.  Now use the ForceField to fix them up.
        
667
        system = forcefield.createSystem(newTopology, rigidWater=False)
Peter Eastman's avatar
Peter Eastman committed
668
        atoms = list(newTopology.atoms())
669
670
671
672
673
        for i in range(system.getNumParticles()):
            if atoms[i].element != elem.hydrogen:
                # This is a heavy atom, so make it immobile.
                system.setParticleMass(i, 0)
        context = Context(system, VerletIntegrator(0.0))
Peter Eastman's avatar
Peter Eastman committed
674
        context.setPositions(newPositions)
675
        LocalEnergyMinimizer.minimize(context)
Peter Eastman's avatar
Peter Eastman committed
676
677
        self.topology = newTopology
        self.positions = context.getState(getPositions=True).getPositions()