"test/vscode:/vscode.git/clone" did not exist on "dd3809fad8de5519bef52f14fe0da2496848b28c"
generate_data.py 2.78 KB
Newer Older
Xiang Gao's avatar
Xiang Gao committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from ase import Atoms
from ase.calculators.tip3p import TIP3P, rOH, angleHOH
from ase.md import Langevin
import ase.units as units
from ase.io.trajectory import Trajectory
import numpy
import h5py
from rdkit import Chem
from rdkit.Chem import AllChem
# from asap3 import EMT
from ase.calculators.emt import EMT
from multiprocessing import Pool
from tqdm import tqdm, tqdm_notebook, trange
tqdm.monitor_interval = 0
from selected_system import mols, mol_file

import functools

conformations = 1024
T = 30

fw = h5py.File("waters.hdf5", "w")
fm = h5py.File(mol_file, "w")


def save(h5file, name, species, coordinates):
    h5file[name] = coordinates
    h5file[name].attrs['species'] = ' '.join(species)


def waterbox(x, y, z, tqdmpos):
    name = '{}_waters'.format(x*y*z)
    # Set up water box at 20 deg C density
    a = angleHOH * numpy.pi / 180 / 2
    pos = [[0, 0, 0],
           [0, rOH * numpy.cos(a), rOH * numpy.sin(a)],
           [0, rOH * numpy.cos(a), -rOH * numpy.sin(a)]]
    atoms = Atoms('OH2', positions=pos)

    vol = ((18.01528 / 6.022140857e23) / (0.9982 / 1e24))**(1 / 3.)
    atoms.set_cell((vol, vol, vol))
    atoms.center()

    atoms = atoms.repeat((x, y, z))
    atoms.set_pbc(False)
    species = atoms.get_chemical_symbols()

    atoms.calc = TIP3P()
    md = Langevin(atoms, 1 * units.fs, temperature=T *
                  units.kB, friction=0.01)

    def generator(n):
        for _ in trange(n, desc=name, position=tqdmpos):
            md.run(1)
            positions = atoms.get_positions()
            yield positions

    save(fw, name, species, numpy.stack(generator(conformations)))


def compute(smiles):
    m = Chem.MolFromSmiles(smiles)
    m = Chem.AddHs(m)
    AllChem.EmbedMolecule(m, useRandomCoords=True)
    AllChem.UFFOptimizeMolecule(m)
    pos = m.GetConformer().GetPositions()
    natoms = m.GetNumAtoms()
    species = [m.GetAtomWithIdx(j).GetSymbol() for j in range(natoms)]

    atoms = Atoms(species, positions=pos)

    atoms.calc = EMT()
    md = Langevin(atoms, 1 * units.fs, temperature=T *
                  units.kB, friction=0.01)

    def generator(n):
        for _ in range(n):
            md.run(1)
            positions = atoms.get_positions()
            yield positions

    c = numpy.stack(generator(conformations))
    return smiles.replace('/', '_'), species, c


def molecules():
    smiles = [s for atoms in mols for s in mols[atoms]]
    with Pool() as p:
        return p.map(compute, smiles)


if __name__ == '__main__':
    for i in molecules():
        save(fm, *i)
    print(list(fm.keys()))
    print('done with molecules')

    with Pool() as p:
        p.starmap(waterbox, [(10, 10, 10, 0), (20, 20, 10,
                                               1), (30, 30, 30, 2), (40, 40, 40, 3)])
        print(list(fw.keys()))
        print('done with water boxes')