nist.py 929 Bytes
Newer Older
1
2
import os
import pickle
3
4
5
6
7
import numpy
from neurochem_calculator import NeuroChem, path
import json
import tqdm
import random
8
9


10
11
12
# Script body: sample a random subset of the NIST dataset, run every kept
# molecule through the NeuroChem calculator, and pickle the collected results
# for use as test data.
neurochem = NeuroChem()
# Approximate fraction of molecules to keep; each entry is discarded when
# random.random() exceeds this value, which reduces the size of the
# generated file.
keep_ratio = 0.1
mol_count = 0  # number of molecules actually processed

# json.load reads the whole file eagerly, so the input handle can be closed
# before the (potentially slow) NeuroChem computations start.
with open(os.path.join(path, 'nist-dataset/result.json')) as f:
    molecules = json.load(f)

pickle_objects = []
for molecule in tqdm.tqdm(molecules, desc='NIST'):
    # Random down-sampling: skip this entry with probability ~(1 - keep_ratio).
    if random.random() > keep_ratio:
        continue
    atoms = molecule['atoms']
    # Each atom record is unpacked as (atom type, x, y, z); the 4-way unpack
    # raises ValueError on malformed records, as the original loop did.
    species = [atype for atype, _, _, _ in atoms]
    coordinates = [[x, y, z] for _, x, y, z in atoms]
    pickleobj = neurochem(numpy.array(coordinates), species)
    pickle_objects.append(pickleobj)
    mol_count += 1

dumpfile = os.path.join(
    path, '../../tests/test_data/NIST/all')
with open(dumpfile, 'wb') as f:
    pickle.dump(pickle_objects, f)