# generate-unit-test-expect.py
1
2
import os
import pickle
3
4
5
6
7
8
import pyanitools
import numpy
from neurochem_calculator import NeuroChem, path
import json
import tqdm
import random
9
10


11
neurochem = NeuroChem()

# Generate the expected values for the ANI1 subset.
#
# The four dataset files ani_gdb_s01.h5 .. ani_gdb_s04.h5 are read in order.
# For every entry yielded by the data loader, only the first 10 rows of its
# 'coordinates' array are passed to NeuroChem together with the entry's
# 'species', and the result is pickled into its own file. Output files are
# named by a running index (mol_count) that keeps counting across all four
# dataset files.
mol_count = 0
ani1_dataset_template = '../dataset/ani_gdb_s0{}.h5'
ani1_dump_template = '../tests/test_data/ANI1_subset/{}'
for heavy_atoms in range(1, 5):
    h5_path = os.path.join(path, ani1_dataset_template.format(heavy_atoms))
    loader = pyanitools.anidataloader(h5_path)
    progress = tqdm.tqdm(
        loader, desc='ANI1: {} heavy atoms'.format(heavy_atoms))
    for entry in progress:
        expected = neurochem(entry['coordinates'][:10, :], entry['species'])
        out_path = os.path.join(path, ani1_dump_template.format(mol_count))
        with open(out_path, 'wb') as out:
            pickle.dump(expected, out)
        mol_count += 1


# Generate the expected values for NIST.
#
# Every record of diverse_test_set/result.json carries an 'atoms' list of
# (atom type, x, y, z) items. A random subset of the records is run through
# NeuroChem and all results are pickled together, as one list, into a single
# file.

# Fraction of records to keep; the rest are randomly discarded to reduce the
# size of the generated file. No random seed is set here, so the selected
# subset differs between runs.
keep_ratio = 0.1

# Read the whole JSON document first so the input file is closed before the
# computation starts and before the output file is opened.
with open(os.path.join(path, 'diverse_test_set/result.json')) as f:
    nist_records = json.load(f)

pickle_objects = []
for record in tqdm.tqdm(nist_records, desc='NIST'):
    # One random draw per record decides whether it is kept.
    if random.random() > keep_ratio:
        continue
    atoms = record['atoms']
    species = [atype for atype, _, _, _ in atoms]
    coordinates = [[x, y, z] for _, x, y, z in atoms]
    pickle_objects.append(neurochem(numpy.array(coordinates), species))

dumpfile = os.path.join(
    path, '../tests/test_data/NIST/all')
with open(dumpfile, 'wb') as f:
    pickle.dump(pickle_objects, f)