proj_dm.py 5.96 KB
Newer Older
yuhai's avatar
yuhai committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import numpy as np
from pyscf import gto
import os
import sys
import argparse
import mendeleev
from calc_eig import calc_eig


# aa = 2.0**np.arange(6,-3,-1)
aa = 1.5**np.array([17,13,10,7,5,3,2,1,0,-1,-2,-3])
bb = np.diag(np.ones(aa.size)) - np.diag(np.ones(aa.size-1), k=1)
SHELL = [aa.size] * 3
coef = np.concatenate([aa.reshape(-1,1), bb], axis=1)
BASIS = [[0, *coef.tolist()], [1, *coef.tolist()], [2, *coef.tolist()]]


def parse_xyz(filename, basis='ccpvtz', verbose=False):
    with open(filename) as fp:
        natoms = int(fp.readline())
        comments = fp.readline()
        xyz_str = "".join(fp.readlines())
    mol = gto.Mole()
    mol.verbose = 4 if verbose else 0
    mol.atom = xyz_str
    mol.basis  = basis
    try:
        mol.build(0,0,unit="Ang")
    except RuntimeError as e:
        mol.spin = 1
        mol.build(0,0,unit="Ang")
    return mol  


def gen_proj(mol, intor = 'ovlp', verbose = False) :
    natm = mol.natm
    mole_coords = mol.atom_coords(unit="Ang")
    test_mol = gto.Mole()
    if verbose :
        test_mol.verbose = 4
    else :
        test_mol.verbose = 0
    test_mol.atom = [["Ne", coord] for coord in mole_coords]
    test_mol.basis = BASIS
    test_mol.spin = 0
    test_mol.build(0,0,unit="Ang")
    proj = gto.intor_cross(f'int1e_{intor}_sph', mol, test_mol) 
    
    def proj_func(mo):
        proj_coeff = np.matmul(mo, proj).reshape(*mo.shape[:2], natm, -1)
        if verbose:
            print('shape of coeff data          ', proj_coeff.shape)
        # res : nframe x nocc/nvir x natm x nproj
        return proj_coeff, proj_coeff.shape[-1]
    
    return proj_func


def proj_frame(xyz_file, mo_dir, dump_dir=None, basis='ccpvtz', ename="e_hf", intor='ovlp', verbose=False):
    mol = parse_xyz(xyz_file, basis=basis)
    meta, ehf, e_occ, c_occ = load_data(mo_dir, ename)
    
    proj_func = gen_proj(mol, intor, verbose)
    c_proj_occ,nproj = proj_func(c_occ)
    c_occ = c_proj_occ
    meta = np.append(meta, nproj)
    # print(meta, c_proj_occ.shape)

    if dump_dir is not None:
        dump_data(dump_dir, meta, ehf, e_occ, c_occ)
    return meta, ehf, e_occ, c_occ


def load_data(dir_name, ename="e_hf"):
    meta = np.loadtxt(os.path.join(dir_name, 'system.raw'), dtype=int).reshape(-1)
    natm = meta[0]
    nao = meta[1]
    nocc = meta[2]
    nvir = meta[3]
    ehf = np.loadtxt(os.path.join(dir_name, f'{ename}.raw')).reshape(-1, 1)
    e_occ = np.loadtxt(os.path.join(dir_name, 'ener_occ.raw')).reshape(-1, nocc)
    c_occ = np.loadtxt(os.path.join(dir_name, 'coeff_occ.raw')).reshape([-1, nocc, nao])
    return meta, ehf, e_occ, c_occ


def dump_data(dir_name, meta, ehf, e_occ, c_occ, dm_dict={}) :
    os.makedirs(dir_name, exist_ok = True)
    np.savetxt(os.path.join(dir_name, 'system.raw'), 
               meta.reshape(1,-1), 
               fmt = '%d',
               header = 'natm nao nocc nvir nproj')
    nframe = e_occ.shape[0]
    natm = meta[0]
    nao = meta[1]
    nocc = meta[2]
    nvir = meta[3]
    nproj = meta[4]
    # ntest == natm
    assert(all(c_occ.shape == np.array([nframe, nocc, natm, nproj], dtype=int)))
    assert(all(e_occ.shape == np.array([nframe, nocc], dtype=int)))
    assert(all(all(dm.shape == np.array([nframe, natm, nproj], dtype=int)) for dm in dm_dict.values()))
    np.save(os.path.join(dir_name, 'e_hf.npy'), ehf) 
    np.save(os.path.join(dir_name, 'ener_occ.npy'), e_occ)
    np.save(os.path.join(dir_name, 'coeff_occ.npy'), c_occ)
    for name, dm in dm_dict.items():
        np.save(os.path.join(dir_name, f'{name}.npy'), dm)


def main(xyz_files, mo_dirs, dump_dir, basis='ccpvtz', ename="e_hf", eig_names=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'], intor='ovlp', verbose='False'):
    assert len(xyz_files) == len(mo_dirs)
    oldmeta = None
    all_e_hf = []
    all_e_occ = []
    all_c_occ = []
    all_dm_dict = {name:[] for name in eig_names}
    
    for xf, md in zip(xyz_files, mo_dirs):
        meta, e_hf, e_occ, c_occ = proj_frame(xf, md, basis=basis, ename=ename, intor=intor, verbose=verbose)
        if oldmeta is not None:
            assert all(oldmeta == meta), "all frames has to be in the same system thus meta has to be equal!"
        oldmeta = meta
        all_e_hf.append(e_hf)
        all_e_occ.append(e_occ)
        all_c_occ.append(c_occ)
        for name, dm_list in all_dm_dict.items():
            dm_list.append(2 * calc_eig(name, c_occ, e_occ, xf, shell=SHELL)) # multiply by 2 for restricted method, doubly occupied orbitals
        print(f"{xf} && {md} finished")

    all_e_hf = np.concatenate(all_e_hf)
    all_e_occ = np.concatenate(all_e_occ)
    all_c_occ = np.concatenate(all_c_occ)
    for name in all_dm_dict.keys():
        all_dm_dict[name] = np.concatenate(all_dm_dict[name])

    dump_data(dump_dir, meta, all_e_hf, all_e_occ, all_c_occ, all_dm_dict)
    print("done")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="project mo_coeffs into atomic basis and calculate descriptors.")
    parser.add_argument("-x", "--xyz-file", nargs="+", help="input xyz file(s), if more than one, concat them")
    parser.add_argument("-f", "--mo-dir", nargs="+", help="input mo folder(s), must of same number with xyz files")
    parser.add_argument("-d", "--dump-dir", default=".", help="dir of dumped files, if not specified, use current folder")
    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
    parser.add_argument("-I", "--intor", default="ovlp", help="intor string used to calculate int1e")
    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
    parser.add_argument("-e", "--ename", default="e_hf", help="file name for total energy")
    parser.add_argument("-E", "--eig-name", nargs="*", default=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'], 
                        help="name of eigen values to be calculated and dumped")
    args = parser.parse_args()
    
    main(args.xyz_file, args.mo_dir, args.dump_dir, args.basis,
         args.ename, args.eig_name, args.intor, args.verbose)