Initial commit

47af8be9 · yuhai · 47af8be9 · 47af8be9 · 47af8be9 · 47af8be9
Commit 47af8be9 authored Jun 26, 2023 by yuhai
19 changed files
--- a/examples/water_single/systems/group.02/force.npy
+++ b/examples/water_single/systems/group.02/force.npy
--- a/examples/water_single/systems/group.03/atom.npy
+++ b/examples/water_single/systems/group.03/atom.npy
--- a/examples/water_single/systems/group.03/dm.npy
+++ b/examples/water_single/systems/group.03/dm.npy
--- a/examples/water_single/systems/group.03/energy.npy
+++ b/examples/water_single/systems/group.03/energy.npy
--- a/examples/water_single/systems/group.03/force.npy
+++ b/examples/water_single/systems/group.03/force.npy
--- a/examples/water_single/withdens/base.yaml
+++ b/examples/water_single/withdens/base.yaml
+# all arguments are flattened into this file
+# they can also be split into separate files and referenced here
+n_iter: 0 # use 0 as a placeholder
+
+# training and testing systems
+systems_train: # can also be files containing system paths
+  - ../systems/group.0[0-2] # supports glob
+
+systems_test: # if empty, use the last system of training set
+  - ../systems/group.03
+  
+# directory setting
+workdir: "."
+share_folder: "share" # folder that stores all other settings
+
+# scf settings
+scf_input: # can also be specified by a separate file
+  basis: ccpvdz
+  # this is for force training
+  dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta]
+  verbose: 1
+  mol_args:
+    incore_anyway: True
+  scf_args:
+    conv_tol: 1e-6
+    conv_tol_grad: 1e-2
+    level_shift: 0.1
+    diis_space: 20
+    conv_check: false # pyscf conv_check has a bug
+
+scf_machine: 
+  # every system will be run as a separate command (a task)
+  sub_size: 1 
+  # 4 tasks will be gathered into one group and submitted together as a shell script
+  group_size: 4
+  dispatcher: 
+    context: local
+    batch: shell # set to shell to run on local machine, you can also use `slurm`
+    remote_profile: null # not needed in local case
+  # resources are no longer needed, but the envs can still be set here
+  resources:
+    envs:
+      PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G
+  python: "python" # use python in path
+
+# train settings
+train_input:
+  # model_args is ignored, since this is used as restart
+  data_args: 
+    batch_size: 16
+    group_batch: 1
+    extra_label: true
+    conv_filter: true
+    conv_name: conv
+  preprocess_args:
+    preshift: false # restarting model already shifted. Will not recompute shift value
+    prescale: false # same as above
+    prefit_ridge: 1e1
+    prefit_trainable: false
+  train_args: 
+    decay_rate: 0.5
+    decay_steps: 1000
+    display_epoch: 100
+    force_factor: 1
+    n_epoch: 5000
+    start_lr: 0.0001
+
+train_machine: 
+  dispatcher: 
+    context: local
+    batch: shell # same as above, use shell to run on local machine
+    remote_profile: null # use lazy local
+  python: "python" # use python in path
+  # resources are no longer needed, and the task will use gpu automatically if there is one
+
+# init settings
+init_model: false # do not use existing model in share_folder/init/model.pth
+
+init_scf: 
+  basis: ccpvdz
+  # this is for pure energy training
+  dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta]
+  verbose: 1
+  mol_args:
+    incore_anyway: True
+  scf_args:
+    conv_tol: 1e-8
+    conv_check: false # pyscf conv_check has a bug
+
+init_train: 
+  model_args: # necessary as this is init training
+    hidden_sizes: [100, 100, 100]
+    output_scale: 100
+    use_resnet: true
+    actv_fn: mygelu
+  data_args: 
+    batch_size: 16
+    group_batch: 1
+  preprocess_args:
+    preshift: true
+    prescale: false
+    prefit_ridge: 1e1
+    prefit_trainable: false
+  train_args: 
+    decay_rate: 0.96
+    decay_steps: 500
+    display_epoch: 100
+    n_epoch: 15000
+    start_lr: 0.0003
+
+# other settings
+cleanup: false
+strict: true
--- a/examples/water_single/withdens/penalty.yaml
+++ b/examples/water_single/withdens/penalty.yaml
+# overwriting the base config
+n_iter: 5
+
+# adding penalty
+scf_input: # can also be specified by a separate file
+  basis: ccpvdz
+  # this is for force training
+  dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta]
+  verbose: 1
+  mol_args:
+    incore_anyway: True
+  scf_args:
+    conv_tol: 1e-6
+    conv_tol_grad: 1e-2
+    level_shift: 0.1
+    diis_space: 20
+    conv_check: false # pyscf conv_check has a bug
+  penalty_terms:
+    # Coulomb loss as penalty, random strength 
+    - type: coulomb
+      required_labels: dm # where the label is stored (sysfolder/dm.npy)
+      strength: 1 # can be larger, like 5 
+      random: true # actual strength varies between [0, strength]
\ No newline at end of file
--- a/examples/water_single/withdens/pipe.sh
+++ b/examples/water_single/withdens/pipe.sh
+python -u -m deepks iterate base.yaml penalty.yaml >> log.iter 2> err.iter &&\
+python -u -m deepks iterate base.yaml relax.yaml >> log.iter 2> err.iter
\ No newline at end of file
--- a/examples/water_single/withdens/relax.yaml
+++ b/examples/water_single/withdens/relax.yaml
+# overwriting the base config to run longer
+n_iter: 10
\ No newline at end of file
--- a/examples/water_single/withdens/run.sh
+++ b/examples/water_single/withdens/run.sh
+nohup bash pipe.sh >/dev/null 2>&1 &
+echo $! > PID
--- a/requirements.txt
+++ b/requirements.txt
+numpy
+paramiko
+ruamel.yaml
+torch
+pyscf
--- a/scripts/convert_xyz.py
+++ b/scripts/convert_xyz.py
+import os
+import numpy as np
+from glob import glob
+
+
+BOHR = 0.52917721092
+ELEMENTS = ['X',  # Ghost
+    'H' , 'He', 'Li', 'Be', 'B' , 'C' , 'N' , 'O' , 'F' , 'Ne',
+    'Na', 'Mg', 'Al', 'Si', 'P' , 'S' , 'Cl', 'Ar', 'K' , 'Ca',
+    'Sc', 'Ti', 'V' , 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
+    'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y' , 'Zr',
+    'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn',
+    'Sb', 'Te', 'I' , 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',
+    'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb',
+    'Lu', 'Hf', 'Ta', 'W' , 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
+    'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th',
+    'Pa', 'U' , 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm',
+    'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds',
+    'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og',
+]
+CHARGES = dict(((x,i) for i,x in enumerate(ELEMENTS)))
+
+
+def parse_xyz(filename):
+    with open(filename) as fp:
+        natom = int(fp.readline())
+        comments = fp.readline().strip()
+        atom_str = fp.readlines()
+    atom_list = [a.split() for a in atom_str if a.strip()]
+    elements = [a[0] for a in atom_list]
+    coords = np.array([a[1:] for a in atom_list], dtype=float)
+    return natom, comments, elements, coords
+
+
+def parse_unit(rawunit):
+    if isinstance(rawunit, str):
+        try:
+            unit = float(rawunit)
+        except ValueError:
+            if rawunit.upper().startswith(('B', 'AU')):
+                unit = BOHR
+            else: #unit[:3].upper() == 'ANG':
+                unit = 1.
+    else:
+        unit = rawunit
+    return unit
+
+
+def load_array(file):
+    ext = os.path.splitext(file)[-1]
+    if "npy" in ext:
+        return np.load(file)
+    elif "npz" in ext:
+        raise NotImplementedError
+    else:
+        try:
+            arr = np.loadtxt(file)
+        except ValueError:
+            arr = np.loadtxt(file, dtype=str)
+        return arr
+
+
+def load_glob(pattern):
+    [fn] = glob(pattern)
+    return load_array(fn)
+
+
+def load_system(xyz_file):
+    base, ext = os.path.splitext(xyz_file)
+    assert ext == '.xyz'
+    natom, _, ele, coord = parse_xyz(xyz_file)
+    try:
+        energy = load_glob(f"{base}.energy*").reshape(1)
+    except:
+        energy = None
+    try:
+        force = load_glob(f"{base}.force*").reshape(natom, 3)
+    except:
+        force = None
+    try:
+        dm = load_glob(f"{base}.dm*")
+        nao = np.sqrt(dm.size).astype(int)
+        dm = dm.reshape(nao, nao)
+    except:
+        dm = None
+    return ele, coord, energy, force, dm
+
+
+def dump_systems(xyz_files, dump_dir, unit="Bohr", ext_type=False):
+    print(f"saving to {dump_dir} ... ", end="", flush=True)
+    os.makedirs(dump_dir, exist_ok=True)
+    if not xyz_files:
+        print("empty list! did nothing")
+        return
+    unit = parse_unit(unit)
+    a_ele, a_coord, a_energy, a_force, a_dm = map(np.array,
+        zip(*[load_system(fl) for fl in xyz_files]))
+    a_coord /= unit
+    if ext_type:
+        ele = a_ele[0]
+        assert all(e == ele for e in a_ele), "element type for each xyz file has to be the same"
+        np.savetxt(os.path.join(dump_dir, "type.raw"), ele, fmt="%s")
+        np.save(os.path.join(dump_dir, "coord.npy"), a_coord)
+    else:
+        a_chg = [[[CHARGES[e]] for e in ele] for ele in a_ele]
+        a_atom = np.concatenate([a_chg, a_coord], axis=-1)
+        np.save(os.path.join(dump_dir, "atom.npy"), a_atom)
+    if not all(ene is None for ene in a_energy):
+        assert not any(ele is None for ele in a_energy)
+        np.save(os.path.join(dump_dir, "energy.npy"), a_energy)
+    if not all(ff is None for ff in a_force):
+        assert not any(ff is None for ff in a_force)
+        a_force *= unit
+        np.save(os.path.join(dump_dir, "force.npy"), a_force)
+    if not all(dm is None for dm in a_dm):
+        assert not any(dm is None for dm in a_dm)
+        np.save(os.path.join(dump_dir, "dm.npy"), a_dm)
+    print(f"finished", flush=True)
+    return
+
+
+def main(xyz_files, dump_dir=".", group_size=-1, group_prefix="sys", unit="Bohr", ext_type=False):
+    if isinstance(xyz_files, str):
+        xyz_files = [xyz_files]
+    if group_size <= 0:
+        dump_systems(xyz_files, dump_dir, unit=unit, ext_type=ext_type)
+        return
+    ns = len(xyz_files)
+    ngroup = np.ceil(ns / group_size).astype(int)
+    nd = max(len(str(ngroup)), 2)
+    for i in range(ngroup):
+        dump_systems(xyz_files[i*group_size:(i+1)*group_size],
+                     os.path.join(dump_dir, f"{group_prefix}.{i:0>{nd}d}"),
+                     unit=unit, ext_type=ext_type)
+    return
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(
+        description="convert .xyz files and corresponding properties "
+                    "into systems with .npy files grouped in folders.",
+        argument_default=argparse.SUPPRESS)
+    parser.add_argument("xyz_files", metavar='FILE', nargs="+", 
+                        help="input xyz files")
+    parser.add_argument("-d", "--dump-dir", 
+                        help="directory of dumped system, default is current dir")
+    parser.add_argument("-U", "--unit", 
+                        help="length unit used to save npy files (assume xyz in Angstrom)")
+    parser.add_argument("-G", "--group-size", type=int, 
+                        help="if positive, split data into sub systems with given size, default: -1")
+    parser.add_argument("-P", "--group-prefix", 
+                        help=r"save sub systems with given prefix as `$dump_dir/$prefix.ii`, default: sys")
+    parser.add_argument("-T", "--ext-type", action="store_true", 
+                        help="if set, save the element type into separete `type.raw` file")
+    args = parser.parse_args()
+
+    main(**vars(args))
+
+
+
--- a/scripts/legacy/calc_eig.py
+++ b/scripts/legacy/calc_eig.py
+import numpy as np
+from scipy.spatial.distance import squareform, pdist
+
+
+def load_coords(filename):
+    return np.loadtxt(filename, skiprows=2, usecols=[1,2,3])
+
+
+def cosine_switching(x, lower=1.9, upper=2.0, threshold=1e-5):
+    zx = x < threshold
+    lx = x < lower
+    ux = x > upper
+    mx = (~lx) & (~ux)
+    res = np.zeros_like(x)
+    res[~zx & lx] = 1
+    res[mx] = 0.5*np.cos(np.pi * (x[mx]-lower) / (upper-lower)) + 0.5
+    return res
+
+
+def calc_weight(coords, lower=1.9, upper=2.0):
+    natom = coords.shape[0]
+    pair_dist = squareform(pdist(coords))
+    weight = cosine_switching(pair_dist, lower, upper).reshape(1, natom, natom)
+    return weight
+
+
+def split(ci, shell):
+    sec = [1]*shell[0] + [3]*shell[1] + [5]*shell[2]
+    assert np.sum(sec) == ci.shape[-1]
+    ci_list = np.split(ci, np.cumsum(sec)[:-1], axis=-1)
+    return ci_list
+
+
+def calc_atom_eig(ci, shell=(12,12,12), frozen=0):
+    ci_list = split(ci[:, frozen:], shell)
+    dm_list = [np.einsum('niap,niaq->napq', _ci, _ci) for _ci in ci_list]
+    eig_list = [np.linalg.eigvalsh(dm) for dm in dm_list]
+    eig = np.concatenate(eig_list, -1)
+    return eig
+
+
+def calc_atom_ener_eig(ci, ei, kernel=None, shell=(12,12,12), frozen=0):
+    if kernel is not None:
+        ei = kernel(ei)
+    ci_list = split(ci[:, frozen:], shell)
+    dm_list = [np.einsum('niap,niaq,ni->napq', _ci, _ci, ei[:, frozen:]) for _ci in ci_list]
+    eig_list = [np.linalg.eigvalsh(dm) for dm in dm_list]
+    eig = np.concatenate(eig_list, -1)
+    return eig
+
+
+def calc_neighbor_eig(ci, weight=None, shell=(12,12,12), frozen=0):
+    ci_list = split(ci[:, frozen:], shell)
+    dm_list = [np.einsum('niap,nibq->nabpq', _ci, _ci) for _ci in ci_list]
+    if weight is not None:
+        dm_list = [np.einsum('nabpq,nab->nabpq', _dm, weight) for _dm in dm_list]
+    eig_list = [np.linalg.eigvalsh(0.5*(_dm.sum(1) + _dm.sum(2))) for _dm in dm_list]
+    eig = np.concatenate(eig_list, -1)
+    return eig
+
+
+def calc_eig(name, ci, ei=None, xyz_file=None, shell=(12,12,12)):
+    if name == 'dm_eig':
+        return calc_atom_eig(ci, shell=shell)
+    if name == 'od_eig':
+        assert xyz_file is not None
+        return calc_neighbor_eig(ci, calc_weight(load_coords(xyz_file)), shell=shell)
+    if name == 'se_eig':
+        assert ei is not None
+        return calc_atom_ener_eig(ci, ei, kernel=None, shell=shell)
+    if name == 'fe_eig':
+        assert ei is not None
+        return calc_atom_ener_eig(ci, ei, kernel=np.exp, shell=shell)
+
+    raise ValueError(f'unsupport name: {name}')
\ No newline at end of file
--- a/scripts/legacy/proj_dm.py
+++ b/scripts/legacy/proj_dm.py
+import numpy as np
+from pyscf import gto
+import os
+import sys
+import argparse
+import mendeleev
+from calc_eig import calc_eig
+
+
+# exponent table of the projection basis: 12 powers of 1.5
+# aa = 2.0**np.arange(6,-3,-1)
+aa = 1.5**np.array([17,13,10,7,5,3,2,1,0,-1,-2,-3])
+# identity minus the first super-diagonal: row i holds +1 at column i and -1 at column i+1
+bb = np.diag(np.ones(aa.size)) - np.diag(np.ones(aa.size-1), k=1)
+# three equal counts of aa.size; main() passes this as `shell` to calc_eig
+SHELL = [aa.size] * 3
+# each row of coef is [aa[i], *bb[i]]
+coef = np.concatenate([aa.reshape(-1,1), bb], axis=1)
+# the same table repeated with a leading 0, 1 and 2
+# (presumably the angular momentum in pyscf's basis format -- TODO confirm)
+BASIS = [[0, *coef.tolist()], [1, *coef.tolist()], [2, *coef.tolist()]]
+
+
+def parse_xyz(filename, basis='ccpvtz', verbose=False):
+    with open(filename) as fp:
+        natoms = int(fp.readline())
+        comments = fp.readline()
+        xyz_str = "".join(fp.readlines())
+    mol = gto.Mole()
+    mol.verbose = 4 if verbose else 0
+    mol.atom = xyz_str
+    mol.basis  = basis
+    try:
+        mol.build(0,0,unit="Ang")
+    except RuntimeError as e:
+        mol.spin = 1
+        mol.build(0,0,unit="Ang")
+    return mol  
+
+
+def gen_proj(mol, intor = 'ovlp', verbose = False) :
+    natm = mol.natm
+    mole_coords = mol.atom_coords(unit="Ang")
+    test_mol = gto.Mole()
+    if verbose :
+        test_mol.verbose = 4
+    else :
+        test_mol.verbose = 0
+    test_mol.atom = [["Ne", coord] for coord in mole_coords]
+    test_mol.basis = BASIS
+    test_mol.spin = 0
+    test_mol.build(0,0,unit="Ang")
+    proj = gto.intor_cross(f'int1e_{intor}_sph', mol, test_mol) 
+    
+    def proj_func(mo):
+        proj_coeff = np.matmul(mo, proj).reshape(*mo.shape[:2], natm, -1)
+        if verbose:
+            print('shape of coeff data          ', proj_coeff.shape)
+        # res : nframe x nocc/nvir x natm x nproj
+        return proj_coeff, proj_coeff.shape[-1]
+    
+    return proj_func
+
+
+def proj_frame(xyz_file, mo_dir, dump_dir=None, basis='ccpvtz', ename="e_hf", intor='ovlp', verbose=False):
+    mol = parse_xyz(xyz_file, basis=basis)
+    meta, ehf, e_occ, c_occ = load_data(mo_dir, ename)
+    
+    proj_func = gen_proj(mol, intor, verbose)
+    c_proj_occ,nproj = proj_func(c_occ)
+    c_occ = c_proj_occ
+    meta = np.append(meta, nproj)
+    # print(meta, c_proj_occ.shape)
+
+    if dump_dir is not None:
+        dump_data(dump_dir, meta, ehf, e_occ, c_occ)
+    return meta, ehf, e_occ, c_occ
+
+
+def load_data(dir_name, ename="e_hf"):
+    meta = np.loadtxt(os.path.join(dir_name, 'system.raw'), dtype=int).reshape(-1)
+    natm = meta[0]
+    nao = meta[1]
+    nocc = meta[2]
+    nvir = meta[3]
+    ehf = np.loadtxt(os.path.join(dir_name, f'{ename}.raw')).reshape(-1, 1)
+    e_occ = np.loadtxt(os.path.join(dir_name, 'ener_occ.raw')).reshape(-1, nocc)
+    c_occ = np.loadtxt(os.path.join(dir_name, 'coeff_occ.raw')).reshape([-1, nocc, nao])
+    return meta, ehf, e_occ, c_occ
+
+
+def dump_data(dir_name, meta, ehf, e_occ, c_occ, dm_dict={}) :
+    os.makedirs(dir_name, exist_ok = True)
+    np.savetxt(os.path.join(dir_name, 'system.raw'), 
+               meta.reshape(1,-1), 
+               fmt = '%d',
+               header = 'natm nao nocc nvir nproj')
+    nframe = e_occ.shape[0]
+    natm = meta[0]
+    nao = meta[1]
+    nocc = meta[2]
+    nvir = meta[3]
+    nproj = meta[4]
+    # ntest == natm
+    assert(all(c_occ.shape == np.array([nframe, nocc, natm, nproj], dtype=int)))
+    assert(all(e_occ.shape == np.array([nframe, nocc], dtype=int)))
+    assert(all(all(dm.shape == np.array([nframe, natm, nproj], dtype=int)) for dm in dm_dict.values()))
+    np.save(os.path.join(dir_name, 'e_hf.npy'), ehf) 
+    np.save(os.path.join(dir_name, 'ener_occ.npy'), e_occ)
+    np.save(os.path.join(dir_name, 'coeff_occ.npy'), c_occ)
+    for name, dm in dm_dict.items():
+        np.save(os.path.join(dir_name, f'{name}.npy'), dm)
+
+
+def main(xyz_files, mo_dirs, dump_dir, basis='ccpvtz', ename="e_hf", eig_names=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'], intor='ovlp', verbose='False'):
+    assert len(xyz_files) == len(mo_dirs)
+    oldmeta = None
+    all_e_hf = []
+    all_e_occ = []
+    all_c_occ = []
+    all_dm_dict = {name:[] for name in eig_names}
+    
+    for xf, md in zip(xyz_files, mo_dirs):
+        meta, e_hf, e_occ, c_occ = proj_frame(xf, md, basis=basis, ename=ename, intor=intor, verbose=verbose)
+        if oldmeta is not None:
+            assert all(oldmeta == meta), "all frames has to be in the same system thus meta has to be equal!"
+        oldmeta = meta
+        all_e_hf.append(e_hf)
+        all_e_occ.append(e_occ)
+        all_c_occ.append(c_occ)
+        for name, dm_list in all_dm_dict.items():
+            dm_list.append(2 * calc_eig(name, c_occ, e_occ, xf, shell=SHELL)) # multiply by 2 for restricted method, doubly occupied orbitals
+        print(f"{xf} && {md} finished")
+
+    all_e_hf = np.concatenate(all_e_hf)
+    all_e_occ = np.concatenate(all_e_occ)
+    all_c_occ = np.concatenate(all_c_occ)
+    for name in all_dm_dict.keys():
+        all_dm_dict[name] = np.concatenate(all_dm_dict[name])
+
+    dump_data(dump_dir, meta, all_e_hf, all_e_occ, all_c_occ, all_dm_dict)
+    print("done")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="project mo_coeffs into atomic basis and calculate descriptors.")
+    parser.add_argument("-x", "--xyz-file", nargs="+", help="input xyz file(s), if more than one, concat them")
+    parser.add_argument("-f", "--mo-dir", nargs="+", help="input mo folder(s), must of same number with xyz files")
+    parser.add_argument("-d", "--dump-dir", default=".", help="dir of dumped files, if not specified, use current folder")
+    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
+    parser.add_argument("-I", "--intor", default="ovlp", help="intor string used to calculate int1e")
+    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
+    parser.add_argument("-e", "--ename", default="e_hf", help="file name for total energy")
+    parser.add_argument("-E", "--eig-name", nargs="*", default=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'], 
+                        help="name of eigen values to be calculated and dumped")
+    args = parser.parse_args()
+    
+    main(args.xyz_file, args.mo_dir, args.dump_dir, args.basis,
+         args.ename, args.eig_name, args.intor, args.verbose)
\ No newline at end of file
--- a/scripts/legacy/rhf.py
+++ b/scripts/legacy/rhf.py
+# coding: utf-8
+
+import numpy as np
+from pyscf import gto, scf, lib
+from pyscf.mp.mp2 import _mo_energy_without_core
+from time import time
+import os
+import sys
+import argparse
+
+
+def parse_xyz(filename, basis='ccpvtz', verbose=False):
+    with open(filename) as fp:
+        natoms = int(fp.readline())
+        comments = fp.readline()
+        xyz_str = "".join(fp.readlines())
+    mol = gto.Mole()
+    mol.verbose = 4 if verbose else 0
+    mol.atom = xyz_str
+    mol.basis  = basis
+    mol.build(0,0,unit="Ang")
+    return mol  
+
+
+def fix_gauge(mo_coeff) :
+    nvec = mo_coeff.shape[1]
+    ndim = mo_coeff.shape[0]
+    ret = np.zeros(mo_coeff.shape)
+    count = 0
+    for ii in range(nvec) :
+        for jj in range(ndim) :
+            if np.sign(mo_coeff[jj,ii]) != 0 :
+                break
+        if jj == ndim :
+            # mo_coeff[:,ii] == 0
+            assert(np.max(np.abs(mo_coeff[:,ii])) == 0)
+            raise RuntimeError( 'ERROR: zero eigen func, should not happen')
+            continue
+        else :
+            if (jj != 0) :
+                print('gauge ref is not 0')
+            factor = np.sign(mo_coeff[jj,ii])
+            ret[:,ii] = factor * mo_coeff[:,ii]
+            count += 1
+    #         break
+    # print(count)
+    return ret
+
+
+def mol_electron(mol, chkfile=None, verbose=False) :
+    if verbose:
+        start_t = time()
+    nao = mol.nao
+    natm = mol.natm
+    rhf = scf.RHF(mol)
+    if chkfile:
+        rhf.set(chkfile=chkfile)
+    erhf = rhf.kernel()
+    if verbose:
+        rhf_t = time()
+        print(f"time of rhf: {rhf_t - start_t}")
+
+    mo_energy = rhf.mo_energy
+    mo_occ = rhf.mo_occ
+    # mo_coeff = rhf.mo_coeff
+    mo_coeff_ = rhf.mo_coeff
+    mo_coeff= fix_gauge(mo_coeff_)
+    occ_a = (mo_occ>0)
+    # occ_b = (mo_occ[1]>0)
+    vir_a = (mo_occ==0)
+    # vir_b = (mo_occ[1]==0)
+    nocc_a = sum(occ_a)
+    # nocc_b = sum(occ_b)
+    nocc = nocc_a
+    nvir_a = sum(vir_a)
+    # nvir_b = sum(vir_b)
+    nvir = nvir_a
+    assert(nocc + nvir == nao)
+    if verbose :
+        print('nao = %d, nocc = %d, nvir = %d' % \
+              (nao, nocc, nvir))
+        print('shape of a and b coeffs:     ', mo_coeff[0].shape, mo_coeff[1].shape)
+    c_occ = mo_coeff[:,occ_a]
+    c_vir = mo_coeff[:,vir_a]
+    e_occ = mo_energy[occ_a]
+    e_vir = mo_energy[vir_a]
+    c_occ = c_occ.T
+    c_vir = c_vir.T
+    meta = [natm, nao, nocc, nvir]        
+    if verbose :
+        print('shape of coeff data          ', c_occ.shape)
+        print('shape of ener  data          ', e_occ.shape)
+        print('shape of coeff data          ', c_vir.shape)
+        print('shape of ener  data          ', e_vir.shape)
+        print('E(RKS)   = %.9g' % erhf)
+    return meta, erhf, (e_occ, e_vir), (c_occ, c_vir)
+    # return erhf, myemp2, ener_data, coeff_data
+
+    
+def dump_data(dir_name, meta, ehf, e_data, c_data) :
+    os.makedirs(dir_name, exist_ok = True)
+    np.savetxt(os.path.join(dir_name, 'system.raw'), 
+               np.array(meta).reshape(1,-1), 
+               fmt = '%d',
+               header = 'natm nao nocc nvir')
+    nframe = 1
+    natm = meta[0]
+    nao = meta[1]
+    nocc = meta[2]
+    nvir = meta[3]
+    # ntest == natm
+    assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int)))
+    assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int)))
+    assert(all(e_data[0].shape == np.array([nocc], dtype = int)))
+    assert(all(e_data[1].shape == np.array([nvir], dtype = int)))
+    np.savetxt(os.path.join(dir_name, 'e_hf.raw'), np.reshape(ehf, [nframe,1])) 
+    np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1]))
+
+
+def gen_frame(xyz_file, basis='ccpvtz', dump_dir=None, verbose=False):
+    if dump_dir is None:
+        dump_dir = os.path.splitext(xyz_file)[0]
+    mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose)
+    mol_meta, ehf, e_data, c_data = mol_electron(mol, verbose=verbose)
+    dump_data(dump_dir, mol_meta, ehf, e_data, c_data)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Calculate and save mp2 energy and mo_coeffs for given xyz files.")
+    parser.add_argument("files", nargs="+", help="input xyz files")
+    parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, using same dir as input")
+    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
+    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
+    args = parser.parse_args()
+
+    for fn in args.files:
+        if args.dump_dir is None:
+            dump = None
+        else:
+            dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0])
+        try:
+            gen_frame(fn, args.basis, dump, args.verbose)
+            print(f"{fn} finished")
+        except Exception as e:
+            print(f"{fn} failed,", e, file=sys.stderr)
+            raise
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/legacy/rks.py
+++ b/scripts/legacy/rks.py
+# coding: utf-8
+
+import numpy as np
+from pyscf import gto, dft, lib
+from pyscf.mp.mp2 import _mo_energy_without_core
+from time import time
+import os
+import sys
+import argparse
+
+
+def parse_xyz(filename, basis='ccpvtz', verbose=False):
+    with open(filename) as fp:
+        natoms = int(fp.readline())
+        comments = fp.readline()
+        xyz_str = "".join(fp.readlines())
+    mol = gto.Mole()
+    mol.verbose = 4 if verbose else 0
+    mol.atom = xyz_str
+    mol.basis  = basis
+    mol.build(0,0,unit="Ang")
+    return mol  
+
+
+def fix_gauge(mo_coeff) :
+    nvec = mo_coeff.shape[1]
+    ndim = mo_coeff.shape[0]
+    ret = np.zeros(mo_coeff.shape)
+    count = 0
+    for ii in range(nvec) :
+        for jj in range(ndim) :
+            if np.sign(mo_coeff[jj,ii]) != 0 :
+                break
+        if jj == ndim :
+            # mo_coeff[:,ii] == 0
+            assert(np.max(np.abs(mo_coeff[:,ii])) == 0)
+            raise RuntimeError( 'ERROR: zero eigen func, should not happen')
+            continue
+        else :
+            if (jj != 0) :
+                print('gauge ref is not 0')
+            factor = np.sign(mo_coeff[jj,ii])
+            ret[:,ii] = factor * mo_coeff[:,ii]
+            count += 1
+    #         break
+    # print(count)
+    return ret
+
+
+def mol_electron(mol, xc='pbe', chkfile=None, verbose=False) :
+    if verbose:
+        start_t = time()
+    nao = mol.nao
+    natm = mol.natm
+    rks = dft.RKS(mol)
+    rks.xc = xc
+    if chkfile:
+        rks.set(chkfile=chkfile)
+    erks = rks.kernel()
+    if verbose:
+        rks_t = time()
+        print(f"time of rks: {rks_t - start_t}")
+
+    mo_energy = rks.mo_energy
+    mo_occ = rks.mo_occ
+    # mo_coeff = rks.mo_coeff
+    mo_coeff_ = rks.mo_coeff
+    mo_coeff= fix_gauge(mo_coeff_)
+    occ_a = (mo_occ>0)
+    # occ_b = (mo_occ[1]>0)
+    vir_a = (mo_occ==0)
+    # vir_b = (mo_occ[1]==0)
+    nocc_a = sum(occ_a)
+    # nocc_b = sum(occ_b)
+    nocc = nocc_a
+    nvir_a = sum(vir_a)
+    # nvir_b = sum(vir_b)
+    nvir = nvir_a
+    assert(nocc + nvir == nao)
+    if verbose :
+        print('nao = %d, nocc = %d, nvir = %d' % \
+              (nao, nocc, nvir))
+        print('shape of a and b coeffs:     ', mo_coeff[0].shape, mo_coeff[1].shape)
+    c_occ = mo_coeff[:,occ_a]
+    c_vir = mo_coeff[:,vir_a]
+    e_occ = mo_energy[occ_a]
+    e_vir = mo_energy[vir_a]
+    c_occ = c_occ.T
+    c_vir = c_vir.T
+    meta = [natm, nao, nocc, nvir]        
+    if verbose :
+        print('shape of coeff data          ', c_occ.shape)
+        print('shape of ener  data          ', e_occ.shape)
+        print('shape of coeff data          ', c_vir.shape)
+        print('shape of ener  data          ', e_vir.shape)
+        print('E(RKS)   = %.9g' % erks)
+    return meta, erks, (e_occ, e_vir), (c_occ, c_vir)
+    # return erks, myemp2, ener_data, coeff_data
+
+    
+def dump_data(dir_name, meta, ehf, e_data, c_data) :
+    os.makedirs(dir_name, exist_ok = True)
+    np.savetxt(os.path.join(dir_name, 'system.raw'), 
+               np.array(meta).reshape(1,-1), 
+               fmt = '%d',
+               header = 'natm nao nocc nvir')
+    nframe = 1
+    natm = meta[0]
+    nao = meta[1]
+    nocc = meta[2]
+    nvir = meta[3]
+    # ntest == natm
+    assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int)))
+    assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int)))
+    assert(all(e_data[0].shape == np.array([nocc], dtype = int)))
+    assert(all(e_data[1].shape == np.array([nvir], dtype = int)))
+    np.savetxt(os.path.join(dir_name, 'e_dft.raw'), np.reshape(ehf, [nframe,1])) 
+    np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1]))
+
+
+def gen_frame(xyz_file, basis='ccpvtz', xc='pbe', dump_dir=None, verbose=False):
+    if dump_dir is None:
+        dump_dir = os.path.splitext(xyz_file)[0]
+    mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose)
+    mol_meta, ehf, e_data, c_data = mol_electron(mol, xc=xc, verbose=verbose)
+    dump_data(dump_dir, mol_meta, ehf, e_data, c_data)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Calculate and save mp2 energy and mo_coeffs for given xyz files.")
+    parser.add_argument("files", nargs="+", help="input xyz files")
+    parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, using same dir as input")
+    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
+    parser.add_argument("-X", "--xc", default='pbe', type=str, help="xc functional")
+    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
+    args = parser.parse_args()
+
+    for fn in args.files:
+        if args.dump_dir is None:
+            dump = None
+        else:
+            dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0])
+        try:
+            gen_frame(fn, args.basis, args.xc, dump, args.verbose)
+            print(f"{fn} finished")
+        except Exception as e:
+            print(f"{fn} failed,", e, file=sys.stderr)
+            raise
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/legacy/rmp2.py
+++ b/scripts/legacy/rmp2.py
+# coding: utf-8
+
+import numpy as np
+from pyscf import gto, scf, mp, lib
+from pyscf.mp.mp2 import _mo_energy_without_core
+from time import time
+import os
+import sys
+import argparse
+
+
+def my_kernel(mp, mo_energy=None, mo_coeff=None, eris=None, with_eij=True):
+    if mo_energy is None or mo_coeff is None:
+        if mp.mo_energy is None or mp.mo_coeff is None:
+            raise RuntimeError('mo_coeff, mo_energy are not initialized.\n'
+                               'You may need to call mf.kernel() to generate them.')
+        mo_coeff = None
+        mo_energy = _mo_energy_without_core(mp, mp.mo_energy)
+    else:
+        # For backward compatibility.  In pyscf-1.4 or earlier, mp.frozen is
+        # not supported when mo_energy or mo_coeff is given.
+        assert(mp.frozen is 0 or mp.frozen is None)
+
+    if eris is None: eris = mp.ao2mo(mo_coeff)
+
+    nocc = mp.nocc
+    nvir = mp.nmo - nocc
+    eia = mo_energy[:nocc,None] - mo_energy[None,nocc:]
+
+    if with_eij:
+        eij = np.empty((nocc,nocc), dtype=eia.dtype)
+    else:
+        eij = None
+
+    emp2 = 0
+    for i in range(nocc):
+        gi = np.asarray(eris.ovov[i*nvir:(i+1)*nvir])
+        gi = gi.reshape(nvir,nocc,nvir).transpose(1,0,2)
+        t2i = gi.conj()/lib.direct_sum('jb+a->jba', eia, eia[i])
+        tmp_eij = 2 * np.einsum('jab,jab->j', t2i, gi) - np.einsum('jab,jba->j', t2i, gi)
+        emp2 += tmp_eij.sum()
+        if with_eij:
+            eij[i] = tmp_eij
+
+    return emp2.real, eij.real
+
+
+def parse_xyz(filename, basis='ccpvtz', verbose=False):
+    with open(filename) as fp:
+        natoms = int(fp.readline())
+        comments = fp.readline()
+        xyz_str = "".join(fp.readlines())
+    mol = gto.Mole()
+    mol.verbose = 4 if verbose else 0
+    mol.atom = xyz_str
+    mol.basis  = basis
+    mol.build(0,0,unit="Ang")
+    return mol  
+
+
+def fix_gauge(mo_coeff) :
+    nvec = mo_coeff.shape[1]
+    ndim = mo_coeff.shape[0]
+    ret = np.zeros(mo_coeff.shape)
+    count = 0
+    for ii in range(nvec) :
+        for jj in range(ndim) :
+            if np.sign(mo_coeff[jj,ii]) != 0 :
+                break
+        if jj == ndim :
+            # mo_coeff[:,ii] == 0
+            assert(np.max(np.abs(mo_coeff[:,ii])) == 0)
+            raise RuntimeError( 'ERROR: zero eigen func, should not happen')
+            continue
+        else :
+            if (jj != 0) :
+                print('gauge ref is not 0')
+            factor = np.sign(mo_coeff[jj,ii])
+            ret[:,ii] = factor * mo_coeff[:,ii]
+            count += 1
+    #         break
+    # print(count)
+    return ret
+
+
+def mol_electron(mol, frozen=0, chkfile=None, verbose=False) :
+    if verbose:
+        start_t = time()
+    nao = mol.nao
+    natm = mol.natm
+    rhf = scf.RHF(mol)
+    if chkfile:
+        rhf.set(chkfile=chkfile)
+    erhf = rhf.kernel()
+    if verbose:
+        rhf_t = time()
+        print(f"time of rhf: {rhf_t - start_t}")
+
+    mo_energy = rhf.mo_energy
+    mo_occ = rhf.mo_occ
+    # mo_coeff = rhf.mo_coeff
+    mo_coeff_ = rhf.mo_coeff
+    mo_coeff= fix_gauge(mo_coeff_)
+    occ_a = (mo_occ>0)
+    occ_a[:frozen] = False
+    # occ_b = (mo_occ[1]>0)
+    vir_a = (mo_occ==0)
+    # vir_b = (mo_occ[1]==0)
+    nocc_a = sum(occ_a)
+    # nocc_b = sum(occ_b)
+    nocc = nocc_a
+    nvir_a = sum(vir_a)
+    # nvir_b = sum(vir_b)
+    nvir = nvir_a
+    assert(nocc + nvir + frozen == nao)
+    if verbose :
+        print('nao = %d, nocc = %d, nvir = %d' % \
+              (nao, nocc, nvir))
+        print('shape of a and b coeffs:     ', mo_coeff[0].shape, mo_coeff[1].shape)
+    c_occ = mo_coeff[:,occ_a]
+    c_vir = mo_coeff[:,vir_a]
+    e_occ = mo_energy[occ_a]
+    e_vir = mo_energy[vir_a]
+    c_occ = c_occ.T
+    c_vir = c_vir.T
+    meta = [natm, nao, nocc, nvir]        
+    if verbose :
+        print('shape of coeff data          ', c_occ.shape)
+        print('shape of ener  data          ', e_occ.shape)
+        print('shape of coeff data          ', c_vir.shape)
+        print('shape of ener  data          ', e_vir.shape)
+        mid_t = time()
+        # print(f"time of collecting results: {mid_t - rhf_t}")
+
+    mp2 = mp.MP2(rhf, frozen=frozen)
+    # emp2 = mp2.kernel()
+    emp2, emp2_ij = my_kernel(mp2)
+    if verbose :
+        print('E(HF)   = %.9g' % erhf)
+        print('E(RMP2) = %.9g' % emp2)
+        print(f"time of mp2: {time()-mid_t}")
+    return meta, erhf, emp2, emp2_ij, (e_occ, e_vir), (c_occ, c_vir)
+    # return erhf, myemp2, ener_data, coeff_data
+
+    
+def dump_data(dir_name, meta, ehf, emp2, ec_ij, e_data, c_data) :
+    os.makedirs(dir_name, exist_ok = True)
+    np.savetxt(os.path.join(dir_name, 'system.raw'), 
+               np.array(meta).reshape(1,-1), 
+               fmt = '%d',
+               header = 'natm nao nocc nvir')
+    nframe = 1
+    natm = meta[0]
+    nao = meta[1]
+    nocc = meta[2]
+    nvir = meta[3]
+    # ntest == natm
+    assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int)))
+    assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int)))
+    assert(all(e_data[0].shape == np.array([nocc], dtype = int)))
+    assert(all(e_data[1].shape == np.array([nvir], dtype = int)))
+    assert(all(ec_ij.shape == np.array([nocc, nocc], dtype = int)))
+    np.savetxt(os.path.join(dir_name, 'e_hf.raw'), np.reshape(ehf, [nframe,1])) 
+    np.savetxt(os.path.join(dir_name, 'e_mp2.raw'), np.reshape(emp2, [nframe,1])) 
+    np.savetxt(os.path.join(dir_name, 'ec_ij.raw'), ec_ij.reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1]))
+    np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1]))
+
+
+def gen_frame(xyz_file, basis='ccpvtz', frozen=0, dump_dir=None, verbose=False):
+    if dump_dir is None:
+        dump_dir = os.path.splitext(xyz_file)[0]
+    mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose)
+    mol_meta, ehf, emp2, ec_ij, e_data, c_data = mol_electron(mol, frozen=frozen, verbose=verbose)
+    dump_data(dump_dir, mol_meta, ehf, emp2, ec_ij, e_data, c_data)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Calculate and save mp2 energy and mo_coeffs for given xyz files.")
+    parser.add_argument("files", nargs="+", help="input xyz files")
+    parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, using same dir as input")
+    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
+    parser.add_argument("-F", "--frozen", default=0, type=int, help="number of orbit to be frozen when calculate mp2")
+    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
+    args = parser.parse_args()
+
+    for fn in args.files:
+        if args.dump_dir is None:
+            dump = None
+        else:
+            dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0])
+        try:
+            gen_frame(fn, args.basis, args.frozen, dump, args.verbose)
+            print(f"{fn} finished")
+        except Exception as e:
+            print(f"{fn} failed,", e, file=sys.stderr)
+            raise
+
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/scripts/solve_mol.py
+++ b/scripts/solve_mol.py
+#!/usr/bin/env python
+#SBATCH -N 1
+#SBATCH -c 10
+#SBATCH -t 24:00:00
+#SBATCH --mem=32G
+
+import time
+import numpy as np
+from pyscf import gto, scf
+
+# Divisor applied to the negated gradient when forces are saved by the script
+# section of this file. Presumably the Bohr radius in Angstrom, i.e. a
+# per-Bohr -> per-Angstrom conversion (TODO confirm units).
+BOHR = 0.52917721092
+
+# Module-level switches read by the calc_* / solve_scf functions. They are
+# flipped to True from the command-line options in the __main__ section.
+_NO_FORCE = False      # skip gradient (force) evaluation
+_NO_DM = False         # skip density-matrix evaluation
+_MUST_UNRES = False    # use UHF / UKS instead of HF / KS
+_USE_NEWTON = False    # pass the SCF object through scf.fast_newton
+
+def parse_xyz(filename, basis='ccpvdz', **kwargs):
+    with open(filename) as fp:
+        natoms = int(fp.readline())
+        comments = fp.readline()
+        xyz_str = "".join(fp.readlines())
+    mol = gto.Mole()
+    mol.atom = xyz_str
+    mol.basis = basis
+    mol.set(**kwargs)
+    if "spin" not in kwargs:
+        mol.spin = mol.nelectron % 2
+    mol.build(0,0,unit="Ang")
+    return mol  
+
+
+def get_method(name: str):
+    lname = name.lower()
+    if lname == "hf":
+        return calc_hf
+    if lname[:3] == "dft":
+        xc = lname.split("@")[1] if "@" in lname else "pbe"
+        return lambda mol, **scfargs: calc_dft(mol, xc, **scfargs)
+    if lname == "mp2":
+        return calc_mp2
+    if lname == "ccsd":
+        return calc_ccsd
+    if lname.startswith(("ccsd_t", "ccsd-t", "ccsd(t)")):
+        return calc_ccsd_t
+    if lname == "fci":
+        return calc_fci
+    raise ValueError(f"Unknown calculation method: {name}")
+
+def solve_scf(mol, **scfargs):
+    HFmethod = scf.HF if not _MUST_UNRES else scf.UHF
+    mf = HFmethod(mol).set(init_guess_breaksym=True)
+    init_dm = mf.get_init_guess()
+    # if _MUST_UNRES:
+    #     init_dm[1][:2,:2] = 0
+    mf.kernel(init_dm)
+    if _USE_NEWTON:
+        mf = scf.fast_newton(mf)
+    return mf
+
+def calc_hf(mol, **scfargs):
+    mf = solve_scf(mol, **scfargs)
+    if not mf.converged:
+        raise RuntimeError("SCF not converged!")
+    etot = mf.e_tot
+    grad = mf.nuc_grad_method().kernel() if not _NO_FORCE else None
+    rdm = mf.make_rdm1() if not _NO_DM else None
+    return etot, grad, rdm
+
+def calc_dft(mol, xc="pbe", **scfargs):
+    from pyscf import dft
+    KSmethod = dft.KS if not _MUST_UNRES else dft.UKS
+    mf = KSmethod(mol, xc).run(**scfargs)
+    if not mf.converged:
+        raise RuntimeError("SCF not converged!")
+    etot = mf.e_tot
+    if _NO_FORCE or dft.libxc.xc_type(xc) in ('MGGA', 'NLC'):
+        grad = None
+    else:
+        grad = mf.nuc_grad_method().kernel()
+    rdm = mf.make_rdm1() if not _NO_DM else None
+    return etot, grad, rdm
+
+def calc_mp2(mol, **scfargs):
+    import pyscf.mp
+    mf = solve_scf(mol, **scfargs)
+    if not mf.converged:
+        raise RuntimeError("SCF not converged!")
+    postmf = pyscf.mp.MP2(mf).run()
+    etot = postmf.e_tot
+    grad = postmf.nuc_grad_method().kernel() if not _NO_FORCE else None
+    return etot, grad, None
+
+def calc_ccsd(mol, **scfargs):
+    import pyscf.cc
+    mf = solve_scf(mol, **scfargs)
+    if not mf.converged:
+        raise RuntimeError("SCF not converged!")
+    mycc = mf.CCSD().run()
+    etot = mycc.e_tot
+    grad = mycc.nuc_grad_method().kernel() if not _NO_FORCE else None
+    ccdm = np.einsum('...pi,...ij,...qj->...pq', 
+        mf.mo_coeff, mycc.make_rdm1(), mf.mo_coeff.conj()) if not _NO_DM else None
+    return etot, grad, ccdm
+
+def calc_ccsd_t(mol, **scfargs):
+    import pyscf.cc
+    mf = solve_scf(mol, **scfargs)
+    if not mf.converged:
+        raise RuntimeError("SCF not converged!")
+    mycc = mf.CCSD().run()
+    et_correction = mycc.ccsd_t()
+    etot = mycc.e_tot + et_correction
+    if _NO_FORCE:
+        return etot, None, None
+    import pyscf.grad.ccsd_t as ccsd_t_grad
+    grad = ccsd_t_grad.Gradients(mycc).kernel()
+    return etot, grad, None
+
+def calc_fci(mol, **scfargs):
+    import pyscf.fci
+    mf = solve_scf(mol, **scfargs)
+    if not mf.converged:
+        raise RuntimeError("SCF not converged!")
+    myci = pyscf.fci.FCI(mf)
+    etot, fcivec = myci.kernel()
+    rdm = np.einsum('...pi,...ij,...qj->...pq', 
+            mf.mo_coeff, 
+            myci.make_rdm1s(fcivec, mol.nao, mol.nelec), 
+            mf.mo_coeff.conj()).sum(0) if not _NO_DM else None
+    return etot, None, rdm
+
+
+if __name__ == "__main__":
+    import argparse
+    import os
+    parser = argparse.ArgumentParser(description="Calculate and save mp2 energy and mo_coeffs for given xyz files.")
+    parser.add_argument("files", nargs="+", help="input xyz files")
+    parser.add_argument("-d", "--dump-dir", help="dir of dumped files, default is same dir as xyz file")
+    parser.add_argument("-v", "--verbose", default=1, type=int, help="output calculation information")
+    parser.add_argument("-B", "--basis", default="ccpvdz", type=str, help="basis used to do the calculation")
+    parser.add_argument("-C", "--charge", default=0, type=int, help="net charge of the molecule")
+    parser.add_argument("-S", "--spin", default=0, type=int, help="net spin of the molecule")
+    parser.add_argument("-M", "--method", default="ccsd", help="method used to do the calculation. support MP2, CCSD and CCSD(T)")
+    parser.add_argument("-U", "--unrestrict", action="store_true", help="force using unrestricted methods")
+    parser.add_argument("-NF", "--no-force", action="store_true", help="do not calculate force")
+    parser.add_argument("-ND", "--no-dm", action="store_true", help="do not calculate dm")
+    parser.add_argument("-SO", "--newton", action="store_true", help="allow using newton method when scf not converged")
+    parser.add_argument("--scf-input", help="yaml file to specify scf arguments")
+    args = parser.parse_args()
+    
+    if args.unrestrict: _MUST_UNRES = True
+    if args.no_force: _NO_FORCE = True
+    if args.no_dm: _NO_DM = True
+    if args.newton: _USE_NEWTON = True
+
+    scfargs = {}
+    if args.scf_input is not None:
+        import ruamel.yaml as yaml
+        with open(args.scf_input, 'r') as fp:
+            scfargs = yaml.safe_load(fp)        
+    if args.dump_dir is not None:
+        os.makedirs(args.dump_dir, exist_ok = True)
+    calculator = get_method(args.method)
+
+    for fn in args.files:
+        tic = time.time()
+        mol = parse_xyz(fn, args.basis, verbose=args.verbose, charge=args.charge, spin=args.spin)
+        try:
+            res = calculator(mol, **scfargs)
+        except RuntimeError as err:
+            print(fn, f"failed, {err}")
+            continue
+        etot, grad, rdm = res
+        if args.dump_dir is None:
+            dump_dir = os.path.dirname(fn)
+        else:
+            dump_dir = args.dump_dir
+        dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0])
+        np.save(dump+".energy.npy", [etot])
+        if grad is not None:
+            force = -grad / BOHR
+            np.save(dump+".force.npy", force)
+        if rdm is not None:
+            np.save(dump+".dm.npy", rdm)
+        if args.verbose:
+            print(fn, f"done, time = {time.time()-tic}")
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
+import pathlib
+import setuptools
+
+
+here = pathlib.Path(__file__).parent.resolve()
+readme = (here / 'README.md').read_text(encoding='utf-8')
+
+# did not include torch and pyscf here
+install_requires=['numpy', 'paramiko', 'ruamel.yaml']
+
+
+setuptools.setup(
+    name="deepks",
+    use_scm_version={'write_to': 'deepks/_version.py'},
+    setup_requires=['setuptools_scm'],
+    author="Yixiao Chen",
+    author_email="yixiaoc@princeton.edu",
+    description="DeePKS-kit: generate accurate (self-consistent) energy functionals",
+    long_description=readme,
+    long_description_content_type="text/markdown",
+    packages=setuptools.find_packages(include=['deepks', 'deepks.*']),
+    classifiers=[
+        "Programming Language :: Python :: 3.7",
+    ],
+    keywords='deepks DeePKS-kit',
+    install_requires=install_requires,
+    python_requires=">=3.7",
+    entry_points={
+        'console_scripts': [
+            'deepks=deepks.main:main_cli',
+            'dks=deepks.main:main_cli',
+        ],
+    },
+)
\ No newline at end of file