Commit 47af8be9 authored by yuhai's avatar yuhai
Browse files

Initial commit

parents
# all arguments are flattened into this file
# they can also be split into separate files and referenced here
n_iter: 0 # use 0 as a placeholder
# training and testing systems
systems_train: # can also be files that contain system paths
- ../systems/group.0[0-2] # supports glob
systems_test: # if empty, use the last system of training set
- ../systems/group.03
# directory setting
workdir: "."
share_folder: "share" # folder that stores all other settings
# scf settings
scf_input: # can also be specified by a separate file
basis: ccpvdz
# this is for force training
dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta]
verbose: 1
mol_args:
incore_anyway: True
scf_args:
conv_tol: 1e-6
conv_tol_grad: 1e-2
level_shift: 0.1
diis_space: 20
conv_check: false # pyscf conv_check has a bug
scf_machine:
# every system will be run as a separate command (a task)
sub_size: 1
# 4 tasks will be gathered into one group and submitted together as a shell script
group_size: 4
dispatcher:
context: local
batch: shell # set to shell to run on local machine, you can also use `slurm`
remote_profile: null # not needed in local case
# resources are no longer needed, but the envs can still be set here
resources:
envs:
PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G
python: "python" # use python in path
# train settings
train_input:
# model_args is ignored, since this is used as restart
data_args:
batch_size: 16
group_batch: 1
extra_label: true
conv_filter: true
conv_name: conv
preprocess_args:
preshift: false # restarting model already shifted. Will not recompute shift value
prescale: false # same as above
prefit_ridge: 1e1
prefit_trainable: false
train_args:
decay_rate: 0.5
decay_steps: 1000
display_epoch: 100
force_factor: 1
n_epoch: 5000
start_lr: 0.0001
train_machine:
dispatcher:
context: local
batch: shell # same as above, use shell to run on local machine
remote_profile: null # use lazy local
python: "python" # use python in path
# resources are no longer needed, and the task will use gpu automatically if there is one
# init settings
init_model: false # do not use existing model in share_folder/init/model.pth
init_scf:
basis: ccpvdz
# this is for pure energy training
dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta]
verbose: 1
mol_args:
incore_anyway: True
scf_args:
conv_tol: 1e-8
conv_check: false # pyscf conv_check has a bug
init_train:
model_args: # necessary as this is init training
hidden_sizes: [100, 100, 100]
output_scale: 100
use_resnet: true
actv_fn: mygelu
data_args:
batch_size: 16
group_batch: 1
preprocess_args:
preshift: true
prescale: false
prefit_ridge: 1e1
prefit_trainable: false
train_args:
decay_rate: 0.96
decay_steps: 500
display_epoch: 100
n_epoch: 15000
start_lr: 0.0003
# other settings
cleanup: false
strict: true
# overwriting the base config
n_iter: 5
# adding penalty
scf_input: # can also be specified by a separate file
basis: ccpvdz
# this is for force training
dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta]
verbose: 1
mol_args:
incore_anyway: True
scf_args:
conv_tol: 1e-6
conv_tol_grad: 1e-2
level_shift: 0.1
diis_space: 20
conv_check: false # pyscf conv_check has a bug
penalty_terms:
# Coulomb loss as penalty, random strength
- type: coulomb
required_labels: dm # where the label is stored (sysfolder/dm.npy)
strength: 1 # can be larger, like 5
random: true # actual strength varies between [0, strength]
\ No newline at end of file
# Run the iteration with base.yaml + penalty.yaml first; only if it exits
# successfully (&&) run a second iteration with base.yaml + relax.yaml.
# stdout of both runs is appended to log.iter; stderr goes to err.iter.
# NOTE(review): the second `2>` truncates err.iter written by the first run.
python -u -m deepks iterate base.yaml penalty.yaml >> log.iter 2> err.iter &&\
python -u -m deepks iterate base.yaml relax.yaml >> log.iter 2> err.iter
\ No newline at end of file
# overwriting the base config to run longer
n_iter: 10
\ No newline at end of file
# Start pipe.sh in the background under nohup, discarding stdout and stderr.
nohup bash pipe.sh >/dev/null 2>&1 &
# Store the PID of that background job in the file `PID`.
echo $! > PID
import os
import numpy as np
from glob import glob
# Value parse_unit returns for Bohr / atomic units; coordinates are divided by it.
BOHR = 0.52917721092

# Element symbols ordered so that the list index of a symbol is its charge;
# index 0 is the ghost atom 'X'.
ELEMENTS = """
    X
    H  He Li Be B  C  N  O  F  Ne
    Na Mg Al Si P  S  Cl Ar K  Ca
    Sc Ti V  Cr Mn Fe Co Ni Cu Zn
    Ga Ge As Se Br Kr Rb Sr Y  Zr
    Nb Mo Tc Ru Rh Pd Ag Cd In Sn
    Sb Te I  Xe Cs Ba La Ce Pr Nd
    Pm Sm Eu Gd Tb Dy Ho Er Tm Yb
    Lu Hf Ta W  Re Os Ir Pt Au Hg
    Tl Pb Bi Po At Rn Fr Ra Ac Th
    Pa U  Np Pu Am Cm Bk Cf Es Fm
    Md No Lr Rf Db Sg Bh Hs Mt Ds
    Rg Cn Nh Fl Mc Lv Ts Og
""".split()

# Reverse lookup: element symbol -> index in ELEMENTS.
CHARGES = {symbol: number for number, symbol in enumerate(ELEMENTS)}
def parse_xyz(filename):
    """Read an xyz file.

    The first line is parsed as the atom count, the second line is the
    comment, and every following non-blank line is split on whitespace into
    an element symbol followed by numeric fields.

    Returns:
        (natom, comments, elements, coords) where ``coords`` is a float array
        built from everything after the symbol on each atom line.
    """
    with open(filename) as xyz:
        natom = int(xyz.readline())
        comments = xyz.readline().strip()
        rows = [line.split() for line in xyz if line.strip()]
    elements = []
    numeric_fields = []
    for row in rows:
        elements.append(row[0])
        numeric_fields.append(row[1:])
    coords = np.array(numeric_fields, dtype=float)
    return natom, comments, elements, coords
def parse_unit(rawunit):
    """Turn a unit specification into a numeric scale factor.

    Non-string input is returned unchanged. A string is first tried as a
    float literal; otherwise names starting with 'B' or 'AU' (case
    insensitive) map to BOHR and any other name maps to 1.
    """
    if not isinstance(rawunit, str):
        return rawunit
    try:
        return float(rawunit)
    except ValueError:
        pass
    named_bohr = rawunit.upper().startswith(('B', 'AU'))
    # anything that is not Bohr / atomic units is treated like Angstrom
    return BOHR if named_bohr else 1.
def load_array(file):
    """Load an array from ``file``, choosing the reader from its extension.

    Extensions containing "npy" are read with np.load, "npz" is not
    supported, and anything else is read as text (falling back to a string
    array when the content is not numeric).

    Raises:
        NotImplementedError: for extensions containing "npz".
    """
    suffix = os.path.splitext(file)[-1]
    if "npy" in suffix:
        return np.load(file)
    if "npz" in suffix:
        raise NotImplementedError
    try:
        return np.loadtxt(file)
    except ValueError:
        # non-numeric text: keep the raw tokens
        return np.loadtxt(file, dtype=str)
def load_glob(pattern):
    """Load the single file matching ``pattern`` via load_array.

    Raises:
        ValueError: when the pattern does not match exactly one file.
    """
    matches = glob(pattern)
    (only_match,) = matches
    return load_array(only_match)
def load_system(xyz_file):
    """Load one xyz file together with its optional property files.

    Properties are looked up next to the xyz file as ``<base>.energy*``,
    ``<base>.force*`` and ``<base>.dm*`` (each must match exactly one file).
    A property that is missing, ambiguous, or cannot be loaded/reshaped is
    returned as None.

    Returns:
        (elements, coords, energy, force, dm) with energy of shape (1,),
        force of shape (natom, 3) and dm reshaped to a square matrix.
    """
    base, ext = os.path.splitext(xyz_file)
    assert ext == '.xyz'
    natom, _, ele, coord = parse_xyz(xyz_file)
    # `except Exception` keeps the best-effort behavior without swallowing
    # KeyboardInterrupt / SystemExit the way a bare `except:` does.
    try:
        energy = load_glob(f"{base}.energy*").reshape(1)
    except Exception:
        energy = None
    try:
        force = load_glob(f"{base}.force*").reshape(natom, 3)
    except Exception:
        force = None
    try:
        dm = load_glob(f"{base}.dm*")
        # round before casting so float error in sqrt cannot truncate nao
        nao = int(round(np.sqrt(dm.size)))
        dm = dm.reshape(nao, nao)
    except Exception:
        dm = None
    return ele, coord, energy, force, dm
def dump_systems(xyz_files, dump_dir, unit="Bohr", ext_type=False):
    """Load the given xyz files and save them as npy arrays in ``dump_dir``.

    Args:
        xyz_files: list of xyz file paths; an empty list only creates
            ``dump_dir`` and returns.
        dump_dir: output directory (created if needed).
        unit: length unit spec passed to parse_unit; coordinates are divided
            by the resulting factor and forces are multiplied by it.
        ext_type: if true, write the element symbols once to ``type.raw`` and
            the coordinates to ``coord.npy``; otherwise write ``atom.npy``
            holding [charge, x, y, z] per atom.

    ``energy.npy``, ``force.npy`` and ``dm.npy`` are written only when the
    property is available, in which case it must be available for every file.
    """
    print(f"saving to {dump_dir} ... ", end="", flush=True)
    os.makedirs(dump_dir, exist_ok=True)
    if not xyz_files:
        print("empty list! did nothing")
        return
    unit = parse_unit(unit)
    a_ele, a_coord, a_energy, a_force, a_dm = map(np.array,
        zip(*[load_system(fl) for fl in xyz_files]))
    a_coord /= unit
    if ext_type:
        ele = a_ele[0]
        # compare as plain lists: `e == ele` on numpy rows yields an array
        # whose truth value is ambiguous and would raise inside all()
        assert all(list(e) == list(ele) for e in a_ele), "element type for each xyz file has to be the same"
        np.savetxt(os.path.join(dump_dir, "type.raw"), ele, fmt="%s")
        np.save(os.path.join(dump_dir, "coord.npy"), a_coord)
    else:
        a_chg = [[[CHARGES[e]] for e in ele] for ele in a_ele]
        a_atom = np.concatenate([a_chg, a_coord], axis=-1)
        np.save(os.path.join(dump_dir, "atom.npy"), a_atom)
    if not all(ene is None for ene in a_energy):
        assert not any(ene is None for ene in a_energy)
        np.save(os.path.join(dump_dir, "energy.npy"), a_energy)
    if not all(ff is None for ff in a_force):
        assert not any(ff is None for ff in a_force)
        a_force *= unit
        np.save(os.path.join(dump_dir, "force.npy"), a_force)
    if not all(dm is None for dm in a_dm):
        assert not any(dm is None for dm in a_dm)
        np.save(os.path.join(dump_dir, "dm.npy"), a_dm)
    print("finished", flush=True)
    return
def main(xyz_files, dump_dir=".", group_size=-1, group_prefix="sys", unit="Bohr", ext_type=False):
    """Convert xyz files into one system folder or several grouped folders.

    With a non-positive ``group_size`` everything is dumped into ``dump_dir``.
    Otherwise the files are cut into consecutive chunks of ``group_size`` and
    each chunk is dumped into ``dump_dir/<group_prefix>.<ii>``, where the
    index is zero-padded to at least two digits.
    """
    if isinstance(xyz_files, str):
        xyz_files = [xyz_files]
    if group_size <= 0:
        dump_systems(xyz_files, dump_dir, unit=unit, ext_type=ext_type)
        return
    total = len(xyz_files)
    starts = range(0, total, group_size)
    # pad the folder index to the width of the group count, two digits minimum
    width = max(len(str(len(starts))), 2)
    for index, begin in enumerate(starts):
        chunk = xyz_files[begin:begin + group_size]
        folder = os.path.join(dump_dir, f"{group_prefix}.{index:0>{width}d}")
        dump_systems(chunk, folder, unit=unit, ext_type=ext_type)
    return
# Command-line entry point: parse the options and forward them to main().
if __name__ == "__main__":
    import argparse
    # argument_default=SUPPRESS leaves options that were not given out of the
    # namespace, so main() falls back to its own keyword defaults.
    parser = argparse.ArgumentParser(
        description="convert .xyz files and corresponding properties "
                    "into systems with .npy files grouped in folders.",
        argument_default=argparse.SUPPRESS)
    parser.add_argument("xyz_files", metavar='FILE', nargs="+",
                        help="input xyz files")
    parser.add_argument("-d", "--dump-dir",
                        help="directory of dumped system, default is current dir")
    parser.add_argument("-U", "--unit",
                        help="length unit used to save npy files (assume xyz in Angstrom)")
    parser.add_argument("-G", "--group-size", type=int,
                        help="if positive, split data into sub systems with given size, default: -1")
    parser.add_argument("-P", "--group-prefix",
                        help=r"save sub systems with given prefix as `$dump_dir/$prefix.ii`, default: sys")
    parser.add_argument("-T", "--ext-type", action="store_true",
                        help="if set, save the element type into separete `type.raw` file")
    args = parser.parse_args()
    # the argparse dest names match main()'s parameter names
    main(**vars(args))
import numpy as np
from scipy.spatial.distance import squareform, pdist
def load_coords(filename):
    """Read columns 1-3 of an xyz-style file, skipping its two header lines."""
    xyz_columns = [1, 2, 3]
    return np.loadtxt(filename, usecols=xyz_columns, skiprows=2)
def cosine_switching(x, lower=1.9, upper=2.0, threshold=1e-5):
    """Element-wise cosine switching function of ``x``.

    The result is 0 where ``x < threshold``, 1 where ``threshold <= x < lower``,
    ``0.5*cos(pi*(x-lower)/(upper-lower)) + 0.5`` where x is neither below
    ``lower`` nor above ``upper``, and 0 where ``x > upper``.
    """
    below_lower = x < lower
    out = np.zeros_like(x)
    # flat part: under the lower cutoff but not (numerically) zero
    flat_part = below_lower & ~(x < threshold)
    # decaying part: neither under `lower` nor over `upper`
    decay_part = ~(below_lower | (x > upper))
    out[flat_part] = 1
    phase = np.pi * (x[decay_part] - lower) / (upper - lower)
    out[decay_part] = 0.5 * np.cos(phase) + 0.5
    return out
def calc_weight(coords, lower=1.9, upper=2.0):
    """Pairwise switching weights for a set of coordinates.

    Applies cosine_switching to the full pairwise distance matrix of
    ``coords`` and returns it with shape (1, natom, natom).
    """
    n_atoms = coords.shape[0]
    distances = squareform(pdist(coords))
    switched = cosine_switching(distances, lower, upper)
    return switched.reshape(1, n_atoms, n_atoms)
def split(ci, shell):
    """Cut the last axis of ``ci`` into consecutive sections.

    ``shell`` gives how many sections of width 1, 3 and 5 to cut, in that
    order; the widths must add up to ``ci.shape[-1]``.
    """
    n_one, n_three, n_five = shell[0], shell[1], shell[2]
    sec = n_one * [1] + n_three * [3] + n_five * [5]
    assert np.sum(sec) == ci.shape[-1]
    boundaries = np.cumsum(sec)[:-1]
    return np.split(ci, boundaries, axis=-1)
def calc_atom_eig(ci, shell=(12,12,12), frozen=0):
    """Eigenvalues of per-atom matrices built from projected coefficients.

    The first ``frozen`` entries along axis 1 of ``ci`` are dropped, the last
    axis is cut with split(), and for each section the matrix
    ``sum_i c[n,i,a,p] * c[n,i,a,q]`` is diagonalised with eigvalsh. The
    eigenvalues of all sections are concatenated along the last axis.
    """
    active = ci[:, frozen:]
    eig_blocks = []
    for block in split(active, shell):
        dm = np.einsum('niap,niaq->napq', block, block)
        eig_blocks.append(np.linalg.eigvalsh(dm))
    return np.concatenate(eig_blocks, -1)
def calc_atom_ener_eig(ci, ei, kernel=None, shell=(12,12,12), frozen=0):
    """Like calc_atom_eig, with each index-i term weighted by ``ei``.

    ``kernel``, when given, is applied to ``ei`` first. The first ``frozen``
    entries along axis 1 are dropped from both ``ci`` and ``ei``.
    """
    if kernel is not None:
        ei = kernel(ei)
    active = ci[:, frozen:]
    eig_blocks = []
    for block in split(active, shell):
        weighted_dm = np.einsum('niap,niaq,ni->napq', block, block, ei[:, frozen:])
        eig_blocks.append(np.linalg.eigvalsh(weighted_dm))
    return np.concatenate(eig_blocks, -1)
def calc_neighbor_eig(ci, weight=None, shell=(12,12,12), frozen=0):
    """Eigenvalues of atom-pair matrices summed over the partner atom.

    For each section from split(), the pair tensor
    ``sum_i c[n,i,a,p] * c[n,i,b,q]`` is formed, optionally scaled by
    ``weight[n,a,b]``, averaged between its sums over axis 1 and axis 2,
    and diagonalised with eigvalsh. Results are concatenated on the last axis.
    """
    active = ci[:, frozen:]
    eig_blocks = []
    for block in split(active, shell):
        pair_dm = np.einsum('niap,nibq->nabpq', block, block)
        if weight is not None:
            pair_dm = np.einsum('nabpq,nab->nabpq', pair_dm, weight)
        averaged = 0.5 * (pair_dm.sum(1) + pair_dm.sum(2))
        eig_blocks.append(np.linalg.eigvalsh(averaged))
    return np.concatenate(eig_blocks, -1)
def calc_eig(name, ci, ei=None, xyz_file=None, shell=(12,12,12)):
    """Compute the descriptor selected by ``name``.

    Supported names:
        'dm_eig': calc_atom_eig
        'od_eig': calc_neighbor_eig weighted by calc_weight (needs xyz_file)
        'se_eig': calc_atom_ener_eig with no kernel (needs ei)
        'fe_eig': calc_atom_ener_eig with np.exp as kernel (needs ei)

    Raises:
        ValueError: for any other name.
    """
    if name in ('se_eig', 'fe_eig'):
        assert ei is not None
        kernel = np.exp if name == 'fe_eig' else None
        return calc_atom_ener_eig(ci, ei, kernel=kernel, shell=shell)
    if name == 'od_eig':
        assert xyz_file is not None
        weight = calc_weight(load_coords(xyz_file))
        return calc_neighbor_eig(ci, weight, shell=shell)
    if name == 'dm_eig':
        return calc_atom_eig(ci, shell=shell)
    raise ValueError(f'unsupport name: {name}')
\ No newline at end of file
import numpy as np
from pyscf import gto
import os
import sys
import argparse
import mendeleev
from calc_eig import calc_eig
# Exponent column of the projection basis: powers of 1.5.
# (an earlier choice was 2.0**np.arange(6,-3,-1))
aa = np.power(1.5, np.array([17, 13, 10, 7, 5, 3, 2, 1, 0, -1, -2, -3]))
# Coefficient columns: +1 on the diagonal and -1 on the first superdiagonal.
bb = np.diag(np.ones(aa.size)) - np.diag(np.ones(aa.size - 1), 1)
# Number of functions per block, passed to calc_eig as `shell`.
SHELL = 3 * [aa.size]
# One row per exponent: [exponent, coefficients...].
coef = np.concatenate([aa[:, None], bb], axis=1)
# Same table repeated with leading index 0, 1, 2
# (presumably the angular momentum of each shell - TODO confirm).
BASIS = [[ang, *coef.tolist()] for ang in range(3)]
def parse_xyz(filename, basis='ccpvtz', verbose=False):
    """Build a pyscf Mole from an xyz file (coordinates given in Angstrom).

    The atom-count line must parse as an integer; it and the comment line
    are otherwise unused. If building raises RuntimeError, the build is
    retried once with ``mol.spin = 1``.
    """
    with open(filename) as fp:
        int(fp.readline())  # atom count: only checked to be an integer
        fp.readline()       # comment line: discarded
        xyz_str = fp.read()
    mol = gto.Mole()
    if verbose:
        mol.verbose = 4
    else:
        mol.verbose = 0
    mol.atom = xyz_str
    mol.basis = basis
    try:
        mol.build(0, 0, unit="Ang")
    except RuntimeError:
        # presumably the default spin does not fit the electron count
        mol.spin = 1
        mol.build(0, 0, unit="Ang")
    return mol
def gen_proj(mol, intor = 'ovlp', verbose = False) :
    """Create a projector of MO coefficients onto the atom-centred BASIS.

    A helper molecule with an "Ne" atom carrying BASIS at every atomic
    position of ``mol`` is built, and the cross integral
    ``int1e_<intor>_sph`` between ``mol`` and that helper is evaluated once.

    Returns:
        proj_func(mo) -> (proj_coeff, nproj), where ``proj_coeff`` is
        ``mo @ proj`` reshaped to (*mo.shape[:2], natm, nproj).
    """
    natm = mol.natm
    centers = mol.atom_coords(unit="Ang")
    test_mol = gto.Mole()
    test_mol.verbose = 4 if verbose else 0
    test_mol.atom = [["Ne", center] for center in centers]
    test_mol.basis = BASIS
    test_mol.spin = 0
    test_mol.build(0, 0, unit="Ang")
    proj = gto.intor_cross(f'int1e_{intor}_sph', mol, test_mol)

    def proj_func(mo):
        projected = np.matmul(mo, proj)
        # res : nframe x nocc/nvir x natm x nproj
        proj_coeff = projected.reshape(*mo.shape[:2], natm, -1)
        if verbose:
            print('shape of coeff data ', proj_coeff.shape)
        nproj = proj_coeff.shape[-1]
        return proj_coeff, nproj

    return proj_func
def proj_frame(xyz_file, mo_dir, dump_dir=None, basis='ccpvtz', ename="e_hf", intor='ovlp', verbose=False):
    """Project the occupied MO coefficients stored in ``mo_dir``.

    Loads the raw data with load_data, projects ``c_occ`` with the projector
    from gen_proj, appends the projection size to ``meta`` and, when
    ``dump_dir`` is given, saves everything with dump_data.

    Returns:
        (meta, ehf, e_occ, c_occ) with ``c_occ`` being the projected array.
    """
    mol = parse_xyz(xyz_file, basis=basis)
    meta, ehf, e_occ, raw_c_occ = load_data(mo_dir, ename)
    project = gen_proj(mol, intor, verbose)
    c_occ, nproj = project(raw_c_occ)
    meta = np.append(meta, nproj)
    if dump_dir is not None:
        dump_data(dump_dir, meta, ehf, e_occ, c_occ)
    return meta, ehf, e_occ, c_occ
def load_data(dir_name, ename="e_hf"):
    """Load the ``.raw`` text files of one frame folder.

    Reads ``system.raw`` (natm, nao, nocc, nvir), ``<ename>.raw``,
    ``ener_occ.raw`` and ``coeff_occ.raw`` from ``dir_name``.

    Returns:
        (meta, ehf, e_occ, c_occ) reshaped to (-1,), (-1, 1), (-1, nocc)
        and (-1, nocc, nao) respectively.
    """
    def _read(stem, **options):
        return np.loadtxt(os.path.join(dir_name, f'{stem}.raw'), **options)

    meta = _read('system', dtype=int).reshape(-1)
    natm, nao, nocc, nvir = meta[0], meta[1], meta[2], meta[3]
    ehf = _read(ename).reshape(-1, 1)
    e_occ = _read('ener_occ').reshape(-1, nocc)
    c_occ = _read('coeff_occ').reshape([-1, nocc, nao])
    return meta, ehf, e_occ, c_occ
def dump_data(dir_name, meta, ehf, e_occ, c_occ, dm_dict=None) :
    """Save projected frame data into ``dir_name``.

    Args:
        dir_name: output folder (created if needed).
        meta: integer array (natm, nao, nocc, nvir, nproj), written to
            ``system.raw``.
        ehf: total energies, saved as ``e_hf.npy``.
        e_occ: array of shape (nframe, nocc), saved as ``ener_occ.npy``.
        c_occ: array of shape (nframe, nocc, natm, nproj), saved as
            ``coeff_occ.npy``.
        dm_dict: optional mapping name -> array of shape
            (nframe, natm, nproj); each is saved as ``<name>.npy``.

    Raises:
        AssertionError: if any array does not have the expected shape.
    """
    # None instead of a shared mutable `{}` default
    if dm_dict is None:
        dm_dict = {}
    os.makedirs(dir_name, exist_ok = True)
    np.savetxt(os.path.join(dir_name, 'system.raw'),
               meta.reshape(1,-1),
               fmt = '%d',
               header = 'natm nao nocc nvir nproj')
    nframe = e_occ.shape[0]
    natm = meta[0]
    nao = meta[1]
    nocc = meta[2]
    nvir = meta[3]
    nproj = meta[4]
    # ntest == natm
    # Compare shapes as tuples: tuple-vs-ndarray `==` misbehaves (instead of
    # failing the assert) when the number of dimensions differs.
    assert tuple(c_occ.shape) == (nframe, nocc, natm, nproj)
    assert tuple(e_occ.shape) == (nframe, nocc)
    assert all(tuple(dm.shape) == (nframe, natm, nproj) for dm in dm_dict.values())
    np.save(os.path.join(dir_name, 'e_hf.npy'), ehf)
    np.save(os.path.join(dir_name, 'ener_occ.npy'), e_occ)
    np.save(os.path.join(dir_name, 'coeff_occ.npy'), c_occ)
    for name, dm in dm_dict.items():
        np.save(os.path.join(dir_name, f'{name}.npy'), dm)
def main(xyz_files, mo_dirs, dump_dir, basis='ccpvtz', ename="e_hf", eig_names=('dm_eig', 'od_eig', 'se_eig', 'fe_eig'), intor='ovlp', verbose=False):
    """Project every frame, compute the requested descriptors and dump them.

    Args:
        xyz_files: xyz file of each frame.
        mo_dirs: folder with the raw MO data of each frame (same length).
        dump_dir: folder receiving the concatenated output.
        basis, ename, intor, verbose: forwarded to proj_frame.
        eig_names: descriptor names understood by calc_eig.

    Raises:
        AssertionError: if the two lists differ in length or the frames do
            not share the same meta data.
        ValueError: if no frame is given.
    """
    assert len(xyz_files) == len(mo_dirs)
    if not xyz_files:
        # without frames there is nothing to concatenate and no meta to dump
        raise ValueError("need at least one frame to process")
    oldmeta = None
    all_e_hf = []
    all_e_occ = []
    all_c_occ = []
    all_dm_dict = {name: [] for name in eig_names}
    for xf, md in zip(xyz_files, mo_dirs):
        meta, e_hf, e_occ, c_occ = proj_frame(xf, md, basis=basis, ename=ename, intor=intor, verbose=verbose)
        if oldmeta is not None:
            assert all(oldmeta == meta), "all frames has to be in the same system thus meta has to be equal!"
        oldmeta = meta
        all_e_hf.append(e_hf)
        all_e_occ.append(e_occ)
        all_c_occ.append(c_occ)
        for name, dm_list in all_dm_dict.items():
            # multiply by 2 for restricted method, doubly occupied orbitals
            dm_list.append(2 * calc_eig(name, c_occ, e_occ, xf, shell=SHELL))
        print(f"{xf} && {md} finished")
    all_e_hf = np.concatenate(all_e_hf)
    all_e_occ = np.concatenate(all_e_occ)
    all_c_occ = np.concatenate(all_c_occ)
    for name in all_dm_dict:
        all_dm_dict[name] = np.concatenate(all_dm_dict[name])
    dump_data(dump_dir, meta, all_e_hf, all_e_occ, all_c_occ, all_dm_dict)
    print("done")
# Command-line entry point: collect the options and hand them to main().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="project mo_coeffs into atomic basis and calculate descriptors.")
    # -x and -f are mandatory: main() needs both lists and compares their
    # lengths, so leaving one out used to crash on len(None).
    parser.add_argument("-x", "--xyz-file", nargs="+", required=True, help="input xyz file(s), if more than one, concat them")
    parser.add_argument("-f", "--mo-dir", nargs="+", required=True, help="input mo folder(s), must of same number with xyz files")
    parser.add_argument("-d", "--dump-dir", default=".", help="dir of dumped files, if not specified, use current folder")
    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
    parser.add_argument("-I", "--intor", default="ovlp", help="intor string used to calculate int1e")
    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
    parser.add_argument("-e", "--ename", default="e_hf", help="file name for total energy")
    parser.add_argument("-E", "--eig-name", nargs="*", default=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'],
                        help="name of eigen values to be calculated and dumped")
    args = parser.parse_args()
    main(args.xyz_file, args.mo_dir, args.dump_dir, args.basis,
         args.ename, args.eig_name, args.intor, args.verbose)
\ No newline at end of file
# coding: utf-8
import numpy as np
from pyscf import gto, scf, lib
from pyscf.mp.mp2 import _mo_energy_without_core
from time import time
import os
import sys
import argparse
def parse_xyz(filename, basis='ccpvtz', verbose=False):
    """Build a pyscf Mole from an xyz file (coordinates given in Angstrom).

    The atom-count line must parse as an integer; it and the comment line
    are otherwise unused.
    """
    with open(filename) as fp:
        int(fp.readline())  # atom count: only checked to be an integer
        fp.readline()       # comment line: discarded
        xyz_str = fp.read()
    mol = gto.Mole()
    if verbose:
        mol.verbose = 4
    else:
        mol.verbose = 0
    mol.atom = xyz_str
    mol.basis = basis
    mol.build(0, 0, unit="Ang")
    return mol
def fix_gauge(mo_coeff) :
    """Fix the overall sign of every column of ``mo_coeff``.

    Each column is multiplied by the sign of its first non-zero entry, so
    that this entry becomes positive. A message is printed whenever that
    entry is not in row 0.

    Returns:
        A new float array with the same shape as ``mo_coeff``.

    Raises:
        RuntimeError: if a column contains no non-zero entry.
    """
    nvec = mo_coeff.shape[1]
    ndim = mo_coeff.shape[0]
    ret = np.zeros(mo_coeff.shape)
    for ii in range(nvec) :
        for jj in range(ndim) :
            if np.sign(mo_coeff[jj,ii]) != 0 :
                break
        else :
            # The loop ran out without finding a non-zero entry. The previous
            # `jj == ndim` test could never be true after range(ndim), so
            # zero columns slipped through unnoticed.
            raise RuntimeError( 'ERROR: zero eigen func, should not happen')
        if (jj != 0) :
            print('gauge ref is not 0')
        factor = np.sign(mo_coeff[jj,ii])
        ret[:,ii] = factor * mo_coeff[:,ii]
    return ret
def mol_electron(mol, chkfile=None, verbose=False) :
    """Run restricted Hartree-Fock on ``mol`` and split the orbitals.

    Orbitals with ``mo_occ > 0`` are treated as occupied and those with
    ``mo_occ == 0`` as virtual; the coefficient columns are sign-fixed with
    fix_gauge and transposed so that each row is one orbital.

    Args:
        mol: built pyscf molecule.
        chkfile: optional checkpoint file set on the SCF object.
        verbose: print timings, shapes and the final energy.

    Returns:
        meta ([natm, nao, nocc, nvir]), the RHF energy,
        (e_occ, e_vir) and (c_occ, c_vir).
    """
    if verbose:
        start_t = time()
    nao = mol.nao
    natm = mol.natm
    rhf = scf.RHF(mol)
    if chkfile:
        rhf.set(chkfile=chkfile)
    erhf = rhf.kernel()
    if verbose:
        rhf_t = time()
        print(f"time of rhf: {rhf_t - start_t}")
    mo_energy = rhf.mo_energy
    mo_occ = rhf.mo_occ
    mo_coeff = fix_gauge(rhf.mo_coeff)
    occ_a = (mo_occ>0)
    vir_a = (mo_occ==0)
    nocc = sum(occ_a)
    nvir = sum(vir_a)
    assert(nocc + nvir == nao)
    if verbose :
        print('nao = %d, nocc = %d, nvir = %d' % \
              (nao, nocc, nvir))
        # NOTE(review): this prints the shapes of the first two rows of
        # mo_coeff; the label looks like a leftover from an unrestricted variant.
        print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape)
    c_occ = mo_coeff[:,occ_a]
    c_vir = mo_coeff[:,vir_a]
    e_occ = mo_energy[occ_a]
    e_vir = mo_energy[vir_a]
    # one orbital per row
    c_occ = c_occ.T
    c_vir = c_vir.T
    meta = [natm, nao, nocc, nvir]
    if verbose :
        print('shape of coeff data ', c_occ.shape)
        print('shape of ener data ', e_occ.shape)
        print('shape of coeff data ', c_vir.shape)
        print('shape of ener data ', e_vir.shape)
        # this is the RHF energy; the label used to read E(RKS)
        print('E(RHF) = %.9g' % erhf)
    return meta, erhf, (e_occ, e_vir), (c_occ, c_vir)
def dump_data(dir_name, meta, ehf, e_data, c_data) :
    """Write the data of a single frame as text files into ``dir_name``.

    ``meta`` is (natm, nao, nocc, nvir); ``e_data`` and ``c_data`` are
    (occupied, virtual) pairs whose shapes are checked against ``meta``
    before anything but ``system.raw`` is written. Every array is stored as
    one row per frame (nframe is fixed to 1).
    """
    os.makedirs(dir_name, exist_ok = True)

    def _write(fname, array, **options):
        np.savetxt(os.path.join(dir_name, fname), array, **options)

    _write('system.raw', np.array(meta).reshape(1,-1),
           fmt = '%d', header = 'natm nao nocc nvir')
    nframe = 1
    natm, nao, nocc, nvir = meta[0], meta[1], meta[2], meta[3]
    c_occ, c_vir = c_data[0], c_data[1]
    e_occ, e_vir = e_data[0], e_data[1]
    # ntest == natm
    expected_shapes = (
        (c_occ, [nocc, nao]),
        (c_vir, [nvir, nao]),
        (e_occ, [nocc]),
        (e_vir, [nvir]),
    )
    for array, shape in expected_shapes:
        assert(all(array.shape == np.array(shape, dtype = int)))
    _write('e_hf.raw', np.reshape(ehf, [nframe,1]))
    flattened = (
        ('ener_occ.raw', e_occ),
        ('ener_vir.raw', e_vir),
        ('coeff_occ.raw', c_occ),
        ('coeff_vir.raw', c_vir),
    )
    for fname, array in flattened:
        _write(fname, array.reshape([nframe, -1]))
def gen_frame(xyz_file, basis='ccpvtz', dump_dir=None, verbose=False):
    """Run the calculation for one xyz file and dump the results.

    When ``dump_dir`` is None the output folder is the xyz path without its
    extension.
    """
    target = os.path.splitext(xyz_file)[0] if dump_dir is None else dump_dir
    mol = parse_xyz(xyz_file, basis=basis, verbose=verbose)
    mol_meta, ehf, e_data, c_data = mol_electron(mol, verbose=verbose)
    dump_data(target, mol_meta, ehf, e_data, c_data)
def main():
    """Command-line driver: run gen_frame on every xyz file given.

    With ``--dump-dir`` each file gets the sub folder
    ``<dump-dir>/<xyz base name>``; otherwise gen_frame picks the folder.
    A failure is reported on stderr and then re-raised, which stops the run.
    """
    # this script only runs RHF; the description used to claim mp2
    parser = argparse.ArgumentParser(description="Calculate and save RHF energy and mo_coeffs for given xyz files.")
    parser.add_argument("files", nargs="+", help="input xyz files")
    parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, using same dir as input")
    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
    args = parser.parse_args()
    for fn in args.files:
        if args.dump_dir is None:
            dump = None
        else:
            dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0])
        try:
            gen_frame(fn, args.basis, dump, args.verbose)
            print(f"{fn} finished")
        except Exception as e:
            print(f"{fn} failed,", e, file=sys.stderr)
            raise


if __name__ == "__main__":
    main()
# coding: utf-8
import numpy as np
from pyscf import gto, dft, lib
from pyscf.mp.mp2 import _mo_energy_without_core
from time import time
import os
import sys
import argparse
def parse_xyz(filename, basis='ccpvtz', verbose=False):
    """Build a pyscf Mole from an xyz file (coordinates given in Angstrom).

    The atom-count line must parse as an integer; it and the comment line
    are otherwise unused.
    """
    with open(filename) as handle:
        int(handle.readline())  # atom count: only checked to be an integer
        handle.readline()       # comment line: discarded
        atom_block = handle.read()
    mol = gto.Mole()
    if verbose:
        mol.verbose = 4
    else:
        mol.verbose = 0
    mol.atom = atom_block
    mol.basis = basis
    mol.build(0, 0, unit="Ang")
    return mol
def fix_gauge(mo_coeff) :
    """Fix the overall sign of every column of ``mo_coeff``.

    Each column is multiplied by the sign of its first non-zero entry, so
    that this entry becomes positive. A message is printed whenever that
    entry is not in row 0.

    Returns:
        A new float array with the same shape as ``mo_coeff``.

    Raises:
        RuntimeError: if a column contains no non-zero entry.
    """
    nvec = mo_coeff.shape[1]
    ndim = mo_coeff.shape[0]
    ret = np.zeros(mo_coeff.shape)
    for ii in range(nvec) :
        for jj in range(ndim) :
            if np.sign(mo_coeff[jj,ii]) != 0 :
                break
        else :
            # The loop ran out without finding a non-zero entry. The previous
            # `jj == ndim` test could never be true after range(ndim), so
            # zero columns slipped through unnoticed.
            raise RuntimeError( 'ERROR: zero eigen func, should not happen')
        if (jj != 0) :
            print('gauge ref is not 0')
        factor = np.sign(mo_coeff[jj,ii])
        ret[:,ii] = factor * mo_coeff[:,ii]
    return ret
def mol_electron(mol, xc='pbe', chkfile=None, verbose=False) :
    """Run restricted Kohn-Sham DFT on ``mol`` and split the orbitals.

    Orbitals with ``mo_occ > 0`` are treated as occupied and those with
    ``mo_occ == 0`` as virtual; the coefficient columns are sign-fixed with
    fix_gauge and transposed so that each row is one orbital.

    Returns:
        meta ([natm, nao, nocc, nvir]), the RKS energy,
        (e_occ, e_vir) and (c_occ, c_vir).
    """
    if verbose:
        start_t = time()
    nao, natm = mol.nao, mol.natm
    rks = dft.RKS(mol)
    rks.xc = xc
    if chkfile:
        rks.set(chkfile=chkfile)
    erks = rks.kernel()
    if verbose:
        rks_t = time()
        print(f"time of rks: {rks_t - start_t}")
    mo_energy = rks.mo_energy
    mo_occ = rks.mo_occ
    mo_coeff = fix_gauge(rks.mo_coeff)
    occ_mask = mo_occ > 0
    vir_mask = mo_occ == 0
    nocc, nvir = sum(occ_mask), sum(vir_mask)
    assert nocc + nvir == nao
    if verbose:
        print('nao = %d, nocc = %d, nvir = %d' % (nao, nocc, nvir))
        # NOTE(review): this prints the shapes of the first two rows of
        # mo_coeff; the label looks like a leftover from an unrestricted variant.
        print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape)
    # select the orbital columns, then store one orbital per row
    c_occ = mo_coeff[:, occ_mask].T
    c_vir = mo_coeff[:, vir_mask].T
    e_occ = mo_energy[occ_mask]
    e_vir = mo_energy[vir_mask]
    meta = [natm, nao, nocc, nvir]
    if verbose:
        for label, array in (('coeff', c_occ), ('ener', e_occ),
                             ('coeff', c_vir), ('ener', e_vir)):
            print(f'shape of {label} data ', array.shape)
        print('E(RKS) = %.9g' % erks)
    return meta, erks, (e_occ, e_vir), (c_occ, c_vir)
def dump_data(dir_name, meta, ehf, e_data, c_data) :
    """Write the data of a single frame as text files into ``dir_name``.

    ``meta`` is (natm, nao, nocc, nvir); ``e_data`` and ``c_data`` are
    (occupied, virtual) pairs whose shapes are checked against ``meta``
    before anything but ``system.raw`` is written. The total energy goes to
    ``e_dft.raw``; every array is stored as one row per frame (nframe is 1).
    """
    os.makedirs(dir_name, exist_ok = True)

    def _write(fname, array, **options):
        np.savetxt(os.path.join(dir_name, fname), array, **options)

    _write('system.raw', np.array(meta).reshape(1,-1),
           fmt = '%d', header = 'natm nao nocc nvir')
    nframe = 1
    natm, nao, nocc, nvir = meta[0], meta[1], meta[2], meta[3]
    c_occ, c_vir = c_data[0], c_data[1]
    e_occ, e_vir = e_data[0], e_data[1]
    # ntest == natm
    expected_shapes = (
        (c_occ, [nocc, nao]),
        (c_vir, [nvir, nao]),
        (e_occ, [nocc]),
        (e_vir, [nvir]),
    )
    for array, shape in expected_shapes:
        assert(all(array.shape == np.array(shape, dtype = int)))
    _write('e_dft.raw', np.reshape(ehf, [nframe,1]))
    flattened = (
        ('ener_occ.raw', e_occ),
        ('ener_vir.raw', e_vir),
        ('coeff_occ.raw', c_occ),
        ('coeff_vir.raw', c_vir),
    )
    for fname, array in flattened:
        _write(fname, array.reshape([nframe, -1]))
def gen_frame(xyz_file, basis='ccpvtz', xc='pbe', dump_dir=None, verbose=False):
    """Run the DFT calculation for one xyz file and dump the results.

    When ``dump_dir`` is None the output folder is the xyz path without its
    extension.
    """
    target = os.path.splitext(xyz_file)[0] if dump_dir is None else dump_dir
    mol = parse_xyz(xyz_file, basis=basis, verbose=verbose)
    mol_meta, ehf, e_data, c_data = mol_electron(mol, xc=xc, verbose=verbose)
    dump_data(target, mol_meta, ehf, e_data, c_data)
def main():
    """Command-line driver: run gen_frame on every xyz file given.

    With ``--dump-dir`` each file gets the sub folder
    ``<dump-dir>/<xyz base name>``; otherwise gen_frame picks the folder.
    A failure is reported on stderr and then re-raised, which stops the run.
    """
    # this script runs RKS (DFT) only; the description used to claim mp2
    parser = argparse.ArgumentParser(description="Calculate and save RKS (DFT) energy and mo_coeffs for given xyz files.")
    parser.add_argument("files", nargs="+", help="input xyz files")
    parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, using same dir as input")
    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
    parser.add_argument("-X", "--xc", default='pbe', type=str, help="xc functional")
    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
    args = parser.parse_args()
    for fn in args.files:
        if args.dump_dir is None:
            dump = None
        else:
            dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0])
        try:
            gen_frame(fn, args.basis, args.xc, dump, args.verbose)
            print(f"{fn} finished")
        except Exception as e:
            print(f"{fn} failed,", e, file=sys.stderr)
            raise


if __name__ == "__main__":
    main()
# coding: utf-8
import numpy as np
from pyscf import gto, scf, mp, lib
from pyscf.mp.mp2 import _mo_energy_without_core
from time import time
import os
import sys
import argparse
def my_kernel(mp, mo_energy=None, mo_coeff=None, eris=None, with_eij=True):
    """MP2 correlation energy, optionally resolved per occupied pair.

    Args:
        mp: MP2 object providing mo_energy, mo_coeff, frozen, nocc, nmo and
            ao2mo().
        mo_energy, mo_coeff: if either is None, the orbital energies come
            from ``_mo_energy_without_core(mp, mp.mo_energy)`` and
            ``mo_coeff`` is left as None for ``ao2mo``.
        eris: precomputed integrals; built with ``mp.ao2mo(mo_coeff)`` when None.
        with_eij: also return the (nocc, nocc) matrix of pair contributions.

    Returns:
        (emp2, eij): real part of the correlation energy and real part of
        the pair matrix, or None for the latter when ``with_eij`` is false.

    Raises:
        RuntimeError: if orbitals are neither passed in nor set on ``mp``.
    """
    if mo_energy is None or mo_coeff is None:
        if mp.mo_energy is None or mp.mo_coeff is None:
            raise RuntimeError('mo_coeff, mo_energy are not initialized.\n'
                               'You may need to call mf.kernel() to generate them.')
        mo_coeff = None
        mo_energy = _mo_energy_without_core(mp, mp.mo_energy)
    else:
        # For backward compatibility. In pyscf-1.4 or earlier, mp.frozen is
        # not supported when mo_energy or mo_coeff is given.
        # (`== 0` rather than the identity test `is 0` on an int literal)
        assert(mp.frozen == 0 or mp.frozen is None)
    if eris is None:
        eris = mp.ao2mo(mo_coeff)
    nocc = mp.nocc
    nvir = mp.nmo - nocc
    eia = mo_energy[:nocc,None] - mo_energy[None,nocc:]
    if with_eij:
        eij = np.empty((nocc,nocc), dtype=eia.dtype)
    else:
        eij = None
    emp2 = 0
    for i in range(nocc):
        gi = np.asarray(eris.ovov[i*nvir:(i+1)*nvir])
        gi = gi.reshape(nvir,nocc,nvir).transpose(1,0,2)
        t2i = gi.conj()/lib.direct_sum('jb+a->jba', eia, eia[i])
        tmp_eij = 2 * np.einsum('jab,jab->j', t2i, gi) - np.einsum('jab,jba->j', t2i, gi)
        emp2 += tmp_eij.sum()
        if with_eij:
            eij[i] = tmp_eij
    # eij is None when with_eij is false; `.real` on it used to raise
    return emp2.real, (eij.real if with_eij else None)
def parse_xyz(filename, basis='ccpvtz', verbose=False):
    """Build a pyscf Mole from an xyz file (coordinates given in Angstrom).

    The atom-count line must parse as an integer; it and the comment line
    are otherwise unused.
    """
    with open(filename) as source:
        int(source.readline())  # atom count: only checked to be an integer
        source.readline()       # comment line: discarded
        geometry = source.read()
    mol = gto.Mole()
    if verbose:
        mol.verbose = 4
    else:
        mol.verbose = 0
    mol.atom = geometry
    mol.basis = basis
    mol.build(0, 0, unit="Ang")
    return mol
def fix_gauge(mo_coeff) :
    """Fix the overall sign of every column of ``mo_coeff``.

    Each column is multiplied by the sign of its first non-zero entry, so
    that this entry becomes positive. A message is printed whenever that
    entry is not in row 0.

    Returns:
        A new float array with the same shape as ``mo_coeff``.

    Raises:
        RuntimeError: if a column contains no non-zero entry.
    """
    nvec = mo_coeff.shape[1]
    ndim = mo_coeff.shape[0]
    ret = np.zeros(mo_coeff.shape)
    for ii in range(nvec) :
        for jj in range(ndim) :
            if np.sign(mo_coeff[jj,ii]) != 0 :
                break
        else :
            # The loop ran out without finding a non-zero entry. The previous
            # `jj == ndim` test could never be true after range(ndim), so
            # zero columns slipped through unnoticed.
            raise RuntimeError( 'ERROR: zero eigen func, should not happen')
        if (jj != 0) :
            print('gauge ref is not 0')
        factor = np.sign(mo_coeff[jj,ii])
        ret[:,ii] = factor * mo_coeff[:,ii]
    return ret
def mol_electron(mol, frozen=0, chkfile=None, verbose=False) :
    """Run RHF followed by MP2 on ``mol`` and split the orbitals.

    Orbitals with ``mo_occ > 0`` are occupied, except that the first
    ``frozen`` orbitals are removed from the occupied set; orbitals with
    ``mo_occ == 0`` are virtual. Coefficient columns are sign-fixed with
    fix_gauge and transposed so that each row is one orbital.

    Returns:
        meta ([natm, nao, nocc, nvir]), the RHF energy, the MP2 correlation
        energy and its pair matrix from my_kernel, (e_occ, e_vir) and
        (c_occ, c_vir).
    """
    if verbose:
        start_t = time()
    nao, natm = mol.nao, mol.natm
    rhf = scf.RHF(mol)
    if chkfile:
        rhf.set(chkfile=chkfile)
    erhf = rhf.kernel()
    if verbose:
        rhf_t = time()
        print(f"time of rhf: {rhf_t - start_t}")
    mo_energy = rhf.mo_energy
    mo_occ = rhf.mo_occ
    mo_coeff = fix_gauge(rhf.mo_coeff)
    occ_mask = mo_occ > 0
    occ_mask[:frozen] = False  # drop the frozen orbitals from the occupied set
    vir_mask = mo_occ == 0
    nocc, nvir = sum(occ_mask), sum(vir_mask)
    assert nocc + nvir + frozen == nao
    if verbose:
        print('nao = %d, nocc = %d, nvir = %d' % (nao, nocc, nvir))
        # NOTE(review): this prints the shapes of the first two rows of
        # mo_coeff; the label looks like a leftover from an unrestricted variant.
        print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape)
    # select the orbital columns, then store one orbital per row
    c_occ = mo_coeff[:, occ_mask].T
    c_vir = mo_coeff[:, vir_mask].T
    e_occ = mo_energy[occ_mask]
    e_vir = mo_energy[vir_mask]
    meta = [natm, nao, nocc, nvir]
    if verbose:
        for label, array in (('coeff', c_occ), ('ener', e_occ),
                             ('coeff', c_vir), ('ener', e_vir)):
            print(f'shape of {label} data ', array.shape)
    mid_t = time()
    mp2 = mp.MP2(rhf, frozen=frozen)
    emp2, emp2_ij = my_kernel(mp2)
    if verbose:
        print('E(HF) = %.9g' % erhf)
        print('E(RMP2) = %.9g' % emp2)
        print(f"time of mp2: {time()-mid_t}")
    return meta, erhf, emp2, emp2_ij, (e_occ, e_vir), (c_occ, c_vir)
def dump_data(dir_name, meta, ehf, emp2, ec_ij, e_data, c_data) :
    """Write the data of a single frame as text files into ``dir_name``.

    ``meta`` is (natm, nao, nocc, nvir); ``e_data`` and ``c_data`` are
    (occupied, virtual) pairs and ``ec_ij`` is an (nocc, nocc) matrix. All
    shapes are checked against ``meta`` before anything but ``system.raw``
    is written. Every array is stored as one row per frame (nframe is 1).
    """
    os.makedirs(dir_name, exist_ok = True)

    def _write(fname, array, **options):
        np.savetxt(os.path.join(dir_name, fname), array, **options)

    _write('system.raw', np.array(meta).reshape(1,-1),
           fmt = '%d', header = 'natm nao nocc nvir')
    nframe = 1
    natm, nao, nocc, nvir = meta[0], meta[1], meta[2], meta[3]
    c_occ, c_vir = c_data[0], c_data[1]
    e_occ, e_vir = e_data[0], e_data[1]
    # ntest == natm
    expected_shapes = (
        (c_occ, [nocc, nao]),
        (c_vir, [nvir, nao]),
        (e_occ, [nocc]),
        (e_vir, [nvir]),
        (ec_ij, [nocc, nocc]),
    )
    for array, shape in expected_shapes:
        assert(all(array.shape == np.array(shape, dtype = int)))
    _write('e_hf.raw', np.reshape(ehf, [nframe,1]))
    _write('e_mp2.raw', np.reshape(emp2, [nframe,1]))
    flattened = (
        ('ec_ij.raw', ec_ij),
        ('ener_occ.raw', e_occ),
        ('ener_vir.raw', e_vir),
        ('coeff_occ.raw', c_occ),
        ('coeff_vir.raw', c_vir),
    )
    for fname, array in flattened:
        _write(fname, array.reshape([nframe, -1]))
def gen_frame(xyz_file, basis='ccpvtz', frozen=0, dump_dir=None, verbose=False):
    """Run the HF + MP2 calculation for one xyz file and dump the results.

    When ``dump_dir`` is None the output folder is the xyz path without its
    extension.
    """
    target = os.path.splitext(xyz_file)[0] if dump_dir is None else dump_dir
    mol = parse_xyz(xyz_file, basis=basis, verbose=verbose)
    mol_meta, ehf, emp2, ec_ij, e_data, c_data = mol_electron(mol, frozen=frozen, verbose=verbose)
    dump_data(target, mol_meta, ehf, emp2, ec_ij, e_data, c_data)
def main():
    """Command-line driver: run gen_frame on every xyz file given.

    With ``--dump-dir`` each file gets the sub folder
    ``<dump-dir>/<xyz base name>``; otherwise gen_frame picks the folder.
    A failure is reported on stderr and then re-raised, which stops the run.
    """
    parser = argparse.ArgumentParser(description="Calculate and save mp2 energy and mo_coeffs for given xyz files.")
    parser.add_argument("files", nargs="+", help="input xyz files")
    parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, using same dir as input")
    parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information")
    parser.add_argument("-F", "--frozen", default=0, type=int, help="number of orbit to be frozen when calculate mp2")
    parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation")
    args = parser.parse_args()
    for fn in args.files:
        dump = None
        if args.dump_dir is not None:
            stem = os.path.splitext(os.path.basename(fn))[0]
            dump = os.path.join(args.dump_dir, stem)
        try:
            gen_frame(fn, args.basis, args.frozen, dump, args.verbose)
            print(f"{fn} finished")
        except Exception as e:
            print(f"{fn} failed,", e, file=sys.stderr)
            raise


if __name__ == "__main__":
    main()
#!/usr/bin/env python
#SBATCH -N 1
#SBATCH -c 10
#SBATCH -t 24:00:00
#SBATCH --mem=32G
import time
import numpy as np
from pyscf import gto, scf
BOHR = 0.52917721092
# Module-level switches read by the calculation functions in this script.
# When true, nuclear gradients are skipped and returned as None.
_NO_FORCE = False
# When true, density matrices are skipped and returned as None.
_NO_DM = False
# When true, scf.UHF / dft.UKS are used instead of scf.HF / dft.KS.
_MUST_UNRES = False
# When true, solve_scf passes the solved mean field through scf.fast_newton.
_USE_NEWTON = False
def parse_xyz(filename, basis='ccpvdz', **kwargs):
    """Build a pyscf Mole from an xyz file (coordinates given in Angstrom).

    Extra keyword arguments are set as attributes on the molecule. Unless
    ``spin`` is among them, the spin is set to ``nelectron % 2``.
    """
    with open(filename) as fp:
        int(fp.readline())  # atom count: only checked to be an integer
        fp.readline()       # comment line: discarded
        xyz_str = fp.read()
    mol = gto.Mole()
    mol.atom = xyz_str
    mol.basis = basis
    mol.set(**kwargs)
    spin_given = "spin" in kwargs
    if not spin_given:
        mol.spin = mol.nelectron % 2
    mol.build(0, 0, unit="Ang")
    return mol
def get_method(name: str):
    """Return the calculator function matching ``name`` (case-insensitive).

    Recognised names: ``hf``, ``mp2``, ``ccsd``, ``fci``, anything starting
    with ``dft`` (optionally ``dft@<xc>``; the functional defaults to ``pbe``)
    and anything starting with ``ccsd_t``, ``ccsd-t`` or ``ccsd(t)``.

    Returns:
        A callable taking ``(mol, **scfargs)``.

    Raises:
        ValueError: if ``name`` matches none of the supported methods.
    """
    key = name.lower()

    if key.startswith("dft"):
        _, at_sign, tail = key.partition("@")
        # Only the text between the first and second "@" names the functional.
        functional = tail.split("@")[0] if at_sign else "pbe"

        def run_dft(mol, **scfargs):
            return calc_dft(mol, functional, **scfargs)

        return run_dft

    if key.startswith(("ccsd_t", "ccsd-t", "ccsd(t)")):
        return calc_ccsd_t

    # Exact-name matches; names are looked up lazily, only when selected.
    if key == "hf":
        return calc_hf
    if key == "mp2":
        return calc_mp2
    if key == "ccsd":
        return calc_ccsd
    if key == "fci":
        return calc_fci

    raise ValueError(f"Unknown calculation method: {name}")
def solve_scf(mol, **scfargs):
    """Run a Hartree-Fock SCF calculation for ``mol``.

    Uses ``scf.UHF`` when ``_MUST_UNRES`` is set and ``scf.HF`` otherwise,
    starting from the object's own initial guess with
    ``init_guess_breaksym`` enabled.

    Args:
        mol: the molecule to solve.
        **scfargs: attributes applied to the SCF object before it runs
            (e.g. ``conv_tol``), matching how ``calc_dft`` applies them
            through ``.run(**scfargs)``. An explicit ``init_guess_breaksym``
            entry overrides the default of ``True``.

    Returns:
        The SCF object after ``kernel`` has run. Convergence is not checked
        here; callers inspect ``mf.converged``.
    """
    HFmethod = scf.UHF if _MUST_UNRES else scf.HF
    # Merge into one dict so a user-supplied init_guess_breaksym does not
    # collide with the default as a duplicate keyword argument.
    settings = {"init_guess_breaksym": True, **scfargs}
    mf = HFmethod(mol).set(**settings)
    init_dm = mf.get_init_guess()
    mf.kernel(init_dm)
    if _USE_NEWTON:
        # NOTE(review): the --newton help text says "when scf not converged",
        # but this runs regardless of mf.converged -- confirm the intent.
        mf = scf.fast_newton(mf)
    return mf
def calc_hf(mol, **scfargs):
    """Compute the Hartree-Fock energy, gradient and density matrix.

    Returns:
        ``(etot, grad, rdm)``; ``grad`` is ``None`` when ``_NO_FORCE`` is set
        and ``rdm`` is ``None`` when ``_NO_DM`` is set.

    Raises:
        RuntimeError: if the SCF calculation did not converge.
    """
    mf = solve_scf(mol, **scfargs)
    if not mf.converged:
        raise RuntimeError("SCF not converged!")

    etot = mf.e_tot
    grad = None
    if not _NO_FORCE:
        grad = mf.nuc_grad_method().kernel()
    rdm = None
    if not _NO_DM:
        rdm = mf.make_rdm1()
    return etot, grad, rdm
def calc_dft(mol, xc="pbe", **scfargs):
    """Compute the Kohn-Sham DFT energy, gradient and density matrix.

    Args:
        mol: the molecule to solve.
        xc: exchange-correlation functional name.
        **scfargs: forwarded to the KS object's ``run``.

    Returns:
        ``(etot, grad, rdm)``. ``grad`` is ``None`` when ``_NO_FORCE`` is set
        or when libxc reports the functional type as ``MGGA`` or ``NLC``;
        ``rdm`` is ``None`` when ``_NO_DM`` is set.

    Raises:
        RuntimeError: if the SCF calculation did not converge.
    """
    from pyscf import dft

    ks_class = dft.UKS if _MUST_UNRES else dft.KS
    mf = ks_class(mol, xc).run(**scfargs)
    if not mf.converged:
        raise RuntimeError("SCF not converged!")

    etot = mf.e_tot
    # The functional type is only queried when forces were requested.
    skip_grad = _NO_FORCE or dft.libxc.xc_type(xc) in ('MGGA', 'NLC')
    grad = None if skip_grad else mf.nuc_grad_method().kernel()
    rdm = None if _NO_DM else mf.make_rdm1()
    return etot, grad, rdm
def calc_mp2(mol, **scfargs):
    """Compute the MP2 energy and gradient on top of a converged SCF.

    Returns:
        ``(etot, grad, None)``; ``grad`` is ``None`` when ``_NO_FORCE`` is
        set. No density matrix is produced.

    Raises:
        RuntimeError: if the SCF calculation did not converge.
    """
    import pyscf.mp

    mf = solve_scf(mol, **scfargs)
    if not mf.converged:
        raise RuntimeError("SCF not converged!")

    postmf = pyscf.mp.MP2(mf).run()
    etot = postmf.e_tot
    if _NO_FORCE:
        return etot, None, None
    return etot, postmf.nuc_grad_method().kernel(), None
def calc_ccsd(mol, **scfargs):
    """Compute the CCSD energy, gradient and density matrix.

    The CCSD one-particle density matrix is contracted with the SCF
    orbital coefficients on both sides before being returned.

    Returns:
        ``(etot, grad, ccdm)``; ``grad`` is ``None`` when ``_NO_FORCE`` is
        set and ``ccdm`` is ``None`` when ``_NO_DM`` is set.

    Raises:
        RuntimeError: if the SCF calculation did not converge.
    """
    import pyscf.cc

    mf = solve_scf(mol, **scfargs)
    if not mf.converged:
        raise RuntimeError("SCF not converged!")

    mycc = mf.CCSD().run()
    etot = mycc.e_tot

    grad = None
    if not _NO_FORCE:
        grad = mycc.nuc_grad_method().kernel()

    ccdm = None
    if not _NO_DM:
        coeff = mf.mo_coeff
        dm_mo = mycc.make_rdm1()
        ccdm = np.einsum('...pi,...ij,...qj->...pq', coeff, dm_mo, coeff.conj())
    return etot, grad, ccdm
def calc_ccsd_t(mol, **scfargs):
    """Compute the CCSD(T) energy and gradient.

    The energy is the CCSD total energy plus the value returned by
    ``ccsd_t()``.

    Returns:
        ``(etot, grad, None)``; ``grad`` is ``None`` when ``_NO_FORCE`` is
        set. No density matrix is produced.

    Raises:
        RuntimeError: if the SCF calculation did not converge.
    """
    import pyscf.cc

    mf = solve_scf(mol, **scfargs)
    if not mf.converged:
        raise RuntimeError("SCF not converged!")

    mycc = mf.CCSD().run()
    triples_energy = mycc.ccsd_t()
    etot = mycc.e_tot + triples_energy

    grad = None
    if not _NO_FORCE:
        # Imported only when a gradient is actually requested.
        import pyscf.grad.ccsd_t as ccsd_t_grad
        grad = ccsd_t_grad.Gradients(mycc).kernel()
    return etot, grad, None
def calc_fci(mol, **scfargs):
    """Compute the FCI energy and density matrix.

    The density matrices from ``make_rdm1s`` are contracted with the SCF
    orbital coefficients on both sides and then summed over the leading axis.

    Returns:
        ``(etot, None, rdm)``; no gradient is computed, and ``rdm`` is
        ``None`` when ``_NO_DM`` is set.

    Raises:
        RuntimeError: if the SCF calculation did not converge.
    """
    import pyscf.fci

    mf = solve_scf(mol, **scfargs)
    if not mf.converged:
        raise RuntimeError("SCF not converged!")

    myci = pyscf.fci.FCI(mf)
    etot, fcivec = myci.kernel()
    if _NO_DM:
        return etot, None, None

    coeff = mf.mo_coeff
    spin_dms = myci.make_rdm1s(fcivec, mol.nao, mol.nelec)
    transformed = np.einsum('...pi,...ij,...qj->...pq', coeff, spin_dms, coeff.conj())
    return etot, None, transformed.sum(0)
if __name__ == "__main__":
    # Script entry point: for every xyz file, run the selected method and save
    # <name>.energy.npy, plus <name>.force.npy / <name>.dm.npy when available.
    import argparse
    import os

    parser = argparse.ArgumentParser(
        description="Calculate and save reference energy, force and density matrix for given xyz files.")
    parser.add_argument("files", nargs="+", help="input xyz files")
    parser.add_argument("-d", "--dump-dir", help="dir of dumped files, default is same dir as xyz file")
    parser.add_argument("-v", "--verbose", default=1, type=int, help="output calculation information")
    parser.add_argument("-B", "--basis", default="ccpvdz", type=str, help="basis used to do the calculation")
    parser.add_argument("-C", "--charge", default=0, type=int, help="net charge of the molecule")
    parser.add_argument("-S", "--spin", default=0, type=int, help="net spin of the molecule")
    parser.add_argument("-M", "--method", default="ccsd",
                        help="method used to do the calculation. "
                             "support HF, DFT (or DFT@xc), MP2, CCSD, CCSD(T) and FCI")
    parser.add_argument("-U", "--unrestrict", action="store_true", help="force using unrestricted methods")
    parser.add_argument("-NF", "--no-force", action="store_true", help="do not calculate force")
    parser.add_argument("-ND", "--no-dm", action="store_true", help="do not calculate dm")
    parser.add_argument("-SO", "--newton", action="store_true", help="allow using newton method when scf not converged")
    parser.add_argument("--scf-input", help="yaml file to specify scf arguments")
    args = parser.parse_args()

    # Override the module-level switches that the calc_* functions read.
    if args.unrestrict:
        _MUST_UNRES = True
    if args.no_force:
        _NO_FORCE = True
    if args.no_dm:
        _NO_DM = True
    if args.newton:
        _USE_NEWTON = True

    scfargs = {}
    if args.scf_input is not None:
        import ruamel.yaml as yaml
        with open(args.scf_input, 'r') as fp:
            # Use the YAML-instance API; the module-level safe_load function
            # is no longer available in recent ruamel.yaml releases.
            loaded = yaml.YAML(typ="safe").load(fp)
        # An empty YAML file loads as None, which cannot be **-expanded.
        scfargs = loaded or {}

    if args.dump_dir is not None:
        os.makedirs(args.dump_dir, exist_ok=True)

    calculator = get_method(args.method)
    for fn in args.files:
        tic = time.time()
        mol = parse_xyz(fn, args.basis, verbose=args.verbose, charge=args.charge, spin=args.spin)
        try:
            res = calculator(mol, **scfargs)
        except RuntimeError as err:
            # Best effort: report the failure and move on to the next file.
            print(fn, f"failed, {err}")
            continue
        etot, grad, rdm = res

        # Outputs go next to the xyz file unless a dump directory was given.
        dump_dir = os.path.dirname(fn) if args.dump_dir is None else args.dump_dir
        dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0])
        np.save(dump + ".energy.npy", [etot])
        if grad is not None:
            # Force is the negative gradient, rescaled by the Bohr radius.
            force = -grad / BOHR
            np.save(dump + ".force.npy", force)
        if rdm is not None:
            np.save(dump + ".dm.npy", rdm)
        if args.verbose:
            print(fn, f"done, time = {time.time()-tic}")
\ No newline at end of file
import pathlib
import setuptools
# Packaging script for the deepks distribution.
project_dir = pathlib.Path(__file__).parent.resolve()
long_description = project_dir.joinpath("README.md").read_text(encoding="utf-8")

# torch and pyscf are deliberately left out of the requirements
runtime_requirements = ["numpy", "paramiko", "ruamel.yaml"]

# Both console commands point at the same CLI entry function.
console_scripts = [
    "deepks=deepks.main:main_cli",
    "dks=deepks.main:main_cli",
]

metadata = {
    "name": "deepks",
    # The version is derived from SCM metadata and written into the package.
    "use_scm_version": {"write_to": "deepks/_version.py"},
    "setup_requires": ["setuptools_scm"],
    "author": "Yixiao Chen",
    "author_email": "yixiaoc@princeton.edu",
    "description": "DeePKS-kit: generate accurate (self-consistent) energy functionals",
    "long_description": long_description,
    "long_description_content_type": "text/markdown",
    "packages": setuptools.find_packages(include=["deepks", "deepks.*"]),
    "classifiers": [
        "Programming Language :: Python :: 3.7",
    ],
    "keywords": "deepks DeePKS-kit",
    "install_requires": runtime_requirements,
    "python_requires": ">=3.7",
    "entry_points": {"console_scripts": console_scripts},
}

setuptools.setup(**metadata)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment