import os
import sys
import torch
import numpy as np
import pyscf
from pyscf import gto
from sklearn import linear_model

sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../../')
from deepks.scf.scf import DSCF
from deepks.scf.run import build_mol, solve_mol

def get_linear_model(weig, wec):
#     too_small = weig.reshape(-1,108).std(0) < 1e-3
    wreg = linear_model.Ridge(1e-7, tol=1e-9)
    wreg.fit(weig.sum(1)[:], wec[:])
    linear = torch.nn.Linear(108,1).double()
    linear.weight.data[:] = torch.from_numpy(wreg.coef_)
    linear.bias.data[:] = torch.tensor(wreg.intercept_ / 3)
    model = lambda x: linear(x).sum(1)
    return model

def get_linear_model_normed(weig, wec, stdmin=1e-3):
#     too_small = weig.reshape(-1,108).std(0) < 1e-3
    input_scale = weig.reshape(-1,108).std(0).clip(stdmin)
    t_input_scale = torch.from_numpy(input_scale)
    weig /= input_scale
    wreg = linear_model.Ridge(1e-7, tol=1e-9)
    wreg.fit(weig.sum(1)[:], wec[:])
    linear = torch.nn.Linear(108,1).double()
    linear.weight.data[:] = torch.from_numpy(wreg.coef_)
    linear.bias.data[:] = torch.tensor(wreg.intercept_ / 3)
    model = lambda x: linear(x / t_input_scale).sum(1)
    return model

nmol = 1000
ntrain = 900
niter = 10

mol_list = [build_mol(f'../path/to/data/water/geometry/{i:0>5}.xyz') for i in range(nmol)]
ehfs = np.load('../path/to/data/water/rproj_mb2/e_hf.npy').reshape(-1)[:nmol]
wene = np.loadtxt('../path/to/data/water/energy.dat', usecols=(1,2,3,4))[:nmol]
erefs = wene[:,3]
ecfs = ehfs
ecs = erefs - ehfs
ceigs = np.load('../../../data/tom_miller/water/rproj_mb2/dm_eig.npy')[:nmol]
model = get_linear_model(ceigs[:ntrain], ecs[:ntrain])

os.makedirs('dump', exist_ok=True)
np.save('dump/000.ehfs.npy', ehfs)
np.save('dump/000.ecfs.npy', ecfs)
np.save('dump/000.ceigs.npy', ceigs)
np.save('dump/000.ecs.npy', ecs)
np.save('dump/000.convs.npy', np.ones(ehfs.shape, dtype=bool))

for i in range(1, niter+1):
    oldecfs, oldceigs, oldehfs = ecfs, ceigs, ehfs
    oldecs = ecs
    oldmodel = model
    
    results = [solve_mol(mol, model) for mol in mol_list]
    meta, ehfs, ecfs, cdms, ceigs, convs = map(np.array, zip(*results))
    ecs = erefs - ehfs
    model = get_linear_model(ceigs[:ntrain], ecs[:ntrain])
    
    print((ecfs - erefs).mean(), np.abs(ecfs - erefs).mean())
    
    np.save(f'dump/{i:0>3}.ehfs.npy', ehfs)
    np.save(f'dump/{i:0>3}.ecfs.npy', ecfs)
    np.save(f'dump/{i:0>3}.ceigs.npy', ceigs)
    np.save(f'dump/{i:0>3}.ecs.npy', ecs)
    np.save(f'dump/{i:0>3}.convs.npy', convs)