import pandas as pd
import os,torch,re,jsonlines
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
# Checkpoint directory; the tokenizer, model weights and generation config are all loaded from it.
PATH = "../models/7B"
# MMLU dataset root; the code below reads "<subject>_dev.csv" files from its "dev"
# sub-directory and lists the files to evaluate from its "test" sub-directory.
mmlu_dataset = "../mmlu/"

def get_few_shot_prompt(filename, num_shots=5):
    """Build the few-shot chat history for one MMLU subject.

    The demonstrations are read from the dev split: ``filename`` (the name of
    the test CSV) has ``"test"`` replaced by ``"dev"`` and is looked up under
    ``<mmlu_dataset>/dev``. The CSV has no header row; its columns are the
    question, the four options A-D and the answer letter.

    Args:
        filename: Name of the test CSV, e.g. ``"<subject>_test.csv"``.
        num_shots: Number of leading dev rows to turn into demonstrations.
            The default of 5 yields the same 10 messages as before.

    Returns:
        A flat list of message dicts alternating ``{"role": "user", ...}`` and
        ``{"role": "bot", ...}``, two per demonstration.

    Raises:
        KeyError: If a row's answer cell is not one of the column names.
    """
    filepath = os.path.join(mmlu_dataset, "dev", filename.replace("test", "dev"))
    # Read every cell as a literal string. With pandas' default inference,
    # options such as "NA" or "N/A" become NaN (rendered as "nan" in the
    # prompt) and numeric options can be reformatted (e.g. "1" -> "1.0").
    df = pd.read_csv(
        filepath,
        header=None,
        names=["input", "A", "B", "C", "D", "answer"],
        dtype=str,
        keep_default_na=False,
    )
    _hint = f'There is a single choice question about {filename.replace("_test.csv", " ")}. Answer the question by replying A, B, C or D.'
    prompts = []
    # Only the rows that are actually used are converted.
    for _, line in df.head(num_shots).iterrows():
        user_content = f"{_hint}\nQuestion: {line['input']}\nA. {line['A']}\nB. {line['B']}\nC. {line['C']}\nD. {line['D']}\nAnswer: "
        # NOTE(review): the demonstrated reply is the *text* of the correct
        # option, not its letter, although the hint asks for A/B/C/D. Kept
        # as-is because changing it alters evaluation results; confirm intent.
        bot_content = f"{line[line['answer']]}\n"
        prompts.append({"role": "user", "content": user_content})
        prompts.append({"role": "bot", "content": bot_content})
    return prompts

# build evaluation inputs
def get_input_data(test_file_path, filename):
    """Load one MMLU test CSV and turn each row into a prompt/target pair.

    The CSV has no header row; its columns are the question, the four options
    A-D and the answer letter.

    Args:
        test_file_path: Directory containing the test CSV files.
        filename: Name of the CSV inside ``test_file_path``.

    Returns:
        A list with one dict per row: ``"input"`` is the formatted question
        prompt and ``"target"`` is the content of the answer column.
    """
    # Read every cell as a literal string. With pandas' default inference,
    # options such as "NA" or "N/A" become NaN (rendered as "nan" in the
    # prompt) and numeric options can be reformatted (e.g. "1" -> "1.0").
    df = pd.read_csv(
        os.path.join(test_file_path, filename),
        header=None,
        names=["input", "A", "B", "C", "D", "answer"],
        dtype=str,
        keep_default_na=False,
    )
    _hint = f'There is a single choice question about {filename.replace("_test.csv", " ")}. Answer the question by replying A, B, C or D.'
    data = []
    for _, line in df.iterrows():
        # Named `prompt` rather than `input` so the builtin is not shadowed.
        prompt = f"{_hint}\nQuestion: {line['input']}\nA. {line['A']}\nB. {line['B']}\nC. {line['C']}\nD. {line['D']}\nAnswer: "
        data.append({"input": prompt, "target": line["answer"]})
    return data
# post process
# Cue phrases that introduce an answer letter, tried in this order. Only the
# cue is matched case-insensitively (scoped ``(?i:...)`` group); the captured
# letter itself must be a capital, so ordinary words following the cue
# ("the answer is clearly ...") are not mistaken for a choice.
_ANSWER_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"(?i:the answer is )([A-E])",
        r"(?i:the answer is)([A-E])",
        r"(?i:Answer: )([A-E])",
        r"(?i:Answer: )\(([A-E])\)",
        r"(?i:Option )\(([A-E])\)",
        r"(?i:Answer:)([A-E])",
        r"(?i:Option )([A-E])",
        r"(?i:Opt )([A-E])",
    )
)


def get_capital_answer(text: str) -> str:
    """Extract the chosen option letter from a model reply.

    The cue patterns are tried in order and the first one that matches
    anywhere in ``text`` decides the result. If none matches, the first
    capital letter A-D appearing anywhere in ``text`` is used.

    Args:
        text: Raw reply produced by the model.

    Returns:
        A single capital letter, or ``""`` when no letter could be found.
    """
    for pattern in _ANSWER_PATTERNS:
        match = pattern.search(text)
        if match:
            return match.group(1)
    # Fallback: first capital A-D anywhere in the reply.
    fallback = re.search(r"[A-D]", text)
    return fallback.group(0) if fallback else ""
if __name__ == "__main__":
    # Load tokenizer, model and generation settings from the checkpoint directory.
    tokenizer = AutoTokenizer.from_pretrained(PATH)
    model = AutoModelForCausalLM.from_pretrained(PATH, trust_remote_code=True, device_map="auto",
                                                torch_dtype=torch.float16)
    generate_config = GenerationConfig.from_pretrained(PATH)
    generate_config.temperature = 0.1
    generate_config.top_k = 50
    generate_config.top_p = 0.95
    print(generate_config)
    model.eval()

    test_file_path = os.path.join(mmlu_dataset, "test")
    # os.listdir returns entries in arbitrary order and includes any stray
    # non-CSV file; keep only the CSVs and sort them so runs are reproducible.
    filenames = sorted(name for name in os.listdir(test_file_path) if name.endswith(".csv"))

    score_list = []  # one accuracy value per evaluated file
    for filename in filenames:
        few_shot_prompt = get_few_shot_prompt(filename)
        input_list = get_input_data(test_file_path, filename)
        if not input_list:
            # An empty file would otherwise cause a division by zero below.
            print(f"skipping {filename}: no questions found")
            continue
        score = 0
        for line in tqdm(input_list):
            # Pass a copy of the few-shot history: model.chat returns a history
            # object and its implementation is not visible here, so guard
            # against it appending to the list it receives.
            answer, _ = model.chat(tokenizer=tokenizer, question=line["input"],
                                   history=list(few_shot_prompt),
                                   generation_config=generate_config, stream=False)
            if get_capital_answer(answer) == line["target"]:
                score += 1
        score_list.append(score / len(input_list))

    if not score_list:
        raise SystemExit(f"no test questions found under {test_file_path}")
    # Unweighted mean of the per-file accuracies.
    final_score = sum(score_list) / len(score_list)
    print(final_score)