Commit b6edc328 authored by chenzk

v1.0
from lcb_runner.prompts.code_execution import format_prompt_execution, format_prompt_execution_cot
from lcb_runner.prompts.code_generation import format_prompt_generation
from lcb_runner.prompts.test_output_prediction import format_prompt_test_output
from lcb_runner.prompts.self_repair import format_prompt_self_repair
import json
from lcb_runner.lm_styles import LMStyle
from lcb_runner.benchmarks import CodeExecutionProblem
def make_cot_output_prompt(s):
code, input = s
return f"""You are given a Python function and an assertion containing an input to the function. Complete the assertion with a literal (no unsimplified expressions, no function calls) containing the output when executing the provided code on the given input, even if the function is incorrect or incomplete. Do NOT output any extra information. Execute the program step by step before arriving at an answer, and provide the full assertion with the correct output in [ANSWER] and [/ANSWER] tags, following the examples.
[PYTHON]
def performOperation(s):
s = s + s
return "b" + s + "a"
assert performOperation(s = "hi") == ??
[/PYTHON]
[THOUGHT]
Let's execute the code step by step:
1. The function performOperation is defined, which takes a single argument s.
2. The function is called with the argument "hi", so within the function, s is initially "hi".
3. Inside the function, s is concatenated with itself, so s becomes "hihi".
4. The function then returns a new string that starts with "b", followed by the value of s (which is now "hihi"), and ends with "a".
5. The return value of the function is therefore "bhihia".
[/THOUGHT]
[ANSWER]
assert performOperation(s = "hi") == "bhihia"
[/ANSWER]
[PYTHON]
{code}
assert {input} == ??
[/PYTHON]
[THOUGHT]
"""
def make_direct_output_prompt(s):
code, input = s
return f"""You are given a Python function and an assertion containing an input to the function. Complete the assertion with a literal (no unsimplified expressions, no function calls) containing the output when executing the provided code on the given input, even if the function is incorrect or incomplete. Do NOT output any extra information. Provide the full assertion with the correct output in [ANSWER] and [/ANSWER] tags, following the examples.
[PYTHON]
def repeatNumber(number : int) -> int:
return number
assert repeatNumber(number = 17) == ??
[/PYTHON]
[ANSWER]
assert repeatNumber(number = 17) == 17
[/ANSWER]
[PYTHON]
def addCharacterA(string : str) -> str:
return string + "a"
assert addCharacterA(string = "x9j") == ??
[/PYTHON]
[ANSWER]
assert addCharacterA(string = "x9j") == "x9ja"
[/ANSWER]
[PYTHON]
{code}
assert {input} == ??
[/PYTHON]
[ANSWER]
"""
def format_prompt_execution(question, LanguageModelStyle):
return format_prompt_execution_base(question, LanguageModelStyle, False)
def format_prompt_execution_cot(question, LanguageModelStyle):
return format_prompt_execution_base(question, LanguageModelStyle, True)
def format_prompt_execution_base(
question: CodeExecutionProblem, LanguageModelStyle: LMStyle, cot: bool
) -> str:
code = question.code
input = question.input
system_message = "You are an expert at Python programming, code execution, test case generation, and fuzzing."
if cot:
prompt = make_cot_output_prompt((code, input))
else:
prompt = make_direct_output_prompt((code, input))
if LanguageModelStyle == LMStyle.OpenAIChat:
chat_messages = [
{
"role": "system",
"content": system_message,
},
]
chat_messages += [
{"role": "user", "content": prompt},
]
return chat_messages
if LanguageModelStyle == LMStyle.LLaMa3:
chat_messages = [
{
"role": "system",
"content": system_message,
},
]
chat_messages += [
{"role": "user", "content": prompt},
]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"meta-llama/Meta-Llama-3-8B-Instruct", padding_side="left", use_fast=False
)
return tokenizer.apply_chat_template(
chat_messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
elif LanguageModelStyle == LMStyle.Claude:
return prompt
elif LanguageModelStyle == LMStyle.Claude3:
prompt = [
{
"role": "user",
"content": prompt,
}
]
return system_message, prompt
elif LanguageModelStyle == LMStyle.Gemini:
return prompt
elif LanguageModelStyle == LMStyle.StarCoderInstruct:
return prompt
elif LanguageModelStyle == LMStyle.DeepSeekCodeInstruct:
return prompt
elif LanguageModelStyle == LMStyle.CodeLLaMaInstruct:
return prompt
elif LanguageModelStyle == LMStyle.MagiCoder:
return prompt
elif LanguageModelStyle == LMStyle.WizardCoder:
return prompt
elif LanguageModelStyle == LMStyle.Phind:
return prompt
elif LanguageModelStyle == LMStyle.OC:
return prompt
elif LanguageModelStyle == LMStyle.MistralWeb:
chat_messages = [
{
"role": "system",
"content": system_message,
},
{"role": "user", "content": prompt},
]
return chat_messages
elif LanguageModelStyle == LMStyle.DracarysLlama:
chat_messages = [
{
"role": "system",
"content": system_message,
},
]
chat_messages += [
{"role": "user", "content": prompt},
]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"abacusai/Dracarys-Llama-3.1-70B-Instruct", padding_side="right", use_fast=False
)
return tokenizer.apply_chat_template(
chat_messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
elif LanguageModelStyle == LMStyle.DracarysQwen:
return prompt
else:
raise NotImplementedError(
f"LanguageModelStyle {LanguageModelStyle} not implemented"
)
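# Usage sketch (illustrative, not part of the runner): the execution prompts
# above only read `.code` and `.input` from the question, so a SimpleNamespace
# can stand in for a real CodeExecutionProblem here. The assertions mirror the
# OpenAIChat branch of format_prompt_execution_base.
def _demo_format_prompt_execution():
    from types import SimpleNamespace

    question = SimpleNamespace(code="def f(x):\n    return x + 1", input="f(1)")
    messages = format_prompt_execution(question, LMStyle.OpenAIChat)
    assert messages[0]["role"] == "system"
    assert messages[1]["role"] == "user" and "f(1)" in messages[1]["content"]
    return messages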
import json
try:
from anthropic import HUMAN_PROMPT, AI_PROMPT
except ImportError:
HUMAN_PROMPT = None
AI_PROMPT = None
from lcb_runner.lm_styles import LMStyle
from lcb_runner.benchmarks.code_generation import CodeGenerationProblem
class PromptConstants:
SYSTEM_MESSAGE_GENERIC = f"You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program."
SYSTEM_MESSAGE_GEMINI = f"You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. Do NOT use system calls like `exit` in the generated program."
SYSTEM_MESSAGE_DEEPSEEK = f"You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you answer questions related to computer science."
SYSTEM_MESSAGE_MAGIC = f"You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.\n\n@@ Instruction\n"
SYSTEM_MESSAGE_WIZARD = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
SYSTEM_MESSAGE_PHIND = f"""You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. Put your fixed program within code delimiters, for example:
```python
# YOUR CODE HERE
```"""
SYSTEM_MESSAGE_CODEQWEN = (
f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user"
)
FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."
FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows."
def get_generic_question_template_answer(question: CodeGenerationProblem):
prompt = f"### Question:\n{question.question_content}\n\n"
if question.starter_code:
prompt += (
f"### Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
)
prompt += f"```python\n{question.starter_code}\n```\n\n"
else:
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
return prompt
def get_cllama_question_template_answer(question: CodeGenerationProblem):
prompt = f"### Question\n{question.question_content}\n\n"
if question.starter_code:
prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
prompt += f"[PYTHON]\n{question.starter_code}\n[/PYTHON]\n\n"
else:
prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += f"[PYTHON]\n# WRITE YOUR CODE HERE\n[/PYTHON]\n\n"
prompt += f"### ANSWER (use the provided delimiters, read the inputs from stdin and write response to stdout)\n\n"
return prompt
def get_deepseekcode_question_template_answer(question: CodeGenerationProblem):
prompt = f"### Instruction: You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
prompt += f"Question:\n{question.question_content}\n\n"
if question.starter_code:
prompt += (
f"### Instruction: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
)
prompt += f"```python\n{question.starter_code}\n```\n\n"
else:
prompt += (
f"### Instruction: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
)
prompt += f"```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Response:\n\n"
return prompt
def get_qwen_question_template_answer(question: CodeGenerationProblem):
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"/abacus/models/Qwen1.5-72B-Chat/", padding_side="left", use_fast=False
)
prompt = "You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
prompt += f"Question:\n{question.question_content}\n\n"
if question.starter_code:
prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
prompt += f"```python\n{question.starter_code}\n```\n\n"
else:
prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n"
prompt += f"```python\n# YOUR CODE HERE\n```\n\n"
messages = [
{"role": "system", "content": PromptConstants.SYSTEM_MESSAGE_GENERIC},
{"role": "user", "content": prompt},
]
prompt = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
return prompt
def get_magicoder_question_template_answer(question: CodeGenerationProblem):
prompt = f"You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
prompt += f"Question:\n{question.question_content}\n\n"
if question.starter_code:
prompt += f"Format: {PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
prompt += f"```python\n{question.starter_code}\n```\n\n"
else:
prompt += f"Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += f"```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"@@ Response\n"
return prompt
def get_wizard_question_template_answer(question: CodeGenerationProblem):
prompt = f"""### Instruction: You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. Put your fixed program within code delimiters, for example:
```python
# YOUR CODE HERE
```
"""
prompt += f"{question.question_content}\n\n"
if question.starter_code:
prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
prompt += f"```python\n{question.starter_code}\n```\n\n"
else:
prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n"
prompt += f"```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Response:\n\n"
return prompt
def get_phind_question_template_answer(question: CodeGenerationProblem):
prompt = f"{question.question_content}\n\n"
if question.starter_code:
prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
prompt += f"```python\n{question.starter_code}\n```\n\n"
else:
prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n"
prompt += f"```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"\n\n### Assistant"
return prompt
def get_codeqwen_question_template_answer(question: CodeGenerationProblem):
prompt = "You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
prompt += f"Question: {question.question_content}\n\n"
if question.starter_code:
prompt += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
prompt += f"```python\n{question.starter_code}\n```\n\n<|im_end|>\n"
else:
prompt += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += f"```python\n# YOUR CODE HERE\n```\n\n<|im_end|>\n"
prompt += f"<|im_start|>assistant\n"
return prompt
with open("lcb_runner/prompts/few_shot_examples/generation/func.json") as f:
func = json.load(f)
with open("lcb_runner/prompts/few_shot_examples/generation/stdin.json") as f:
stdin = json.load(f)
def get_base_model_question_template_answer(question: CodeGenerationProblem):
if question.starter_code:
examples_json = func
else:
examples_json = stdin
def get_example_prompt(example):
prompt = ""
prompt += "### Question\n"
prompt += example["question"]
prompt += "\n\n"
if question.starter_code:
prompt += "### Starter Code\n"
prompt += example["sample_code"]
prompt += "\n\n"
prompt += "### Answer\n\n"
prompt += example["answer"]
if example["answer"]:
prompt += "\n\n"
return prompt
prompt = ""
prompt += get_example_prompt(examples_json[0])
prompt += get_example_prompt(
{
"question": question.question_content,
"sample_code": question.starter_code,
"answer": "",
}
)
return prompt
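# Illustrative sketch: the base-model prompt is one solved few-shot example
# followed by the target question left unanswered. Only question_content and
# starter_code are read, so a SimpleNamespace stands in for a real
# CodeGenerationProblem here.
def _demo_base_model_prompt():
    from types import SimpleNamespace

    question = SimpleNamespace(
        question_content="Read two integers from stdin and print their sum.",
        starter_code="",
    )
    prompt = get_base_model_question_template_answer(question)
    assert prompt.rstrip().endswith("### Answer")
    return prompt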
def format_prompt_generation(
question: CodeGenerationProblem, LanguageModelStyle: LMStyle
) -> str:
if LanguageModelStyle in [LMStyle.OpenAIChat, LMStyle.DeepSeekAPI]:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_answer(question),
},
]
return chat_messages
elif LanguageModelStyle == LMStyle.OpenAIReason:
chat_messages = [
{
"role": "user",
"content": PromptConstants.SYSTEM_MESSAGE_GENERIC
+ "\n\n"
+ get_generic_question_template_answer(question),
},
]
return chat_messages
if LanguageModelStyle == LMStyle.LLaMa3:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_answer(question),
},
]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"meta-llama/Meta-Llama-3-8B-Instruct", padding_side="left", use_fast=False
)
return tokenizer.apply_chat_template(
chat_messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
if LanguageModelStyle == LMStyle.Claude:
prompt = f"{HUMAN_PROMPT}\n"
prompt += f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n\n"
prompt += f"{get_generic_question_template_answer(question).rstrip()}\n"
prompt += f"{AI_PROMPT}"
return prompt
if LanguageModelStyle == LMStyle.Claude3:
system = PromptConstants.SYSTEM_MESSAGE_GENERIC
prompt = [
{
"role": "user",
"content": get_generic_question_template_answer(question).rstrip(),
}
]
return system, prompt
if LanguageModelStyle == LMStyle.Gemini:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_GEMINI}\n"
prompt += f"{get_generic_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.StarCoderInstruct:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n"
prompt += f"{get_generic_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.MistralWeb:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
},
{
"role": "user",
"content": get_generic_question_template_answer(question),
},
]
return chat_messages
if LanguageModelStyle == LMStyle.CohereCommand:
chat_messages = [
{
"role": "System",
"message": PromptConstants.SYSTEM_MESSAGE_GENERIC,
},
]
message = get_generic_question_template_answer(question)
return chat_messages, message
if LanguageModelStyle == LMStyle.DeepSeekCodeInstruct:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_DEEPSEEK}\n\n"
prompt += f"{get_deepseekcode_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.CodeQwenInstruct:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_CODEQWEN}\n\n"
prompt += f"{get_codeqwen_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.CodeLLaMaInstruct:
prompt = f"[INST] <<SYS>>\n"
prompt += f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n"
prompt += f"<</SYS>>\n\n"
prompt += f"{get_cllama_question_template_answer(question)}\n"
prompt += f"[/INST]"
return prompt
if LanguageModelStyle == LMStyle.MagiCoder:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_MAGIC}\n"
prompt += f"{get_magicoder_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.WizardCoder:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_WIZARD}\n\n"
prompt += f"{get_wizard_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.Phind:
prompt = f"### System Prompt\n\n"
prompt += f"{PromptConstants.SYSTEM_MESSAGE_PHIND}\n\n"
prompt += f"### User Message\n\n"
prompt += f"{get_phind_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.OC:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n\n"
prompt += f"{get_generic_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.Eurusx:
prompt = "[INST] Write Python code to solve the task:\n"
prompt += f"{get_generic_question_template_answer(question)}"
prompt += "[/INST]"
return prompt
    if LanguageModelStyle in [LMStyle.Smaug2, LMStyle.Qwen1point5]:
prompt = f"{get_qwen_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.GenericBase:
prompt = get_base_model_question_template_answer(question)
return prompt
if LanguageModelStyle == LMStyle.DracarysQwen:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_CODEQWEN}\n\n"
prompt += f"{get_codeqwen_question_template_answer(question)}"
return prompt
if LanguageModelStyle == LMStyle.DracarysLlama:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_answer(question),
},
]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"abacusai/Dracarys-Llama-3.1-70B-Instruct",
padding_side="right",
use_fast=False,
)
return tokenizer.apply_chat_template(
chat_messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
raise NotImplementedError(
f"LanguageModelStyle {LanguageModelStyle} not implemented"
)
def test():
import pathlib
base_dir = "logs/example_prompts/generation"
pathlib.Path(base_dir).mkdir(parents=True, exist_ok=True)
for lmstyle in LMStyle:
generation_problem = CodeGenerationProblem(
"title",
"question-content",
"leetcode",
"question_id",
"contest_id",
"contest_date",
"",
"easy",
"[]",
"[]",
"{}",
)
prompt1 = format_prompt_generation(generation_problem, lmstyle)
with open(f"{base_dir}/{lmstyle}_1.txt", "w") as f:
try:
f.write(prompt1)
except TypeError:
f.write(json.dumps(prompt1))
generation_problem.starter_code = "starter code"
prompt2 = format_prompt_generation(generation_problem, lmstyle)
with open(f"{base_dir}/{lmstyle}_2.txt", "w") as f:
try:
f.write(prompt2)
except TypeError:
f.write(json.dumps(prompt2))
if __name__ == "__main__":
test()
[
{
"question": "You are given a 0-indexed array of positive integers nums. Find the number of triplets (i, j, k) that meet the following conditions:\n\n0 <= i < j < k < nums.length\nnums[i], nums[j], and nums[k] are pairwise distinct.\n\t\nIn other words, nums[i] != nums[j], nums[i] != nums[k], and nums[j] != nums[k].\n\n\n\nReturn the number of triplets that meet the conditions.\n \nExample 1:\n\nInput: nums = [4,4,2,4,3]\nOutput: 3\nExplanation: The following triplets meet the conditions:\n- (0, 2, 4) because 4 != 2 != 3\n- (1, 2, 4) because 4 != 2 != 3\n- (2, 3, 4) because 2 != 4 != 3\nSince there are 3 triplets, we return 3.\nNote that (2, 0, 4) is not a valid triplet because 2 > 0.\n\nExample 2:\n\nInput: nums = [1,1,1,1,1]\nOutput: 0\nExplanation: No triplets meet the conditions so we return 0.\n\n \nConstraints:\n\n3 <= nums.length <= 100\n1 <= nums[i] <= 1000\n\n",
"sample_code": "class Solution:\n def unequalTriplets(self, nums: List[int]) -> int:\n ",
"answer": "class Solution:\n def unequalTriplets(self, a: List[int]) -> int:\n ans = 0\n n = len(a)\n for i in range(n):\n for j in range(i + 1, n):\n for k in range(j + 1, n):\n ans += len({a[i], a[j], a[k]}) == 3\n return ans"
},
{
"question": "You are given two strings s and t consisting of only lowercase English letters.\nReturn the minimum number of characters that need to be appended to the end of s so that t becomes a subsequence of s.\nA subsequence is a string that can be derived from another string by deleting some or no characters without changing the order of the remaining characters.\n \nExample 1:\n\nInput: s = \"coaching\", t = \"coding\"\nOutput: 4\nExplanation: Append the characters \"ding\" to the end of s so that s = \"coachingding\".\nNow, t is a subsequence of s (\"coachingding\").\nIt can be shown that appending any 3 characters to the end of s will never make t a subsequence.\n\nExample 2:\n\nInput: s = \"abcde\", t = \"a\"\nOutput: 0\nExplanation: t is already a subsequence of s (\"abcde\").\n\nExample 3:\n\nInput: s = \"z\", t = \"abcde\"\nOutput: 5\nExplanation: Append the characters \"abcde\" to the end of s so that s = \"zabcde\".\nNow, t is a subsequence of s (\"zabcde\").\nIt can be shown that appending any 4 characters to the end of s will never make t a subsequence.\n\n \nConstraints:\n\n1 <= s.length, t.length <= 10^5\ns and t consist only of lowercase English letters.\n\n",
"sample_code": "class Solution:\n def appendCharacters(self, s: str, t: str) -> int:\n ",
"answer": "class Solution:\n def appendCharacters(self, s: str, t: str) -> int:\n i = 0\n for char in s:\n if i < len(t) and char == t[i]:\n i += 1\n return len(t) - i"
}
]
[
{
"question": "You have $n$ gifts and you want to give all of them to children. Of course, you don't want to offend anyone, so all gifts should be equal between each other. The $i$-th gift consists of $a_i$ candies and $b_i$ oranges.\n\nDuring one move, you can choose some gift $1 \\le i \\le n$ and do one of the following operations:\n\n eat exactly one candy from this gift (decrease $a_i$ by one); eat exactly one orange from this gift (decrease $b_i$ by one); eat exactly one candy and exactly one orange from this gift (decrease both $a_i$ and $b_i$ by one). \n\nOf course, you can not eat a candy or orange if it's not present in the gift (so neither $a_i$ nor $b_i$ can become less than zero).\n\nAs said above, all gifts should be equal. This means that after some sequence of moves the following two conditions should be satisfied: $a_1 = a_2 = \\dots = a_n$ and $b_1 = b_2 = \\dots = b_n$ (and $a_i$ equals $b_i$ is not necessary).\n\nYour task is to find the minimum number of moves required to equalize all the given gifts.\n\nYou have to answer $t$ independent test cases.\n\n\n-----Input-----\n\nThe first line of the input contains one integer $t$ ($1 \\le t \\le 1000$) \u2014 the number of test cases. Then $t$ test cases follow.\n\nThe first line of the test case contains one integer $n$ ($1 \\le n \\le 50$) \u2014 the number of gifts. The second line of the test case contains $n$ integers $a_1, a_2, \\dots, a_n$ ($1 \\le a_i \\le 10^9$), where $a_i$ is the number of candies in the $i$-th gift. The third line of the test case contains $n$ integers $b_1, b_2, \\dots, b_n$ ($1 \\le b_i \\le 10^9$), where $b_i$ is the number of oranges in the $i$-th gift.\n\n\n-----Output-----\n\nFor each test case, print one integer: the minimum number of moves required to equalize all the given gifts.\n\n\n-----Example-----\nInput\n5\n3\n3 5 6\n3 2 3\n5\n1 2 3 4 5\n5 4 3 2 1\n3\n1 1 1\n2 2 2\n6\n1 1000000000 1000000000 1000000000 1000000000 1000000000\n1 1 1 1 1 1\n3\n10 12 8\n7 5 4\n\nOutput\n6\n16\n0\n4999999995\n7\n\n\n\n-----Note-----\n\nIn the first test case of the example, we can perform the following sequence of moves:\n\n choose the first gift and eat one orange from it, so $a = [3, 5, 6]$ and $b = [2, 2, 3]$; choose the second gift and eat one candy from it, so $a = [3, 4, 6]$ and $b = [2, 2, 3]$; choose the second gift and eat one candy from it, so $a = [3, 3, 6]$ and $b = [2, 2, 3]$; choose the third gift and eat one candy and one orange from it, so $a = [3, 3, 5]$ and $b = [2, 2, 2]$; choose the third gift and eat one candy from it, so $a = [3, 3, 4]$ and $b = [2, 2, 2]$; choose the third gift and eat one candy from it, so $a = [3, 3, 3]$ and $b = [2, 2, 2]$.",
"answer": "def minimum_moves(t, test_cases):\n for _ in range(t):\n n = test_cases[_][0]\n candies = test_cases[_][1]\n oranges = test_cases[_][2]\n min_candies = min(candies)\n min_oranges = min(oranges)\n ans = 0\n for i in range(n):\n ans += max(candies[i] - min_candies, oranges[i] - min_oranges)\n print(ans)\n\n\ndef main():\n t = int(input())\n test_cases = []\n for _ in range(t):\n n = int(input())\n candies = list(map(int, input().split()))\n oranges = list(map(int, input().split()))\n test_cases.append((n, candies, oranges))\n minimum_moves(t, test_cases)\n\n\nmain()\n"
},
{
"question": "Let's call a string a phone number if it has length 11 and fits the pattern \"8xxxxxxxxxx\", where each \"x\" is replaced by a digit.\n\nFor example, \"80123456789\" and \"80000000000\" are phone numbers, while \"8012345678\" and \"79000000000\" are not.\n\nYou have n cards with digits, and you want to use them to make as many phone numbers as possible. Each card must be used in at most one phone number, and you don't have to use all cards. The phone numbers do not necessarily have to be distinct.\n\nInput\n\nThe first line contains an integer n \u2014 the number of cards with digits that you have (1 \u2264 n \u2264 100).\n\nThe second line contains a string of n digits (characters \"0\", \"1\", ..., \"9\") s_1, s_2, \u2026, s_n. The string will not contain any other characters, such as leading or trailing spaces.\n\nOutput\n\nIf at least one phone number can be made from these cards, output the maximum number of phone numbers that can be made. Otherwise, output 0.\n\nExamples\n\nInput\n\n11\n00000000008\n\n\nOutput\n\n1\n\n\nInput\n\n22\n0011223344556677889988\n\n\nOutput\n\n2\n\n\nInput\n\n11\n31415926535\n\n\nOutput\n\n0\n\nNote\n\nIn the first example, one phone number, \"8000000000\", can be made from these cards.\n\nIn the second example, you can make two phone numbers from the cards, for example, \"80123456789\" and \"80123456789\".\n\nIn the third example you can't make any phone number from the given cards.",
"answer": "def count_phone_numbers(num_cards, card_digits):\n count_eights = card_digits.count(\"8\")\n max_phone_numbers = num_cards // 11\n max_possible = min(count_eights, max_phone_numbers)\n return max_possible\n\ndef main():\n num_cards = int(input())\n card_digits = input().strip()\n max_possible = count_phone_numbers(num_cards, card_digits)\n print(max_possible)\n\nmain()"
}
]
import json
from anthropic import HUMAN_PROMPT, AI_PROMPT
from lcb_runner.lm_styles import LMStyle
class PromptConstants:
SYSTEM_MESSAGE_GENERIC = f"You are a helpful programming assistant and an expert Python programmer. You are helping a user write a program to solve a problem. The user has written some code, but it has some errors and is not passing the tests. You will help the user by first giving a concise (at most 2-3 sentences) textual explanation of what is wrong with the code. After you have pointed out what is wrong with the code, you will then generate a fixed version of the program. You must put the entired fixed program within code delimiters only for once."
SYSTEM_MESSAGE_DEEPSEEK = f"You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you are helping a user correct a error program for code competition. The user has written some code, but it has some errors and is not passing the tests. You will help the user by first giving a concise (at most 2-3 sentences) textual explanation of what is wrong with the code. After you have pointed out what is wrong with the code, you will then generate a fixed version of the entire executable program. You must put the entire fixed executable program within code delimiters."
SYSTEM_MESSAGE_MAGIC = f"You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.\n\n@@ Instruction\n"
SYSTEM_MESSAGE_WIZARD = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
SYSTEM_MESSAGE_PHIND = f"""You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. You must put the entired fixed program within code delimiters only for once., for example:
```python
# YOUR CODE HERE
```"""
FORMATTING_REPEAT = f"First reason about the code providing a textual explanation of what is wrong with the code and then generate a fixed of the program enclosed code delimiters."
FORMATTING_MESSAGE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."
FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows."
# def truncate_io(io):
# if len(str(io)) > 200:
# io = str(io)[:200] + "...."
# return io
def get_check_prompt(question: str, result, metadata):
## assumes i/o examples are already truncated!
## less pressure on storing 10 MB json because on a single large input-output pair
# result_by_test_case = result
# assert len(metadata) == 1, f"metadata = {metadata}"
# metadata = metadata[0]
metadata = json.loads(metadata)
if "error_code" not in metadata:
return ""
if metadata["error_code"] == -1:
# time limit exceeded
message = f"The above code is incorrect and got the following compilation error.\n{metadata['error']}"
elif metadata["error_code"] == -2:
# wrong answer
message = f"The above code is incorrect and got a wrong answer.\nInput: {metadata['inputs']}\nGenerated Output: {metadata['output']}\nExpected: {metadata['expected']}"
elif metadata["error_code"] == -3:
# time limit exceeded
message = f"The above code is incorrect and got time limit exceeded.\n{metadata['error']}\nInput: {metadata['inputs']}\nExpected: {metadata['expected']}"
elif metadata["error_code"] == -4:
# runtime error
message = f"The above code is incorrect and got a runtime error.\nInput: {metadata['inputs']}\nExpected: {metadata['expected']}\n{metadata['error']}"
else:
raise NotImplementedError(
f"metadata['error_code'] = {metadata['error_code']} not implemented || {metadata=}"
)
return message
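# Illustrative payloads for get_check_prompt (a sketch; the field names mirror
# the error-code branches above). Note that `metadata` arrives as a JSON string.
def _demo_get_check_prompt():
    wrong_answer = json.dumps(
        {"error_code": -2, "inputs": "1 2", "output": "4", "expected": "3"}
    )
    message = get_check_prompt("dummy question", False, wrong_answer)
    assert "wrong answer" in message
    return message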
def get_generic_question_template_answer(question: str, code, result, metadata):
prompt = f"### Question:\n{question}\n\n"
prompt += f"### Answer:\n```python\n{code}\n```\n\n"
prompt += get_check_prompt(question, result, metadata) + "\n"
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
return prompt
def get_cllama_question_template_answer(question: str, code, result, metadata):
prompt = f"### Question\n{question}\n\n"
prompt += f"### Answer\n```python\n{code}\n```\n\n"
prompt += get_check_prompt(question, result, metadata)
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
return prompt
def get_deepseekcode_question_template_answer(question: str, code, result, metadata):
prompt = f"### Instruction: You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
prompt += f"Question:\n{question}\n\n"
prompt += f"### Response:\n```python\n{code}\n```\n\n"
prompt += get_check_prompt(question, result, metadata)
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
return prompt
def get_magicoder_question_template_answer(question: str, code, result, metadata):
prompt = f"You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
prompt += f"Question:\n{question}\n\n"
prompt += f"@@ Response \n```python\n{code}\n```\n\n"
prompt += get_check_prompt(question, result, metadata)
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
return prompt
def get_mixtral_question_template_answer(question: str, code, result, metadata):
prompt = f"Question:\n"
prompt += f"{question}\n\n"
prompt += f"Answer:\n\n"
prompt += f"```python\n\n{code}\n``\n\n"
prompt += get_check_prompt(question, result, metadata)
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
return prompt
def get_wizard_question_template_answer(question: str, code, result, metadata):
prompt = f"""### Instruction: You are a helpful programming assistant and an expert Python programmer. You are helping a user write a program to solve a problem. The user has written some code, but it has some errors and is not passing the tests. You will help the user by first giving a concise (at most 2-3 sentences) textual explanation of what is wrong with the code. After you have pointed out what is wrong with the code, you will then generate a fixed version of the program. You must put the entired fixed program within code delimiters only for once., for example:
```python
# YOUR CODE HERE
```
"""
prompt += f"{question}\n\n"
prompt += f"### Response:```python\n\n{code}\n```\n\n"
prompt += get_check_prompt(question, result, metadata)
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
return prompt
def get_phind_question_template_answer(question: str, code, result, metadata):
prompt = f"{question}\n\n"
prompt += f"```python\n{code}\n``` \n\n"
prompt += get_check_prompt(question, result, metadata)
prompt += f"\n\n### Assistant"
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
return prompt
def get_qwen_question_template_answer(question: str, code, result, metadata):
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"abacusai/Dracarys-72B-Instruct", padding_side="left", use_fast=False
)
prompt = f"""### Instruction: You are a helpful programming assistant and an expert Python programmer. You are helping a user write a program to solve a problem. The user has written some code, but it has some errors and is not passing the tests. You will help the user by first giving a concise (at most 2-3 sentences) textual explanation of what is wrong with the code. After you have pointed out what is wrong with the code, you will then generate a fixed version of the program. You must put the entired fixed program within code delimiters only for once., for example:
```python
# YOUR CODE HERE
```\n\n
"""
prompt += f"Question:\n{question}\n\n"
prompt += f"```python\n{code}\n``` \n\n"
prompt += get_check_prompt(question, result, metadata)
prompt += f"\n\n### Assistant"
prompt += f"### Format: {PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n"
prompt += "```python\n# YOUR CODE HERE\n```\n\n"
prompt += f"### Answer: (use the provided format with backticks)\n\n"
messages = [
{"role": "user", "content": prompt},
]
prompt = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
return prompt
def format_prompt_self_repair(
question: str, LanguageModelStyle: LMStyle, code, result, metadata
) -> str:
if result:
# The code is accepted, no need to change anything.
return ""
if LanguageModelStyle == LMStyle.OpenAIChat:
chat_messages = [
{"role": "system", "content": PromptConstants.SYSTEM_MESSAGE_GENERIC},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_answer(
question, code, result, metadata
)
+ "\n\n"
+ PromptConstants.FORMATTING_REPEAT,
},
]
return chat_messages
if LanguageModelStyle == LMStyle.LLaMa3:
chat_messages = [
{"role": "system", "content": PromptConstants.SYSTEM_MESSAGE_GENERIC},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_answer(
question, code, result, metadata
),
},
]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"meta-llama/Meta-Llama-3-8B-Instruct", padding_side="left", use_fast=False
)
return tokenizer.apply_chat_template(
chat_messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
elif LanguageModelStyle == LMStyle.Claude:
prompt = f"{HUMAN_PROMPT}\n{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n\n{get_generic_question_template_answer(question, code, result, metadata).rstrip()}\n{AI_PROMPT}"
return prompt
elif LanguageModelStyle == LMStyle.Claude3:
system = PromptConstants.SYSTEM_MESSAGE_GENERIC
prompt = [
{
"role": "user",
"content": get_generic_question_template_answer(
question, code, result, metadata
).rstrip(),
}
]
return system, prompt
elif LanguageModelStyle == LMStyle.MistralWeb:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_GENERIC,
},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_answer(question, code, result, metadata),
},
]
return chat_messages
elif LanguageModelStyle == LMStyle.Gemini:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n{get_generic_question_template_answer(question, code, result,metadata)}"
return prompt
elif LanguageModelStyle == LMStyle.StarCoderInstruct:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n{get_generic_question_template_answer(question, code, result,metadata)}"
return prompt
elif LanguageModelStyle == LMStyle.DeepSeekCodeInstruct:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_DEEPSEEK}\n\n{get_deepseekcode_question_template_answer(question, code, result,metadata)}"
return prompt
elif LanguageModelStyle == LMStyle.CodeLLaMaInstruct:
prompt = f"[INST] <<SYS>>\n{PromptConstants.SYSTEM_MESSAGE_GENERIC}\n<</SYS>>\n\n{get_cllama_question_template_answer(question, code, result,metadata)}\n[/INST]"
return prompt
elif LanguageModelStyle == LMStyle.MagiCoder:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_MAGIC}\n{get_magicoder_question_template_answer(question, code, result,metadata)}"
return prompt
elif LanguageModelStyle == LMStyle.WizardCoder:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_WIZARD}\n\n{get_wizard_question_template_answer(question, code, result,metadata)}"
return prompt
elif LanguageModelStyle == LMStyle.Phind:
prompt = f"### System Prompt\n\n{PromptConstants.SYSTEM_MESSAGE_PHIND}\n\n### User Message\n\n{get_phind_question_template_answer(question, code, result,metadata)}"
return prompt
elif LanguageModelStyle == LMStyle.DracarysQwen:
prompt = f"{get_qwen_question_template_answer(question, code, result,metadata)}"
return prompt
elif LanguageModelStyle == LMStyle.DracarysLlama:
chat_messages = [
{"role": "system", "content": PromptConstants.SYSTEM_MESSAGE_GENERIC},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_answer(
question, code, result, metadata
),
},
]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"abacusai/Dracarys-Llama-3.1-70B-Instruct", padding_side="right", use_fast=False
)
return tokenizer.apply_chat_template(
chat_messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
if LanguageModelStyle == LMStyle.Eurusx:
prompt = "[INST] Write Python code to solve the task:\n"
prompt += f"{get_wizard_question_template_answer(question, code, result,metadata)}"
prompt += "[/INST]"
return prompt
else:
raise NotImplementedError(
f"LanguageModelStyle {LanguageModelStyle} not implemented"
)
def extract_code(model_output: str, lmstyle: LMStyle):
outputlines = model_output.split("\n")
if lmstyle == LMStyle.CodeLLaMa:
indexlines = [i for i, line in enumerate(outputlines) if "PYTHON]" in line]
else:
indexlines = [i for i, line in enumerate(outputlines) if "```" in line]
if len(indexlines) < 2:
return ""
return "\n".join(outputlines[indexlines[0] + 1 : indexlines[1]])
def test():
def write_str_or_json(prompt):
if isinstance(prompt, str):
fp.write(prompt)
else:
fp.write(json.dumps(prompt))
return
for lm_style in [LMStyle.OpenAIChat]:
with open(
"output/GPT-3.5-Turbo-0125/Scenario.codegeneration_10_0.2_eval_all.json"
) as f:
check_metadata = json.load(f)[0]
        checked_base_question_content = check_metadata["question_content"]
checked_base_codes = check_metadata["code_list"][0]
checked_base_results = check_metadata["graded_list"][0]
checked_base_metadata = check_metadata["metadata"][0]
leetcode_prompt = format_prompt_self_repair(
            checked_base_question_content,
lm_style,
checked_base_codes,
checked_base_results,
checked_base_metadata,
)
with open(f"/tmp/leetcode_{lm_style}.txt", "w") as fp:
write_str_or_json(leetcode_prompt)
return
if __name__ == "__main__":
test()
import json
from anthropic import HUMAN_PROMPT, AI_PROMPT
from lcb_runner.lm_styles import LMStyle
from lcb_runner.benchmarks import TestOutputPredictionProblem
class PromptConstants:
SYSTEM_MESSAGE_CHAT_GENERIC = f"You are a helpful programming assistant and an expert Python programmer.\
You are helping a user to write a test case to help to check the correctness of the function.\
The user has written a input for the testcase.\
You will calculate the output of the testcase and\
write the whole assertion statement in the markdown code block with the correct output."
SYSTEM_MESSAGE_COMPLETION_GENERIC = f"You are a helpful programming assistant and an expert Python programmer.\
You are helping a user to write a test case to help to check the correctness of the function."
SYSTEM_MESSAGE_INST_CLLAMA = f"You are a helpful programming assistant and an expert Python programmer.\
You are helping a user to write a test case to help to check the correctness of the function.\
The user has written a input for the testcase.\
You will calculate the output of the testcase and \
write out the complete assertion statement between [PYTHON] and [/PYTHON] tags."
SYSTEM_MESSAGE_WIZARD = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
SYSTEM_MESSAGE_PHIND = f"""You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program. You must put the entired fixed program within code delimiters only for once., for example:
```python
# YOUR CODE HERE
```"""
FORMATTING_MESSAGE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."
FORMATTING_WITHOUT_STARTER_MESSAGE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows."
def truncate_io(io):
if len(str(io)) > 1000:
io = str(io)[:1000] + "...."
print(io)
return io
def format_testcase_func_name_input(function_name, testcase):
"""
use the form of "assert func_name(input) == "
"""
# TODO should there be a space after the == ?
input_str = ", ".join(testcase.split("\n"))
return f"assert {function_name}({input_str}) == # TODO"
def parse_function_name_from_starter_code(starter_code):
"""
starter_code : str
"""
import ast
tree = ast.parse(starter_code)
fn = None
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
assert fn is None
fn = node.name
return fn
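# Illustrative sketch: the parser walks the AST and expects exactly one
# function definition in the starter code (the assert above enforces this),
# including a method nested inside a class.
def _demo_parse_function_name():
    starter = "class Solution:\n    def twoSum(self, nums, target):\n        pass"
    assert parse_function_name_from_starter_code(starter) == "twoSum"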
def get_generic_question_template_test_completion(
question: TestOutputPredictionProblem, testcase_input: str
):
prompt = f"Problem:\n{question.question_content}"
prompt += f"Function:\n```\n{question.starter_code}\n```\n"
# parse function name from starter_code
func_name = parse_function_name_from_starter_code(question.starter_code)
prompt += "Please complete the following test case:\n\n"
prompt += (
f"```\n{format_testcase_func_name_input(func_name, testcase_input)}\n```\n"
)
return prompt
def get_cllama_question_template_answer(
question: TestOutputPredictionProblem, testcase_input: str
):
prompt = f"### Question\n"
prompt += get_generic_question_template_test_completion(question, testcase_input)
prompt += f"### Answer\n"
return prompt
def get_deepseekcode_question_template_answer(
question: TestOutputPredictionProblem, testcase_input: str
):
prompt = f"### Instruction: {PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n\n"
prompt += get_generic_question_template_test_completion(question, testcase_input)
prompt += f"### Response:\n\n"
return prompt
def get_magicoder_question_template_answer(
question: TestOutputPredictionProblem, testcase_input: str
):
# prompt = f"You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.\n\n"
prompt = f"Question:\n"
prompt += get_generic_question_template_test_completion(question, testcase_input)
prompt += f"@@ Response \n"
return prompt
def get_mixtral_question_template_answer(
question: TestOutputPredictionProblem, testcase_input: str
):
prompt = get_generic_question_template_test_completion(question, testcase_input)
return prompt
def get_wizard_question_template_answer(
question: TestOutputPredictionProblem, testcase_input: str
):
prompt = f"""### Instruction: {PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n"""
prompt += get_generic_question_template_test_completion(question, testcase_input)
prompt += f"### Response:\n"
return prompt
def get_phind_question_template_answer(
question: TestOutputPredictionProblem, testcase_input: str
):
prompt = get_generic_question_template_test_completion(question, testcase_input)
prompt += f"\n\n### Assistant"
return prompt
def get_qwen_question_template_answer(question: TestOutputPredictionProblem, testcase_input: str):
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"abacusai/Dracarys-72B-Instruct", padding_side="left", use_fast=False
)
prompt = f"""### Instruction: {PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n"""
prompt += get_generic_question_template_test_completion(question, testcase_input)
prompt += f"### Response:\n"
messages = [
{"role": "user", "content": prompt},
]
prompt = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
return prompt
def format_prompt_test_output(
question: TestOutputPredictionProblem, LanguageModelStyle: LMStyle
) -> str:
testcase_input = question.test[0].input
if LanguageModelStyle == LMStyle.OpenAIChat:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC,
},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_test_completion(
question, testcase_input
),
},
]
return chat_messages
if LanguageModelStyle == LMStyle.LLaMa3:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC,
},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_test_completion(
question, testcase_input
),
},
]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"meta-llama/Meta-Llama-3-8B-Instruct", padding_side="left", use_fast=False
)
return tokenizer.apply_chat_template(
chat_messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
elif LanguageModelStyle == LMStyle.Claude:
prompt = f"{HUMAN_PROMPT}\n{PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n\n"
prompt += f"{get_generic_question_template_test_completion(question, testcase_input).rstrip()}\n{AI_PROMPT}"
return prompt
elif LanguageModelStyle == LMStyle.Claude3:
system = PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC
prompt = [
{
"role": "user",
"content": get_generic_question_template_test_completion(
question, testcase_input
).rstrip(),
}
]
return system, prompt
elif LanguageModelStyle == LMStyle.Gemini:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n"
prompt += (
f"{get_generic_question_template_test_completion(question, testcase_input)}"
)
return prompt
elif LanguageModelStyle == LMStyle.StarCoderInstruct:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n"
prompt += (
f"{get_generic_question_template_test_completion(question, testcase_input)}"
)
return prompt
elif LanguageModelStyle == LMStyle.DeepSeekCodeInstruct:
prompt = (
f"{get_deepseekcode_question_template_answer(question, testcase_input)}"
)
return prompt
elif LanguageModelStyle == LMStyle.CodeLLaMaInstruct:
prompt = f"[INST] <<SYS>>\n{PromptConstants.SYSTEM_MESSAGE_INST_CLLAMA}\n<</SYS>>\n\n"
prompt += (
f"{get_cllama_question_template_answer(question, testcase_input)}\n[/INST]"
)
return prompt
elif LanguageModelStyle == LMStyle.MagiCoder:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n"
prompt += f"{get_magicoder_question_template_answer(question, testcase_input)}"
return prompt
elif LanguageModelStyle == LMStyle.WizardCoder:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_WIZARD}\n\n{get_wizard_question_template_answer(question, testcase_input)}"
return prompt
elif LanguageModelStyle == LMStyle.Phind:
prompt = f"### System Prompt\n\n{PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n\n### User Message\n\n{get_phind_question_template_answer(question, testcase_input)}"
return prompt
elif LanguageModelStyle == LMStyle.OC:
prompt = f"{PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC}\n"
prompt += (
f"{get_generic_question_template_test_completion(question, testcase_input)}"
)
return prompt
elif LanguageModelStyle == LMStyle.MistralWeb:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC,
},
{
"role": "user",
"content": get_generic_question_template_test_completion(
question, testcase_input
),
},
]
return chat_messages
    elif LanguageModelStyle == LMStyle.DracarysQwen:
prompt = f"{get_qwen_question_template_answer(question, testcase_input)}"
return prompt
elif LanguageModelStyle == LMStyle.DracarysLlama:
chat_messages = [
{
"role": "system",
"content": PromptConstants.SYSTEM_MESSAGE_CHAT_GENERIC,
},
]
chat_messages += [
{
"role": "user",
"content": get_generic_question_template_test_completion(
question, testcase_input
),
},
]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"abacusai/Dracarys-Llama-3.1-70B-Instruct", padding_side="right", use_fast=False
)
return tokenizer.apply_chat_template(
chat_messages,
tokenize=False,
add_generation_prompt=True,
truncation=False,
padding=False,
)
else:
raise NotImplementedError(
f"LanguageModelStyle {LanguageModelStyle} not implemented"
)
Metadata-Version: 1.1
Name: pyext
Version: 0.7
Summary: Simple Python extensions.
Home-page: UNKNOWN
Author: Ryan Gonzalez
Author-email: kirbyfan64sos@gmail.com
License: UNKNOWN
Description: PyExt
=====
.. image:: https://travis-ci.org/kirbyfan64/PyExt.png
:target: https://travis-ci.org/kirbyfan64/PyExt
Several simple extensions that add some nifty features to Python.
Links:
******
========= =============================================
GitHub https://github.com/kirbyfan64/PyExt
PyPI https://pypi.python.org/pypi/pyext
Newsgroup https://groups.google.com/forum/#!forum/pyext
========= =============================================
Features:
*********
- Function overloading
- Switch statement
- Runtime module creation
- Tail recursion removal
- Python 2 function annotations
- Python 2 safe tuple unpacking
- Assignment if condition is true
Examples:
*********
Function overloading::
@overload.argc(1)
def f(a): print 'Function 1 called'
@overload.argc(2)
def f(a, b): print 'Function 2 called'
f(1)
f(1, 2)
Switch statement::
with switch(1):
if case(0): print 'Awkward...'; case.quit() # case.quit() is the same as break
if case(2): print '???'
if case(1): print 'Phew! It works!'
if case.default(): print 'Ummmm...'
Function annotations::
@fannotate('Return annotation', a=1, b=2)
def x(a, b):
return 0
Assign if condition is true::
compare_and_swap('my_var', None, 2) # set my_var to 2 if it equals None
.. note:: Please ignore this project's messy commit history (several commits under invalid_email_address, about 20 commits labeled Initial). I was trying to use hg-git and kept goofing stuff up.
Platform: UNKNOWN
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 3
PyExt
=====
.. image:: https://travis-ci.org/kirbyfan64/PyExt.png
:target: https://travis-ci.org/kirbyfan64/PyExt
Several simple extensions that add some nifty features to Python.
Links:
******
========= =============================================
GitHub https://github.com/kirbyfan64/PyExt
PyPI https://pypi.python.org/pypi/pyext
Newsgroup https://groups.google.com/forum/#!forum/pyext
========= =============================================
Features:
*********
- Function overloading
- Switch statement
- Runtime module creation
- Tail recursion removal
- Python 2 function annotations
- Python 2 safe tuple unpacking
- Assignment if condition is true
Examples:
*********
Function overloading::
@overload.argc(1)
def f(a): print 'Function 1 called'
@overload.argc(2)
def f(a, b): print 'Function 2 called'
f(1)
f(1, 2)
Switch statement::
with switch(1):
if case(0): print 'Awkward...'; case.quit() # case.quit() is the same as break
if case(2): print '???'
if case(1): print 'Phew! It works!'
if case.default(): print 'Ummmm...'
Function annotations::
@fannotate('Return annotation', a=1, b=2)
def x(a, b):
return 0
Assign if condition is true::
compare_and_swap('my_var', None, 2) # set my_var to 2 if it equals None
.. note:: Please ignore this project's messy commit history(several commits under invalid_email_address, about 20 commits labeled Initial). I was trying to use hg-git and kept goofing stuff up.
Metadata-Version: 2.1
Name: pyext
Version: 0.7
Summary: Simple Python extensions.
Author: Ryan Gonzalez
Author-email: kirbyfan64sos@gmail.com
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 3
PyExt
=====
.. image:: https://travis-ci.org/kirbyfan64/PyExt.png
:target: https://travis-ci.org/kirbyfan64/PyExt
Several simple extensions that add some nifty features to Python.
Links:
******
========= =============================================
GitHub https://github.com/kirbyfan64/PyExt
PyPI https://pypi.python.org/pypi/pyext
Newsgroup https://groups.google.com/forum/#!forum/pyext
========= =============================================
Features:
*********
- Function overloading
- Switch statement
- Runtime module creation
- Tail recursion removal
- Python 2 function annotations
- Python 2 safe tuple unpacking
- Assignment if condition is true
Examples:
*********
Function overloading::
@overload.argc(1)
def f(a): print 'Function 1 called'
@overload.argc(2)
def f(a, b): print 'Function 2 called'
f(1)
f(1, 2)
Switch statement::
with switch(1):
if case(0): print 'Awkward...'; case.quit() # case.quit() is the same as break
if case(2): print '???'
if case(1): print 'Phew! It works!'
if case.default(): print 'Ummmm...'
Function annotations::
@fannotate('Return annotation', a=1, b=2)
def x(a, b):
return 0
Assign if condition is true::
compare_and_swap('my_var', None, 2) # set my_var to 2 if it equals None
.. note:: Please ignore this project's messy commit history (several commits under invalid_email_address, about 20 commits labeled "Initial"). I was trying to use hg-git and kept goofing stuff up.
README.rst
pyext.py
setup.cfg
setup.py
pyext.egg-info/PKG-INFO
pyext.egg-info/SOURCES.txt
pyext.egg-info/dependency_links.txt
pyext.egg-info/top_level.txt
test/test_pyext.py
'''
Copyright (C) 2014 Ryan Gonzalez
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
g_backup = globals().copy()
__version__ = '0.7'
__all__ = ['overload', 'RuntimeModule', 'switch', 'tail_recurse', 'copyfunc', 'set_docstring', 'annotate', 'safe_unpack', 'modify_function', 'assign', 'fannotate', 'compare_and_swap', 'is_main', 'call_if_main', 'run_main']
import sys, inspect, types
def __targspec(func, specs, attr='__orig_arg__'):
if hasattr(func, '__is_overload__') and func.__is_overload__:
return getattr(func, attr)
return specs(func)
def set_docstring(doc):
'''A simple decorator to set docstrings.
:param doc: The docstring to tie to the function.
Example::
@set_docstring('This is a docstring')
def myfunc(x):
pass'''
def _wrap(f):
f.__doc__ = doc
return f
return _wrap
__modify_function_doc = '''
Creates a copy of a function, changing its attributes.
:param globals: Will be added to the function's globals.
:param name: The new function name. Set to ``None`` to use the function's original name.
:param code: The new function code object. Set to ``None`` to use the function's original code object.
:param defaults: The new function defaults. Set to ``None`` to use the function's original defaults.
:param closure: The new function closure. Set to ``None`` to use the function's original closure.
.. warning:: This function can be potentially dangerous.
'''
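# Illustrative sketch (added; not part of the original source). modify_function
# clones a function, optionally swapping attributes; the names below are made up.
#
# def greet(name='world'): return 'hello ' + name
# shout = modify_function(greet, name='shout', defaults=('python',))
# assert shout() == 'hello python' # new default, new __name__
# assert greet() == 'hello world' # the original is untouched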
def copyfunc(f):
'''Copies a function.
:param f: The function to copy.
:return: The copied function.
.. deprecated:: 0.4
Use :func:`modify_function` instead.
'''
return modify_function(f)
if sys.version_info.major == 3:
@set_docstring(__modify_function_doc)
def modify_function(f, globals={}, name=None, code=None, defaults=None,
closure=None):
if code is None: code = f.__code__
if name is None: name = f.__name__
if defaults is None: defaults = f.__defaults__
if closure is None: closure = f.__closure__
newf = types.FunctionType(code, dict(f.__globals__, **globals), name=name,
argdefs=defaults, closure=closure)
newf.__dict__.update(f.__dict__)
return newf
def argspec(f):
return inspect.getfullargspec(f)
ofullargspec = inspect.getfullargspec
def _fullargspec(func):
return __targspec(func, ofullargspec)
inspect.getfullargspec = _fullargspec
def _exec(m,g): exec(m,g)
else:
@set_docstring(__modify_function_doc)
def modify_function(f, globals={}, name=None, code=None, defaults=None,
closure=None):
if code is None: code = f.func_code
if name is None: name = f.__name__
if defaults is None: defaults = f.func_defaults
if closure is None: closure = f.func_closure
newf = types.FunctionType(code, dict(f.func_globals, **globals), name=name,
argdefs=defaults, closure=closure)
newf.__dict__.update(f.__dict__)
return newf
def argspec(f):
return inspect.getargspec(f)
eval(compile('def _exec(m,g): exec m in g', '<exec>', 'exec'))
def _gettypes(args):
return tuple(map(type, args))
# oargspec = inspect.getargspec
oargspec = inspect.getfullargspec
def _argspec(func):
return __targspec(func, oargspec)
inspect.getargspec = _argspec
try:
import IPython
except ImportError:
IPython = None
else:
# Replace IPython's argspec
oipyargspec = IPython.core.oinspect.getargspec
def _ipyargspec(func):
return __targspec(func, oipyargspec, '__orig_arg_ipy__')
IPython.core.oinspect.getargspec = _ipyargspec
class overload(object):
'''Simple function overloading in Python.'''
_items = {}
_types = {}
@classmethod
def argc(self, argc=None):
'''Overloads a function based on the specified argument count.
:param argc: The argument count. Defaults to ``None``. If ``None`` is given, automatically compute the argument count from the given function.
.. note::
Keyword argument counts are NOT checked! In addition, when the argument count is automatically calculated, the keyword argument count is also ignored!
Example::
@overload.argc()
def func(a):
print 'Function 1 called'
@overload.argc()
def func(a, b):
print 'Function 2 called'
func(1) # Calls first function
func(1, 2) # Calls second function
func() # Raises error
'''
# Python 2 UnboundLocalError fix
argc = {'argc': argc}
def _wrap(f):
def _newf(*args, **kwargs):
if len(args) not in self._items[f.__name__]:
raise TypeError("No overload of function '%s' that takes %d args" % (f.__name__, len(args)))
return self._items[f.__name__][len(args)](*args, **kwargs)
if f.__name__ not in self._items:
self._items[f.__name__] = {}
if argc['argc'] is None:
argc['argc'] = len(argspec(f).args)
self._items[f.__name__][argc['argc']] = f
_newf.__name__ = f.__name__
_newf.__doc__ = f.__doc__
_newf.__is_overload__ = True
_newf.__orig_arg__ = argspec(f)
if IPython:
_newf.__orig_arg_ipy__ = IPython.core.oinspect.getargspec(f)
return _newf
return _wrap
@classmethod
def args(self, *argtypes, **kw):
'''Overload a function based on the specified argument types.
:param argtypes: The argument types. If None is given, get the argument types from the function annotations (Python 3 only).
:param kw: Can only contain one keyword argument, `is_cls`. If True, the function is assumed to be part of a class.
Example::
@overload.args(str)
def func(s):
print 'Got string'
@overload.args(int, str)
def func(i, s):
print 'Got int and string'
@overload.args()
def func(i:int): # A function annotation example
print 'Got int'
func('s')
func(1)
func(1, 's')
func(True) # Raises error
'''
# Python 2 UnboundLocalError fix...again!
argtypes = {'args': tuple(argtypes)}
def _wrap(f):
def _newf(*args):
if len(kw) == 0:
cargs = args
elif len(kw) == 1 and 'is_cls' in kw and kw['is_cls']:
cargs = args[1:]
else:
raise ValueError('Invalid keyword args specified')
if _gettypes(cargs) not in self._types[f.__name__]:
raise TypeError("No overload of function '%s' that takes '%s' types and %d arg(s)" % (f.__name__, _gettypes(cargs), len(cargs)))
return self._types[f.__name__][_gettypes(cargs)](*args)
if f.__name__ not in self._types:
self._types[f.__name__] = {}
if len(argtypes['args']) == 1 and argtypes['args'][0] is None:
aspec = argspec(f)
argtypes['args'] = tuple(map(lambda x: x[1], sorted(
aspec.annotations.items(), key=lambda x: aspec.args.index(x[0]))))
self._types[f.__name__][argtypes['args']] = f
_newf.__name__ = f.__name__
_newf.__doc__ = f.__doc__
_newf.__is_overload__ = True
_newf.__orig_arg__ = argspec(f)
if IPython:
_newf.__orig_arg_ipy__ = IPython.core.oinspect.getargspec(f)
return _newf
return _wrap
class _RuntimeModule(object):
'Create a module object at runtime and insert it into sys.modules. If called, same as :py:func:`from_objects`.'
def __call__(self, *args, **kwargs):
return self.from_objects(*args, **kwargs)
@staticmethod
@overload.argc(1)
def from_objects(name, **d):
return _RuntimeModule.from_objects(name, '', **d)
@staticmethod
@overload.argc(2)
def from_objects(name, docstring, **d):
'''Create a module at runtime from `d`.
:param name: The module name.
:param docstring: Optional. The module's docstring.
:param \*\*d: All the keyword args, mapped from name->value.
Example: ``RuntimeModule.from_objects('name', 'doc', a=1, b=2)``'''
module = types.ModuleType(name, docstring)
module.__dict__.update(d)
module.__file__ = '<runtime_module>'
sys.modules[name] = module
return module
@staticmethod
@overload.argc(2)
def from_string(name, s):
return _RuntimeModule.from_string(name, '', s)
@staticmethod
@overload.argc(3)
def from_string(name, docstring, s):
'''Create a module at runtime from ``s``.
:param name: The module name.
:param docstring: Optional. The module docstring.
:param s: A string containing the module definition.'''
g = {}
_exec(s, g)
return _RuntimeModule.from_objects(name, docstring, **dict(filter(lambda x: x[0] not in g_backup, g.items())))
RuntimeModule = _RuntimeModule()
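# Usage sketch (added for illustration; the module names are hypothetical).
# Both entry points register the result in sys.modules, making it importable:
#
# m = RuntimeModule.from_objects('scratch_mod', 'a docstring', answer=42)
# from scratch_mod import answer # resolved through the sys.modules entry
# m2 = RuntimeModule.from_string('scratch_mod2', '', 'x = 1') # m2.x == 1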
class CaseObject(object):
'The object returned by a switch statement. When called, it will return True if the given argument equals its value, else False. It can be called with multiple parameters, in which case it checks if its value equals any of the arguments.'
def __init__(self, value):
self.value = value
self.did_match = False
self.did_pass = False
def __call__(self, *args):
if assign('res', not self.did_pass and any([self.value == rhs for rhs in args])):
self.did_match = True
return res
def quit(self):
'Forces all other calls to return False. Equivalent of a ``break`` statement.'
self.did_pass = True
def default(self):
"Executed if quit wasn't called."
return not self.did_match and not self.did_pass
def __iter__(self):
yield self
def __enter__(self):
return self
def __exit__(self, *args):
pass
def switch(value):
'''A Python switch statement implementation that is used with a ``with`` statement.
:param value: The value to "switch".
``with`` statement example::
with switch('x'):
if case(1): print 'Huh?'
if case('x'): print 'It works!!!'
.. warning:: If you modify a variable named "case" in the same scope that you use the ``with`` statement version, you will get an UnboundLocalError. The solution is to use ``with switch('x') as case:`` instead of ``with switch('x'):``.
res = CaseObject(value)
inspect.stack()[1][0].f_globals['case'] = res
return res
def tail_recurse(spec=None):
'''Remove tail recursion from a function.
:param spec: A function that, when given the arguments, returns a bool indicating whether or not to exit. If ``None``, tail recursion is always applied unless the function returns a value.
.. note::
This function has a slight overhead that is noticeable when using timeit. Only use it if the function has a possibility of going over the recursion limit.
.. warning::
This function will BREAK any code that either uses any recursion other than tail recursion or calls itself multiple times. For example, ``def x(): return x()+1`` will fail.
Example::
@tail_recurse()
def add(a, b):
if a == 0: return b
return add(a-1, b+1)
add(10000000, 1) # Doesn't hit the recursion limit.
'''
def _wrap(f):
class TailRecursion(Exception):
def __init__(self, args, kwargs):
self.args = args
self.kwargs = kwargs
def _newf(*args, **kwargs):
if inspect.stack()[1][3] == f.__name__:
if (spec and spec(args)) or not spec:
raise TailRecursion(args, kwargs)
while True:
try:
res = f(*args, **kwargs)
except TailRecursion as ex:
args = ex.args
kwargs = ex.kwargs
continue
else:
return res
_newf.__doc__ = f.__doc__
return _newf
return _wrap
def annotate(*args, **kwargs):
'''Set function annotations using decorators.
:param args: This is a list of annotations for the function, in the order of the function's parameters. For example, ``annotate('Annotation 1', 'Annotation 2')`` will set the annotation of the function's first parameter to ``Annotation 1`` and of the second to ``Annotation 2``.
:param kwargs: This is a mapping of argument names to annotations. Note that these are applied *after* the argument list, so any args set that way will be overridden by this mapping. If there is a key named `ret`, that will be the annotation for the function's return value.
.. deprecated:: 0.5
Use :func:`fannotate` instead.
'''
def _wrap(f):
if not hasattr(f, '__annotations__'):
f.__annotations__ = {}
if 'ret' in kwargs:
f.__annotations__['return'] = kwargs.pop('ret')
f.__annotations__.update(dict(zip(argspec(f).args, args)))
f.__annotations__.update(kwargs)
return f
return _wrap
def fannotate(*args, **kwargs):
'''Set function annotations using decorators.
:param \*args: The first positional argument is used for the function's return value; all others are discarded.
:param \**kwargs: This is a mapping of argument names to annotations.
Example::
@fannotate('This for the return value', a='Parameter a', b='Parameter b')
def x(a, b):
pass
'''
def _wrap(f):
if not hasattr(f, '__annotations__'):
f.__annotations__ = {}
if len(args) >= 1:
f.__annotations__['return'] = args[0]
f.__annotations__.update(kwargs)
return f
return _wrap
def safe_unpack(seq, ln, fill=None):
'''Safely unpack a sequence to length `ln`, without raising ValueError. Based on Lua's method of unpacking. Missing values will be filled in with `fill`, while any extra values will be cut off.
:param seq: The sequence to unpack.
:param ln: The expected length of the sequence.
:param fill: The value to substitute if the sequence is too small. Defaults to ``None``.
Example::
s = 'a:b'
a, b = safe_unpack(s.split(':'), 2)
# a = 'a'
# b = 'b'
s = 'a'
a, b = safe_unpack(s.split(':'), 2)
# a = 'a'
# b = None'''
if len(seq) > ln:
return seq[:ln]
elif len(seq) < ln:
return seq + type(seq)([fill]*(ln-len(seq)))
else:
return seq
def assign(varname, value):
'''Assign `value` to `varname` and return it. If `varname` is an attribute and the instance name it belongs to is not defined, a NameError is raised.
This can be used to emulate assignment as an expression. For example, this::
if assign('x', 7): ...
is equivalent to this C code::
if (x = 7) ...
.. warning::
When assigning an attribute, the instance it belongs to MUST be declared as global prior to the assignment. Otherwise, the assignment will not work.
'''
fd = inspect.stack()[1][0].f_globals
if '.' not in varname:
fd[varname] = value
else:
vsplit = list(map(str.strip, varname.split('.')))
if vsplit[0] not in fd:
raise NameError('Unknown object: %s'%vsplit[0])
base = fd[vsplit[0]]
for x in vsplit[1:-1]:
base = getattr(base, x)
setattr(base, vsplit[-1], value)
return value
def is_main(frame=1):
"Return if the caller is main. Equilavent to ``__name__ == '__main__'``."
return inspect.stack()[frame][0].f_globals['__name__'] == '__main__'
def _call_if_main(frame, f, args):
if is_main(frame): return f(*args)
def call_if_main(f,*args):
"Call the `f` with `args` if the caller's module is main."
return _call_if_main(3,f,args)
def run_main(f,*args):
"Call `f` with the `args` and terminate the program with its return code if the caller's module is main."
sys.exit(_call_if_main(3,f,args))
def compare_and_swap(var, compare, new):
"If `var` is equal to `compare`, set it to `new`."
if assign('v', inspect.stack()[1][0].f_globals)[var] == compare:
v[var] = new
[egg_info]
tag_build =
tag_date = 0
tag_svn_revision = 0
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
import pyext
with open('README.rst', 'r') as f:
readme = f.read()
setup(name='pyext',
version=str(pyext.__version__),
author='Ryan Gonzalez',
author_email='kirbyfan64sos@gmail.com',
py_modules=['pyext'],
description='Simple Python extensions.',
long_description=readme,
classifiers=[
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 3']
)
import sys, inspect, types, unittest
from pyext import *
class TestPyExt(unittest.TestCase):
def test_overload_argc(self):
@overload.argc(1)
def f(a): return 1
@overload.argc(2)
def f(a, b): return 2
@overload.argc()
def f(): return 0
self.assertEqual(f(), 0)
self.assertEqual(f(1), 1)
self.assertEqual(f(1, 2), 2)
self.assertRaises(TypeError, f, 1, 2, 3)
self.assertEqual(len(inspect.getargspec(f).args), 0)
def test_overload_args(self):
@overload.args(str, int)
def f(a, b): return str, int
@overload.args(int)
def f(a): return int
@overload.args(str)
def f(a): return str
@overload.args()
def f(): return
self.assertEqual(f(), None)
self.assertEqual(f(0), int)
self.assertEqual(f('s'), str)
self.assertEqual(f('s', 0), (str, int))
self.assertRaises(TypeError, f, 0, 's')
self.assertEqual(len(inspect.getargspec(f).args), 0)
class x(object):
@overload.args(str, is_cls=True)
def f(self, s): return 1
@overload.args(int, is_cls=True)
def f(self, i): return 2
self.assertEqual(x().f('s'), 1)
self.assertEqual(x().f(1), 2)
def test_module(self):
m = RuntimeModule('s', 'doc', x=1, f=2)
self.assertEqual(m.x, 1)
self.assertEqual(m.f, 2)
self.assertTrue(isinstance(m, types.ModuleType))
self.assertEqual(m.__doc__, 'doc')
m2 = RuntimeModule.from_string('s', 'doc', 'a=7; b=6')
self.assertEqual(m2.a, 7)
self.assertEqual(m2.b, 6)
def test_switch(self):
with switch('x'):
if case('x'): x = 4; case.quit()
if case('b'): x = 2
if case(1): x = 3
if case('a'): x = 1
if case('x'): x = 0
self.assertEqual(x, 4)
with switch(1):
if case.default(): x = 7
self.assertEqual(x, 7)
with switch(2):
if case(1,2): x = 9
self.assertEqual(x, 9)
def test_annot(self):
@fannotate('r', a='a', b=1, c=2)
def x(a, b, c): pass
self.assertEqual(x.__annotations__, {'a': 'a', 'b': 1, 'c': 2, 'return': 'r'})
def test_unpack(self):
t = (1, 2, 3)
self.assertEqual(safe_unpack(t,2), (1,2))
self.assertEqual(safe_unpack(t,4), (1,2,3,None))
self.assertEqual(safe_unpack(t,4,fill=0), (1,2,3,0))
def test_assign(self):
self.assertEqual(assign('x', 7), 7)
self.assertEqual(x, 7)
global f
def f(): pass
self.assertEqual(assign('f.__annotations__', {'a': 1}), {'a': 1})
self.assertEqual(f.__annotations__, {'a': 1})
def test_compare_and_swap(self):
global v
v = None
compare_and_swap('v', None, 7)
self.assertEqual(v, 7)
compare_and_swap('v', None, 8)
self.assertEqual(v, 7)
if sys.version_info.major == 3:
def test_overload_args_annot(self):
def x(a, b): return 0
x.__annotations__ = {'a': int, 'b': str}
x = overload.args(None)(x)
self.assertEqual(x(1, 's'), 0)
self.assertRaises(TypeError, x, 1, 2)
if __name__ == '__main__':
unittest.main()
import os
import json
from abc import ABC, abstractmethod
from tqdm import tqdm
from lcb_runner.lm_styles import LanguageModel
from lcb_runner.utils.path_utils import get_cache_path
from lcb_runner.utils.multiprocess import run_tasks_in_parallel
from lcb_runner.runner.scenario_router import Scenario
class BaseRunner(ABC):
def __init__(self, args, model: LanguageModel):
self.args = args
self.model = model
self.client_kwargs: dict[str, str] = {}
if self.args.use_cache:
self.cache_path = get_cache_path(model.model_repr, args)
if os.path.exists(self.cache_path):
with open(self.cache_path) as f:
self.cache: dict = json.load(f)
else:
self.cache = {}
else:
self.cache_path = None
self.cache = None
def save_cache(self):
if self.args.use_cache:
with open(self.cache_path, "w") as f:
json.dump(self.cache, f, indent=4)
# @abstractmethod
def _run_single(self, prompt: str | list[dict[str, str]]) -> list[str]:
pass
@staticmethod
def run_single(combined_args) -> list[str]:
"""
Run the model for a single prompt and return the outputs.
Static method so that it can be used with multiprocessing;
it forwards the combined arguments to the _run_single method.
"""
prompt: str | list[dict[str, str]]
cache: dict[str, str]
call_method: callable
prompt, cache, args, call_method = combined_args
if isinstance(prompt, list):
prompt_cache = json.dumps(prompt)
elif isinstance(prompt, tuple):
prompt_cache = prompt[0] + json.dumps(prompt[1])
else:
prompt_cache = prompt
if cache is not None and prompt_cache in cache:
if len(cache[prompt_cache]) == args.n:
return cache[prompt_cache]
result = call_method(prompt)
assert len(result) == args.n
return result
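# Note (added for clarity): the cache key built above mirrors the three prompt
# shapes: a chat-message list is serialized with json.dumps; a
# (system, messages) tuple concatenates prompt[0] with json.dumps(prompt[1]);
# a plain string prompt is used verbatim.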
def run_batch(self, prompts: list[str | list[dict[str, str]]]) -> list[list[str]]:
outputs = []
arguments = [
(
prompt,
self.cache, ## pass the cache as argument for cache check
self.args, ## pass the args as argument for cache check
self._run_single, ## pass the _run_single method as argument because of multiprocessing
)
for prompt in prompts
]
if self.args.multiprocess > 1:
parallel_outputs = run_tasks_in_parallel(
self.run_single,
arguments,
self.args.multiprocess,
use_progress_bar=True,
)
for output in parallel_outputs:
if output.is_success():
outputs.append(output.result)
else:
print("Failed to run the model for some prompts")
print(output.status)
print(output.exception_tb)
outputs.extend([""] * self.args.n)
else:
outputs = [self.run_single(argument) for argument in tqdm(arguments)]
if self.args.use_cache:
for prompt, output in zip(prompts, outputs):
if isinstance(prompt, list):
prompt_cache = json.dumps(prompt)
elif isinstance(prompt, tuple):
prompt_cache = prompt[0] + json.dumps(prompt[1])
else:
prompt_cache = prompt
self.cache[prompt_cache] = output ## save the output to cache
return outputs
def prompts_to_outputs(
self, prompts: list[str | list[dict[str, str]]]
) -> list[list[str]]:
if self.args.use_cache:
outputs = []
batch_size = self.args.cache_batch_size
for i in range(0, len(prompts), batch_size):
batch = prompts[i : i + batch_size]
batch_outputs = self.run_batch(batch)
outputs.extend(batch_outputs)
self.save_cache()
else:
outputs = self.run_batch(prompts)
return outputs
def run_main_repair(self, benchmark: list, format_prompt: callable) -> list[list[str]]:
assert self.args.n == 1
with open(
f"output/{self.model.model_repr}/{Scenario.codegeneration}_{self.args.codegen_n}_{self.args.temperature}_eval_all.json"
) as f:
check_metadata_list = json.load(f)
outputs = [
[None for _ in range(self.args.codegen_n)]
for _ in range(len(benchmark))
]
prompts = []
prompt_index_to_question_idx = {}
prompt_index_to_code_idx = {}
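# The two maps above route each flattened repair prompt back to its
# (question, code) slot in `outputs`, since prompts are generated per code sample.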
count = 0
for problem_idx, problem in enumerate(benchmark):
for check_metadata_idx, check_metadata in enumerate(check_metadata_list):
if problem.question_id == check_metadata['question_id']:
count += 1
question_content = check_metadata["question_content"]
code_list = check_metadata["code_list"]
output_list = check_metadata["output_list"]
graded_list = check_metadata["graded_list"]
metadata = check_metadata["metadata"]
for code_idx in range(len(code_list)):
prompt = format_prompt(
question_content,
self.model.model_style,
code_list[code_idx],
graded_list[code_idx],
metadata[code_idx],
)
if prompt == "":
outputs[problem_idx][code_idx] = output_list[code_idx]
continue
prompts.append(prompt)
prompt_index_to_question_idx[len(prompts) - 1] = problem_idx
prompt_index_to_code_idx[len(prompts) - 1] = code_idx
assert len(benchmark)==count, f"{len(benchmark)=}!={count=}"
prompt_outputs = self.prompts_to_outputs(prompts)
for prompt_idx, output in enumerate(prompt_outputs):
question_idx = prompt_index_to_question_idx[prompt_idx]
code_idx = prompt_index_to_code_idx[prompt_idx]
outputs[question_idx][code_idx] = output
return outputs
def run_main(self, benchmark: list, format_prompt: callable) -> list[list[str]]:
if self.args.scenario == Scenario.selfrepair:
return self.run_main_repair(benchmark, format_prompt)
prompts = [
format_prompt(problem, self.model.model_style) for problem in benchmark
]
outputs = self.prompts_to_outputs(prompts)
return outputs
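# Minimal subclass sketch (hypothetical; not part of this file). A concrete
# runner only needs to implement _run_single, returning args.n completions:
#
# class EchoRunner(BaseRunner):
#     def _run_single(self, prompt):
#         return ["" for _ in range(self.args.n)]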