Truncate thinking output in vLLM generations; modify GPQA CoT zero-shot config

a6780a55 · Baber · a87fe425 · a6780a55 · a6780a55
Commit a6780a55 authored Feb 27, 2025 by Baber
Showing 2 changed files with 10 additions and 12 deletions

lm_eval/models/vllm_causallms.py +1 -0

lm_eval/tasks/gpqa/cot_zeroshot/_gpqa_cot_zeroshot_yaml +9 -12

No files found.
--- a/lm_eval/models/vllm_causallms.py
+++ b/lm_eval/models/vllm_causallms.py
@@ -426,6 +426,7 @@ class VLLM(TemplateLM):
            # cache generations
            for output, context in zip(cont, context):
                generated_text = output.outputs[0].text
+                generated_text = generated_text.split("/think>")[-1]
                res.append(generated_text)
                self.cache_hook.add_partial(
                    "generate_until", (context, gen_kwargs), generated_text

--- a/lm_eval/tasks/gpqa/cot_zeroshot/_gpqa_cot_zeroshot_yaml
+++ b/lm_eval/tasks/gpqa/cot_zeroshot/_gpqa_cot_zeroshot_yaml
@@ -6,27 +6,24 @@ training_split: train
 # Because huggingface dataset only has train split
 validation_split: train
 test_split: null
-doc_to_text: "What is the correct answer to this question:{{Question}}\nChoices:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nLet's think step by step: "
+process_docs: !function utils.process_docs
+doc_to_text: "Given the following question and four candidate answers (A, B, C and D), choose the best answer.\n\nQuestion: {{Question}}\nChoices:\nA. {{choice1}}\nB. {{choice2}}\nC. {{choice3}}\nD. {{choice4}}\nPlease reason step by step and conclude with:\nThe answer is [the_answer_letter].\nwhere the [the_answer_letter] is one of A, B, C or D"
+gen_prefix: "<think>\n"
 doc_to_target: answer
 filter_list:
  - name: "strict-match"
    filter:
      - function: "regex"
-        regex_pattern: "(?<=The answer is )(.*)(?=.)"
+        regex_pattern: "([A-D])"
-      - function: "take_first"
-  - name: "flexible-extract"
-    filter:
-      - function: "multi_choice_regex"
        group_select: -1
-        ignore_case: true
        ignore_punctuation: true
-        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
 generation_kwargs:
-  until:
+  until: []
-    - "</s>"
+  do_sample: true
-  do_sample: false
+  temperature: 0.6
-  temperature: 0.0
+  top_p: 0.95
+  max_gen_toks: 32768
 num_fewshot: 0
 metric_list:
  - metric: exact_match