Commit a6780a55 authored by Baber
Browse files

truncate thinking; modify gpqa

parent a87fe425
...@@ -426,6 +426,7 @@ class VLLM(TemplateLM): ...@@ -426,6 +426,7 @@ class VLLM(TemplateLM):
# cache generations # cache generations
for output, context in zip(cont, context): for output, context in zip(cont, context):
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
generated_text = generated_text.split("/think>")[-1]
res.append(generated_text) res.append(generated_text)
self.cache_hook.add_partial( self.cache_hook.add_partial(
"generate_until", (context, gen_kwargs), generated_text "generate_until", (context, gen_kwargs), generated_text
......
...@@ -6,27 +6,24 @@ training_split: train ...@@ -6,27 +6,24 @@ training_split: train
# Because huggingface dataset only has train split # Because huggingface dataset only has train split
validation_split: train validation_split: train
test_split: null test_split: null
doc_to_text: "What is the correct answer to this question:{{Question}}\nChoices:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nLet's think step by step: " process_docs: !function utils.process_docs
doc_to_text: "Given the following question and four candidate answers (A, B, C and D), choose the best answer.\n\nQuestion: {{Question}}\nChoices:\nA. {{choice1}}\nB. {{choice2}}\nC. {{choice3}}\nD. {{choice4}}\nPlease reason step by step and conclude with:\nThe answer is [the_answer_letter].\nwhere the [the_answer_letter] is one of A, B, C or D"
gen_prefix: "<think>\n"
doc_to_target: answer doc_to_target: answer
filter_list: filter_list:
- name: "strict-match" - name: "strict-match"
filter: filter:
- function: "regex" - function: "regex"
regex_pattern: "(?<=The answer is )(.*)(?=.)" regex_pattern: "([A-D])"
- function: "take_first"
- name: "flexible-extract"
filter:
- function: "multi_choice_regex"
group_select: -1 group_select: -1
ignore_case: true
ignore_punctuation: true ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first" - function: "take_first"
generation_kwargs: generation_kwargs:
until: until: []
- "</s>" do_sample: true
do_sample: false temperature: 0.6
temperature: 0.0 top_p: 0.95
max_gen_toks: 32768
num_fewshot: 0 num_fewshot: 0
metric_list: metric_list:
- metric: exact_match - metric: exact_match
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment