arena_hard.yaml 816 Bytes
Newer Older
Baber's avatar
Baber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Task configuration for `arena_hard`: a free-form generation task whose prompt
# shows a user question followed by two candidate answers (Assistant A and
# Assistant B).
# NOTE(review): the tag below, the "Question:" stop string and the
# commented-out dataset/split keys all use gsm8k-style values — presumably
# carried over from another task config; confirm they are intended here.
tag:
  - math_word_problems
task: arena_hard
# The dataset source is currently disabled.
# TODO(review): confirm where this task's documents are loaded from.
#dataset_path: gsm8k
#dataset_name: main
output_type: generate_until
#training_split: train
#fewshot_split: train
# Prompt layout: the user prompt, then answer A and answer B, each wrapped in
# start/end markers.
# NOTE(review): the placeholders use single braces ({question_1}) and the
# template is wrapped in a one-element list — confirm the consumer expects this
# form rather than a plain string with Jinja-style {{ ... }} fields.
doc_to_text: ["<|User Prompt|>\n{question_1}\n\n<|The Start of Assistant A's Answer|>\n{answer_1}\n<|The End of Assistant A's Answer|>\n\n<|The Start of Assistant B's Answer|>\n{answer_2}\n<|The End of Assistant B's Answer|>"]
# TODO: need a different metric
#doc_to_target: A, B #" {{answer.split('### ')[-1].rstrip()}}"
metric_list:
  - metric: bypass
    aggregation: mean
generation_kwargs:
  # Generation stops at the first occurrence of any of these strings.
  until:
    - "Question:"
    - "</s>"
    - "<|im_end|>"
  do_sample: false
  temperature: 0.0
filter_list:
  - name: "test"
    filter:
      # Capture the verdict written between double square brackets, e.g.
      # [[A>B]], [[A=B]] or [[B>>A]]. The outer brackets are escaped so they
      # match literally instead of opening a character class, and the scalar
      # is single-quoted so YAML leaves the backslashes untouched.
      - function: "regex"
        regex_pattern: '\[\[([AB<>=]+)\]\]'
      - function: "take_first"
repeats: 1
# NOTE(review): five few-shot examples are requested while `fewshot_split` is
# commented out above — confirm a few-shot source is actually available.
num_fewshot: 5