# Task configuration for the `humaneval` evaluation task.
# NOTE(review): key names look like an lm-evaluation-harness task config —
# confirm against the loader that consumes this file.

# Task identifier.
task: humaneval
# Dataset identifier (presumably a Hugging Face Hub path — confirm).
dataset_path: openai/openai_humaneval
# Generation mode; presumably free-form generation that stops on the `until`
# sequences listed under `generation_kwargs` — verify against the harness.
output_type: generate_until
# Dataset split named `test` is used as the test split.
test_split: test
# Model-input template: renders the document's `prompt` field. Quoted so the
# leading `{` is not parsed as a YAML flow mapping.
doc_to_text: "{{prompt}}"
# Target template: the document's `test` field, then a newline, then a
# `check(...)` call on the document's `entry_point` field. Double quotes are
# required so that `\n` is interpreted as a newline escape.
doc_to_target: "{{test}}\ncheck({{entry_point}})"
metric_list:
  # Scoring function referenced through the loader's custom `!function` tag;
  # `utils.pass_at_1` is defined outside this file.
  - metric: !function utils.pass_at_1
    # Per-document scores are averaged, and a larger value is better.
    aggregation: mean
    higher_is_better: true
    # NOTE(review): `k: 64` is paired with a metric named `pass_at_1`, while
    # `repeats` elsewhere in this file is 2 — confirm the intended k.
    k: 64
generation_kwargs:
  # Stop sequences; presumably generation is cut at the first one produced —
  # confirm against the harness. Each starts with a newline, so double quotes
  # are required for the `\n` escape.
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  # Sampling is enabled with the temperature and nucleus (top-p) values below.
  do_sample: true
  temperature: 0.2
  top_p: 0.95
# Presumably the number of generations requested per document — confirm.
# NOTE(review): the entry in `filter_list` is named "n=64" and the metric sets
# `k: 64`; with a value of 2 here those labels look inconsistent — verify.
repeats: 2
# Zero few-shot examples (presumably none are prepended to the prompt —
# confirm against the harness).
num_fewshot: 0
# Post-processing applied to model outputs before scoring (presumably —
# confirm against the harness).
filter_list:
  # NOTE(review): the name advertises 64 samples, but `repeats` in this file
  # is 2 — confirm which is intended.
  - name: "n=64" # number of samples to estimate pass@k
    filter:
      # Custom filter step; `utils.build_predictions` is defined outside this
      # file and is referenced through the loader's `!function` tag.
      - function: "custom"
        filter_fn: !function utils.build_predictions
# Task metadata.
metadata:
  # Unquoted, so YAML parses this as the float 1.0 rather than the string
  # "1.0". NOTE(review): left as-is because the consumer may rely on the
  # numeric type — confirm before quoting.
  version: 1.0