# HumanEval task configuration.
# Each document's `prompt` field is sent to the model as-is; the target is
# rendered from the document's `test` field followed by a `check(...)` call
# on its `entry_point` field.
task: humaneval
dataset_path: openai/openai_humaneval
output_type: generate_until
test_split: test
doc_to_text: "{{prompt}}"
doc_to_target: "{{test}}\ncheck({{entry_point}})"

metric_list:
  - metric: !function utils.pass_at_1
    aggregation: mean
    higher_is_better: true
    # NOTE(review): the metric function is named pass_at_1 but is configured
    # with k = 64; utils is not visible here, so confirm that it honours `k`.
    k: 64

generation_kwargs:
  # Stop sequences: each is a newline followed by a token that begins a new
  # top-level Python statement.
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  # Sampling is enabled so that repeated generations can differ.
  do_sample: true
  temperature: 0.2
  top_p: 0.95

# Generations per document. Kept equal to the `n` advertised in the filter
# name below and >= `k` above: a pass@k estimate needs at least k samples,
# and the previous value of 2 contradicted both.
repeats: 64
num_fewshot: 0

filter_list:
  # number of samples to estimate pass@k
  - name: "n=64"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions

metadata:
  version: 1.0