# Task definition for the HumanEval code-generation benchmark.
# NOTE(review): the `!function` tags below are custom tags that must be
# resolved by the consuming loader; they presumably point at a sibling
# `utils` module -- confirm it ships alongside this file.
task: humaneval
dataset_path: openai/openai_humaneval
# NOTE(review): presumably an explicit opt-in to executing model-generated
# code during scoring -- confirm against the harness documentation.
unsafe_code: true
output_type: generate_until
test_split: test

# Prompt and reference are built from dataset fields via templates.
doc_to_text: "{{prompt}}"
doc_to_target: "{{test}}\ncheck({{entry_point}})"

metric_list:
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [1]

generation_kwargs:
  # Each entry begins with a newline followed by a top-level Python token;
  # double quotes are required so that `\n` is parsed as a real newline.
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  do_sample: false

repeats: 1
num_fewshot: 0

filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions

metadata:
  # Kept as an unquoted number so the parsed type matches the original value.
  version: 1.0