# Task variant layered on top of the settings pulled in from humaneval.yaml.
include: humaneval.yaml
task: humaneval_64

# Number of repeats configured for this task.
# NOTE(review): the task name ends in "_64" while repeats is 20 and k is
# [10] -- confirm these values are intentional for this variant.
repeats: 20

metric_list:
  # The metric is resolved through the custom `!function` tag, which points
  # at `pass_at_k` in `utils` (defined outside this file).
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k:
      - 10

generation_kwargs:
  # Stop strings. Double quotes are required so that `\n` is read as a
  # newline escape rather than a literal backslash followed by `n`.
  # NOTE(review): "\n```\n\n" begins with "\n```", so it presumably never
  # matches before the shorter entry does -- verify whether it is needed.
  until:
    - "\nclass"
    - "\nif"
    - "\nprint"
    - "\n#"
    - "\n```"
    - "\n```\n\n"
    - "<|eot_id|>"
  max_gen_toks: 512
  # Sampling settings: sampling is enabled with temperature 0.2 and top_p 0.95.
  do_sample: true
  temperature: 0.2
  top_p: 0.95