# Task variant "humaneval_64": layers the overrides below on top of
# humaneval.yaml (the base file is not visible here; presumably it supplies
# the dataset and prompt settings -- confirm against that file).
include: humaneval.yaml
task: humaneval_64

# Number of repeats per document; equal to the largest k listed below.
repeats: 64

metric_list:
  # `!function` is a custom tag: the consuming loader must define it and
  # resolve `utils.pass_at_k` (presumably from a utils module next to this
  # file -- confirm). Generic safe loaders will reject this tag.
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [2, 8, 16, 32, 64]

generation_kwargs:
  # Stop strings. Double quotes are required so that "\n" is read as a
  # newline escape rather than a literal backslash followed by "n".
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  # Sampling is switched on here (presumably so the repeated generations
  # can differ from one another -- confirm against the consumer).
  do_sample: true
  temperature: 0.2
  top_p: 0.95