---
# Task variant that layers sampling-based pass@k settings on top of the
# configuration named in `include`.
# NOTE(review): keys not set here are presumably inherited from
# humaneval.yaml -- confirm against the config loader's include semantics.
include: humaneval.yaml
task: humaneval_64

# Number of generations requested per problem.
# NOTE(review): the task name ends in "_64" but repeats is 20 and k is [10];
# confirm whether the name or these values reflect the intended setup.
repeats: 20

metric_list:
  # `!function` is a custom tag resolved by the consuming tool; a plain
  # safe YAML loader will not understand it.
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [10]

generation_kwargs:
  # Stop strings. Double quotes are required so that `\n` is parsed as a
  # newline escape rather than a literal backslash followed by "n".
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  # Sampling (rather than greedy decoding) with the temperature / nucleus
  # settings below.
  do_sample: true
  temperature: 0.8
  top_p: 0.95