# Task definition for the MBPP benchmark ("full" configuration of the
# google-research-datasets/mbpp dataset), evaluated on the test split with
# free-form generation.
task: mbpp
dataset_path: google-research-datasets/mbpp
dataset_name: full
# NOTE(review): presumably an explicit opt-in because scoring runs
# model-generated code -- confirm against the harness documentation.
unsafe_code: true
output_type: generate_until
test_split: test

# Prompt template: the task description followed by the first three test
# assertions of the document, terminated by the "[BEGIN]" marker.
doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]"
# Target template: when `is_fewshot` is defined, render the reference
# solution followed by "[DONE]"; otherwise render the three test assertions.
doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}"
# A newline separates the prompt (ending in "[BEGIN]") from the target.
target_delimiter: "\n"

# Single metric, supplied by utils.pass_at_1 and averaged across documents.
metric_list:
  - metric: !function utils.pass_at_1
    aggregation: mean
    higher_is_better: true

# Decoding: no sampling; generation stops at the "[DONE]" marker that the
# few-shot targets end with.
generation_kwargs:
  until:
    - "[DONE]"
  do_sample: false

# Three few-shot examples, drawn with the `first_n` sampler from the list
# supplied by utils.list_fewshot_samples.
num_fewshot: 3
fewshot_config:
  sampler: first_n
  samples: !function utils.list_fewshot_samples

metadata:
  version: 1.0