# Self-consistency variant of the GSM8K chain-of-thought task.
# Everything not overridden here comes from the included base config.
include: gsm8k-cot.yaml

group:
  - chain_of_thought
  - self_consistency

task: gsm8k_cot_self_consistency

# Sampling is enabled (non-greedy) so that repeated generations can differ.
generation_kwargs:
  until:
    - 'Q:'
    - "\n\n"
  do_sample: true
  temperature: 0.2

# Number of responses requested per document; the "maj@64" filter below
# votes over all of them, "maj@8" over the first 8 only.
repeats: 64

# Each entry is a named filter pipeline; its steps are listed in order.
# The regex step captures the (optionally negative) number that follows
# "The answer is", allowing "." and "," inside the digits.
filter_list:
  # Pick only the first response, and report metrics on that.
  - name: score-first
    filter:
      - function: regex
        regex_pattern: "The answer is (\\-?[0-9\\.\\,]*[0-9]+)"
      - function: take_first

  # Majority vote over every extracted answer.
  - name: maj@64
    filter:
      - function: regex
        regex_pattern: "The answer is (\\-?[0-9\\.\\,]*[0-9]+)"
      - function: majority_vote
      - function: take_first

  # Maj@8, obtained by keeping just the first 8 responses before voting.
  # A better estimator would be preferable to this simple truncation.
  - name: maj@8
    filter:
      - function: take_first_k
        k: 8
      - function: regex
        regex_pattern: "The answer is (\\-?[0-9\\.\\,]*[0-9]+)"
      - function: majority_vote
      - function: take_first

metadata:
  version: 0.0