changed dataset to parquet version (#2845)

908ac2b2 · Baber Abbasi · GitHub · 1b357a68 · 908ac2b2 · 908ac2b2
Unverified commit 908ac2b2 — authored by Baber Abbasi on Mar 26, 2025; committed by GitHub on Mar 26, 2025
6 changed files
--- a/lm_eval/tasks/bbh/README.md
+++ b/lm_eval/tasks/bbh/README.md
@@ -51,3 +51,6 @@ None.
 - [ ] Variant with Calculator (see https://github.com/openai/grade-school-math/blob/master/grade_school_math/calculator.py for example implementation)
 - [ ] Using Verifiers
 - [ ] Majority voting "without CoT"
+
+### Changelog
+no version change: changed dataset to `SaylorTwift/bbh`. Do not expect any change in the results.
--- a/lm_eval/tasks/bbh/_generate_configs.py
+++ b/lm_eval/tasks/bbh/_generate_configs.py
@@ -11,8 +11,6 @@ import requests
 import yaml
 from tqdm import tqdm

-from lm_eval import utils
-

 def parse_args():
    parser = argparse.ArgumentParser()
@@ -51,9 +49,10 @@ if __name__ == "__main__":
                for shot in few_shot:
                    try:
                        answer = answer_regex.search(shot)[0]
-                    except Exception:
+                    except Exception as e:
                        print("task", task)
                        print(shot)
+                        raise e
                    example = shot.split("Let's think step by step.")[0]
                    prefix_doc_to_text += f"{example}{answer}\n\n"

@@ -70,7 +69,7 @@ if __name__ == "__main__":
        }

        file_save_path = args.save_prefix_path + f"/{task}.yaml"
-        utils.eval_logger.info(f"Saving yaml for subset {task} to {file_save_path}")
+        print(f"Saving yaml for subset {task} to {file_save_path}")
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,

--- a/lm_eval/tasks/bbh/cot_fewshot/_cot_fewshot_template_yaml
+++ b/lm_eval/tasks/bbh/cot_fewshot/_cot_fewshot_template_yaml
-dataset_path: lukaemon/bbh
+dataset_path: SaylorTwift/bbh
 output_type: generate_until
 test_split: test
 doc_to_target: "{{target}}"

--- a/lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml
+++ b/lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml
-dataset_path: lukaemon/bbh
+dataset_path: SaylorTwift/bbh
 output_type: generate_until
 test_split: test
 doc_to_target: "{{target}}"

--- a/lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml
+++ b/lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml
-dataset_path: lukaemon/bbh
+dataset_path: SaylorTwift/bbh
 output_type: generate_until
 test_split: test
 doc_to_target: "{{target}}"

--- a/lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
+++ b/lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
-dataset_path: lukaemon/bbh
+dataset_path: SaylorTwift/bbh
 output_type: generate_until
 test_split: test
 doc_to_target: "{{target}}"