{ "results": { "bigbench_disambiguation_qa": { "multiple_choice_grade": 0.26356589147286824, "multiple_choice_grade_stderr": 0.027481788262218698 }, "bigbench_logical_deduction_three_objects": { "multiple_choice_grade": 0.37, "multiple_choice_grade_stderr": 0.027921294063982 }, "bigbench_causal_judgement": { "multiple_choice_grade": 0.5210526315789473, "multiple_choice_grade_stderr": 0.03633739504773335 }, "bigbench_date_understanding": { "multiple_choice_grade": 0.36585365853658536, "multiple_choice_grade_stderr": 0.025108717905729792 }, "bigbench_navigate": { "multiple_choice_grade": 0.499, "multiple_choice_grade_stderr": 0.015819268290576817 }, "bigbench_salient_translation_error_detection": { "multiple_choice_grade": 0.19138276553106212, "multiple_choice_grade_stderr": 0.012458774650265594 }, "bigbench_temporal_sequences": { "multiple_choice_grade": 0.248, "multiple_choice_grade_stderr": 0.013663187134877651 }, "bigbench_tracking_shuffled_objects_seven_objects": { "multiple_choice_grade": 0.14, "multiple_choice_grade_stderr": 0.00829694743648913 }, "bigbench_ruin_names": { "multiple_choice_grade": 0.34375, "multiple_choice_grade_stderr": 0.02246478414865448 }, "bigbench_reasoning_about_colored_objects": { "multiple_choice_grade": 0.2485, "multiple_choice_grade_stderr": 0.009665432493822852 }, "bigbench_dyck_languages": { "multiple_choice_grade": 0.144, "multiple_choice_grade_stderr": 0.01110798754893915 }, "bigbench_logical_deduction_five_objects": { "multiple_choice_grade": 0.26, "multiple_choice_grade_stderr": 0.019635965529725512 }, "bigbench_sports_understanding": { "multiple_choice_grade": 0.5030425963488844, "multiple_choice_grade_stderr": 0.015931029729145698 }, "bigbench_tracking_shuffled_objects_three_objects": { "multiple_choice_grade": 0.37, "multiple_choice_grade_stderr": 0.027921294063982 }, "bigbench_geometric_shapes": { "multiple_choice_grade": 0.20055710306406685, "multiple_choice_grade_stderr": 0.021162707757982353, "exact_str_match": 0.0, "exact_str_match_stderr": 0.0 }, "bigbench_hyperbaton": { "multiple_choice_grade": 0.48618, "multiple_choice_grade_stderr": 0.0022352360227943418 }, "bigbench_logical_deduction_seven_objects": { "multiple_choice_grade": 0.19142857142857142, "multiple_choice_grade_stderr": 0.014880721436998012 }, "bigbench_snarks": { "multiple_choice_grade": 0.4972375690607735, "multiple_choice_grade_stderr": 0.037267230837657574 }, "bigbench_formal_fallacies_syllogisms_negation": { "multiple_choice_grade": 0.5005633802816901, "multiple_choice_grade_stderr": 0.004196051878850066 }, "bigbench_tracking_shuffled_objects_five_objects": { "multiple_choice_grade": 0.184, "multiple_choice_grade_stderr": 0.010964094540602657 }, "bigbench_movie_recommendation": { "multiple_choice_grade": 0.264, "multiple_choice_grade_stderr": 0.019732885585922087 } }, "versions": { "bigbench_disambiguation_qa": 0, "bigbench_logical_deduction_three_objects": 0, "bigbench_causal_judgement": 0, "bigbench_date_understanding": 0, "bigbench_navigate": 0, "bigbench_salient_translation_error_detection": 0, "bigbench_temporal_sequences": 0, "bigbench_tracking_shuffled_objects_seven_objects": 0, "bigbench_ruin_names": 0, "bigbench_reasoning_about_colored_objects": 0, "bigbench_dyck_languages": 0, "bigbench_logical_deduction_five_objects": 0, "bigbench_sports_understanding": 0, "bigbench_tracking_shuffled_objects_three_objects": 0, "bigbench_geometric_shapes": 0, "bigbench_hyperbaton": 0, "bigbench_logical_deduction_seven_objects": 0, "bigbench_snarks": 0, "bigbench_formal_fallacies_syllogisms_negation": 0, "bigbench_tracking_shuffled_objects_five_objects": 0, "bigbench_movie_recommendation": 0 }, "config": { "model": "hf-causal-experimental", "model_args": "pretrained=bigscience/bloom-7b1,use_accelerate=True", "num_fewshot": 3, "batch_size": "auto", "device": "cuda", "no_cache": true, "limit": null, "bootstrap_iters": 100000, "description_dict": {} } }