{ "results": { "bigbench_hyperbaton": { "multiple_choice_grade": 0.51524, "multiple_choice_grade_stderr": 0.0022350513992069 }, "bigbench_salient_translation_error_detection": { "multiple_choice_grade": 0.19839679358717435, "multiple_choice_grade_stderr": 0.012629887094728112 }, "bigbench_geometric_shapes": { "multiple_choice_grade": 0.2785515320334262, "multiple_choice_grade_stderr": 0.023692665345206258, "exact_str_match": 0.0, "exact_str_match_stderr": 0.0 }, "bigbench_navigate": { "multiple_choice_grade": 0.49, "multiple_choice_grade_stderr": 0.015816135752773193 }, "bigbench_date_understanding": { "multiple_choice_grade": 0.6991869918699187, "multiple_choice_grade_stderr": 0.023906779002093273 }, "bigbench_disambiguation_qa": { "multiple_choice_grade": 0.5426356589147286, "multiple_choice_grade_stderr": 0.031075544990472662 }, "bigbench_tracking_shuffled_objects_three_objects": { "multiple_choice_grade": 0.53, "multiple_choice_grade_stderr": 0.02886365132641709 }, "bigbench_dyck_languages": { "multiple_choice_grade": 0.212, "multiple_choice_grade_stderr": 0.01293148186493804 }, "bigbench_formal_fallacies_syllogisms_negation": { "multiple_choice_grade": 0.5058450704225352, "multiple_choice_grade_stderr": 0.004195767817554208 }, "bigbench_tracking_shuffled_objects_seven_objects": { "multiple_choice_grade": 0.15485714285714286, "multiple_choice_grade_stderr": 0.00865039181414196 }, "bigbench_causal_judgement": { "multiple_choice_grade": 0.5736842105263158, "multiple_choice_grade_stderr": 0.03597255252302466 }, "bigbench_movie_recommendation": { "multiple_choice_grade": 0.632, "multiple_choice_grade_stderr": 0.02158898256835354 }, "bigbench_tracking_shuffled_objects_five_objects": { "multiple_choice_grade": 0.2128, "multiple_choice_grade_stderr": 0.01158102863217863 }, "bigbench_snarks": { "multiple_choice_grade": 0.4696132596685083, "multiple_choice_grade_stderr": 0.03719891321680327 }, "bigbench_sports_understanding": { "multiple_choice_grade": 0.6237322515212982, "multiple_choice_grade_stderr": 0.01543581207286162 }, "bigbench_logical_deduction_seven_objects": { "multiple_choice_grade": 0.25285714285714284, "multiple_choice_grade_stderr": 0.01643996352811702 }, "bigbench_temporal_sequences": { "multiple_choice_grade": 0.146, "multiple_choice_grade_stderr": 0.011171786285496496 }, "bigbench_logical_deduction_five_objects": { "multiple_choice_grade": 0.368, "multiple_choice_grade_stderr": 0.021588982568353548 }, "bigbench_ruin_names": { "multiple_choice_grade": 0.39732142857142855, "multiple_choice_grade_stderr": 0.023145155753004788 }, "bigbench_logical_deduction_three_objects": { "multiple_choice_grade": 0.53, "multiple_choice_grade_stderr": 0.02886365132641709 }, "bigbench_reasoning_about_colored_objects": { "multiple_choice_grade": 0.5565, "multiple_choice_grade_stderr": 0.011111507899646487 } }, "versions": { "bigbench_hyperbaton": 0, "bigbench_salient_translation_error_detection": 0, "bigbench_geometric_shapes": 0, "bigbench_navigate": 0, "bigbench_date_understanding": 0, "bigbench_disambiguation_qa": 0, "bigbench_tracking_shuffled_objects_three_objects": 0, "bigbench_dyck_languages": 0, "bigbench_formal_fallacies_syllogisms_negation": 0, "bigbench_tracking_shuffled_objects_seven_objects": 0, "bigbench_causal_judgement": 0, "bigbench_movie_recommendation": 0, "bigbench_tracking_shuffled_objects_five_objects": 0, "bigbench_snarks": 0, "bigbench_sports_understanding": 0, "bigbench_logical_deduction_seven_objects": 0, "bigbench_temporal_sequences": 0, "bigbench_logical_deduction_five_objects": 0, "bigbench_ruin_names": 0, "bigbench_logical_deduction_three_objects": 0, "bigbench_reasoning_about_colored_objects": 0 }, "config": { "model": "hf-causal-experimental", "model_args": "pretrained=/gaueko1/hizkuntza-ereduak/LLaMA/lm/huggingface/30B,use_accelerate=True", "num_fewshot": 3, "batch_size": "auto", "device": "cuda", "no_cache": true, "limit": null, "bootstrap_iters": 100000, "description_dict": {} } }