{ "results": { "bigbench_tracking_shuffled_objects_five_objects": { "multiple_choice_grade": 0.1824, "multiple_choice_grade_stderr": 0.010927017514830547 }, "bigbench_logical_deduction_seven_objects": { "multiple_choice_grade": 0.24571428571428572, "multiple_choice_grade_stderr": 0.01628337995683342 }, "bigbench_date_understanding": { "multiple_choice_grade": 0.6205962059620597, "multiple_choice_grade_stderr": 0.02529481360676469 }, "bigbench_navigate": { "multiple_choice_grade": 0.495, "multiple_choice_grade_stderr": 0.015818508944436645 }, "bigbench_geometric_shapes": { "multiple_choice_grade": 0.17827298050139276, "multiple_choice_grade_stderr": 0.02022856303248108, "exact_str_match": 0.0, "exact_str_match_stderr": 0.0 }, "bigbench_dyck_languages": { "multiple_choice_grade": 0.154, "multiple_choice_grade_stderr": 0.011419913065098684 }, "bigbench_temporal_sequences": { "multiple_choice_grade": 0.272, "multiple_choice_grade_stderr": 0.014078856992462611 }, "bigbench_snarks": { "multiple_choice_grade": 0.5082872928176796, "multiple_choice_grade_stderr": 0.03726268022638988 }, "bigbench_disambiguation_qa": { "multiple_choice_grade": 0.35271317829457366, "multiple_choice_grade_stderr": 0.029805242804674153 }, "bigbench_tracking_shuffled_objects_seven_objects": { "multiple_choice_grade": 0.13714285714285715, "multiple_choice_grade_stderr": 0.008225477923226985 }, "bigbench_ruin_names": { "multiple_choice_grade": 0.29910714285714285, "multiple_choice_grade_stderr": 0.021656359273376977 }, "bigbench_movie_recommendation": { "multiple_choice_grade": 0.404, "multiple_choice_grade_stderr": 0.021966635293832918 }, "bigbench_salient_translation_error_detection": { "multiple_choice_grade": 0.1653306613226453, "multiple_choice_grade_stderr": 0.011764848862417502 }, "bigbench_logical_deduction_five_objects": { "multiple_choice_grade": 0.29, "multiple_choice_grade_stderr": 0.020313179231745183 }, "bigbench_causal_judgement": { "multiple_choice_grade": 0.4842105263157895, "multiple_choice_grade_stderr": 0.036351509398643456 }, "bigbench_hyperbaton": { "multiple_choice_grade": 0.49508, "multiple_choice_grade_stderr": 0.0022359820804999713 }, "bigbench_sports_understanding": { "multiple_choice_grade": 0.5, "multiple_choice_grade_stderr": 0.015931324696929153 }, "bigbench_logical_deduction_three_objects": { "multiple_choice_grade": 0.3933333333333333, "multiple_choice_grade_stderr": 0.028250090846760875 }, "bigbench_tracking_shuffled_objects_three_objects": { "multiple_choice_grade": 0.3933333333333333, "multiple_choice_grade_stderr": 0.028250090846760875 }, "bigbench_formal_fallacies_syllogisms_negation": { "multiple_choice_grade": 0.5134507042253521, "multiple_choice_grade_stderr": 0.004194535955193854 }, "bigbench_reasoning_about_colored_objects": { "multiple_choice_grade": 0.346, "multiple_choice_grade_stderr": 0.010639483037236658 } }, "versions": { "bigbench_tracking_shuffled_objects_five_objects": 0, "bigbench_logical_deduction_seven_objects": 0, "bigbench_date_understanding": 0, "bigbench_navigate": 0, "bigbench_geometric_shapes": 0, "bigbench_dyck_languages": 0, "bigbench_temporal_sequences": 0, "bigbench_snarks": 0, "bigbench_disambiguation_qa": 0, "bigbench_tracking_shuffled_objects_seven_objects": 0, "bigbench_ruin_names": 0, "bigbench_movie_recommendation": 0, "bigbench_salient_translation_error_detection": 0, "bigbench_logical_deduction_five_objects": 0, "bigbench_causal_judgement": 0, "bigbench_hyperbaton": 0, "bigbench_sports_understanding": 0, "bigbench_logical_deduction_three_objects": 0, "bigbench_tracking_shuffled_objects_three_objects": 0, "bigbench_formal_fallacies_syllogisms_negation": 0, "bigbench_reasoning_about_colored_objects": 0 }, "config": { "model": "hf-causal-experimental", "model_args": "pretrained=/gaueko1/hizkuntza-ereduak/LLaMA/lm/huggingface/7B,use_accelerate=True", "num_fewshot": 3, "batch_size": "auto", "device": "cuda", "no_cache": true, "limit": null, "bootstrap_iters": 100000, "description_dict": {} } }