# opt-2.7b

## opt-2.7b.json

| Task |Version| Metric |Value| |Stderr|
|-------------------------------------------------|------:|--------|----:|---|-----:|
|arc_challenge | 0|acc |26.79|± | 1.29|
| | |acc_norm|31.23|± | 1.35|
|arc_easy | 0|acc |60.77|± | 1.00|
| | |acc_norm|54.29|± | 1.02|
|hendrycksTest-abstract_algebra | 0|acc |23.00|± | 4.23|
| | |acc_norm|22.00|± | 4.16|
|hendrycksTest-anatomy | 0|acc |24.44|± | 3.71|
| | |acc_norm|24.44|± | 3.71|
|hendrycksTest-astronomy | 0|acc |25.00|± | 3.52|
| | |acc_norm|34.87|± | 3.88|
|hendrycksTest-business_ethics | 0|acc |25.00|± | 4.35|
| | |acc_norm|28.00|± | 4.51|
|hendrycksTest-clinical_knowledge | 0|acc |22.26|± | 2.56|
| | |acc_norm|29.43|± | 2.80|
|hendrycksTest-college_biology | 0|acc |26.39|± | 3.69|
| | |acc_norm|24.31|± | 3.59|
|hendrycksTest-college_chemistry | 0|acc |21.00|± | 4.09|
| | |acc_norm|30.00|± | 4.61|
|hendrycksTest-college_computer_science | 0|acc |34.00|± | 4.76|
| | |acc_norm|32.00|± | 4.69|
|hendrycksTest-college_mathematics | 0|acc |20.00|± | 4.02|
| | |acc_norm|28.00|± | 4.51|
|hendrycksTest-college_medicine | 0|acc |21.97|± | 3.16|
| | |acc_norm|23.70|± | 3.24|
|hendrycksTest-college_physics | 0|acc |36.27|± | 4.78|
| | |acc_norm|28.43|± | 4.49|
|hendrycksTest-computer_security | 0|acc |25.00|± | 4.35|
| | |acc_norm|35.00|± | 4.79|
|hendrycksTest-conceptual_physics | 0|acc |26.81|± | 2.90|
| | |acc_norm|22.55|± | 2.73|
|hendrycksTest-econometrics | 0|acc |27.19|± | 4.19|
| | |acc_norm|24.56|± | 4.05|
|hendrycksTest-electrical_engineering | 0|acc |33.79|± | 3.94|
| | |acc_norm|35.17|± | 3.98|
|hendrycksTest-elementary_mathematics | 0|acc |25.93|± | 2.26|
| | |acc_norm|27.78|± | 2.31|
|hendrycksTest-formal_logic | 0|acc |31.75|± | 4.16|
| | |acc_norm|26.19|± | 3.93|
|hendrycksTest-global_facts | 0|acc |25.00|± | 4.35|
| | |acc_norm|27.00|± | 4.46|
|hendrycksTest-high_school_biology | 0|acc |21.94|± | 2.35|
| | |acc_norm|26.13|± | 2.50|
|hendrycksTest-high_school_chemistry | 0|acc |19.70|± | 2.80|
| | |acc_norm|30.05|± | 3.23|
|hendrycksTest-high_school_computer_science | 0|acc |19.00|± | 3.94|
| | |acc_norm|27.00|± | 4.46|
|hendrycksTest-high_school_european_history | 0|acc |26.06|± | 3.43|
| | |acc_norm|27.88|± | 3.50|
|hendrycksTest-high_school_geography | 0|acc |19.19|± | 2.81|
| | |acc_norm|28.79|± | 3.23|
|hendrycksTest-high_school_government_and_politics| 0|acc |24.87|± | 3.12|
| | |acc_norm|25.39|± | 3.14|
|hendrycksTest-high_school_macroeconomics | 0|acc |26.67|± | 2.24|
| | |acc_norm|27.44|± | 2.26|
|hendrycksTest-high_school_mathematics | 0|acc |22.59|± | 2.55|
| | |acc_norm|28.15|± | 2.74|
|hendrycksTest-high_school_microeconomics | 0|acc |20.59|± | 2.63|
| | |acc_norm|26.89|± | 2.88|
|hendrycksTest-high_school_physics | 0|acc |23.18|± | 3.45|
| | |acc_norm|27.15|± | 3.63|
|hendrycksTest-high_school_psychology | 0|acc |26.79|± | 1.90|
| | |acc_norm|24.40|± | 1.84|
|hendrycksTest-high_school_statistics | 0|acc |24.54|± | 2.93|
| | |acc_norm|29.17|± | 3.10|
|hendrycksTest-high_school_us_history | 0|acc |29.90|± | 3.21|
| | |acc_norm|26.47|± | 3.10|
|hendrycksTest-high_school_world_history | 0|acc |24.89|± | 2.81|
| | |acc_norm|25.74|± | 2.85|
|hendrycksTest-human_aging | 0|acc |32.29|± | 3.14|
| | |acc_norm|25.11|± | 2.91|
|hendrycksTest-human_sexuality | 0|acc |38.17|± | 4.26|
| | |acc_norm|28.24|± | 3.95|
|hendrycksTest-international_law | 0|acc |21.49|± | 3.75|
| | |acc_norm|40.50|± | 4.48|
|hendrycksTest-jurisprudence | 0|acc |31.48|± | 4.49|
| | |acc_norm|47.22|± | 4.83|
|hendrycksTest-logical_fallacies | 0|acc |23.93|± | 3.35|
| | |acc_norm|26.99|± | 3.49|
|hendrycksTest-machine_learning | 0|acc |29.46|± | 4.33|
| | |acc_norm|25.89|± | 4.16|
|hendrycksTest-management | 0|acc |24.27|± | 4.25|
| | |acc_norm|24.27|± | 4.25|
|hendrycksTest-marketing | 0|acc |27.78|± | 2.93|
| | |acc_norm|29.91|± | 3.00|
|hendrycksTest-medical_genetics | 0|acc |28.00|± | 4.51|
| | |acc_norm|36.00|± | 4.82|
|hendrycksTest-miscellaneous | 0|acc |28.74|± | 1.62|
| | |acc_norm|27.97|± | 1.61|
|hendrycksTest-moral_disputes | 0|acc |25.72|± | 2.35|
| | |acc_norm|31.79|± | 2.51|
|hendrycksTest-moral_scenarios | 0|acc |28.27|± | 1.51|
| | |acc_norm|27.26|± | 1.49|
|hendrycksTest-nutrition | 0|acc |29.74|± | 2.62|
| | |acc_norm|36.27|± | 2.75|
|hendrycksTest-philosophy | 0|acc |24.12|± | 2.43|
| | |acc_norm|30.23|± | 2.61|
|hendrycksTest-prehistory | 0|acc |22.53|± | 2.32|
| | |acc_norm|21.30|± | 2.28|
|hendrycksTest-professional_accounting | 0|acc |20.92|± | 2.43|
| | |acc_norm|25.53|± | 2.60|
|hendrycksTest-professional_law | 0|acc |26.40|± | 1.13|
| | |acc_norm|28.23|± | 1.15|
|hendrycksTest-professional_medicine | 0|acc |23.16|± | 2.56|
| | |acc_norm|24.26|± | 2.60|
|hendrycksTest-professional_psychology | 0|acc |27.29|± | 1.80|
| | |acc_norm|26.31|± | 1.78|
|hendrycksTest-public_relations | 0|acc |32.73|± | 4.49|
| | |acc_norm|22.73|± | 4.01|
|hendrycksTest-security_studies | 0|acc |32.24|± | 2.99|
| | |acc_norm|25.71|± | 2.80|
|hendrycksTest-sociology | 0|acc |25.37|± | 3.08|
| | |acc_norm|27.86|± | 3.17|
|hendrycksTest-us_foreign_policy | 0|acc |31.00|± | 4.65|
| | |acc_norm|31.00|± | 4.65|
|hendrycksTest-virology | 0|acc |36.75|± | 3.75|
| | |acc_norm|33.13|± | 3.66|
|hendrycksTest-world_religions | 0|acc |33.92|± | 3.63|
| | |acc_norm|45.61|± | 3.82|
|lambada_openai | 0|ppl | 5.12|± | 0.12|
| | |acc |63.59|± | 0.67|
|logiqa | 0|acc |21.04|± | 1.60|
| | |acc_norm|25.96|± | 1.72|
|piqa | 0|acc |73.88|± | 1.02|
| | |acc_norm|74.81|± | 1.01|
|sciq | 0|acc |85.80|± | 1.10|
| | |acc_norm|79.00|± | 1.29|
|winogrande | 0|acc |61.01|± | 1.37|
|wsc | 0|acc |63.46|± | 4.74|