# opt-30b

## opt-30b.json

| Task |Version| Metric |Value| |Stderr|
|-------------------------------------------------|------:|--------|----:|---|-----:|
|arc_challenge | 0|acc |34.64|± | 1.39|
| | |acc_norm|37.97|± | 1.42|
|arc_easy | 0|acc |69.99|± | 0.94|
| | |acc_norm|65.36|± | 0.98|
|hendrycksTest-abstract_algebra | 0|acc |23.00|± | 4.23|
| | |acc_norm|23.00|± | 4.23|
|hendrycksTest-anatomy | 0|acc |24.44|± | 3.71|
| | |acc_norm|20.74|± | 3.50|
|hendrycksTest-astronomy | 0|acc |30.92|± | 3.76|
| | |acc_norm|37.50|± | 3.94|
|hendrycksTest-business_ethics | 0|acc |34.00|± | 4.76|
| | |acc_norm|31.00|± | 4.65|
|hendrycksTest-clinical_knowledge | 0|acc |25.66|± | 2.69|
| | |acc_norm|30.94|± | 2.85|
|hendrycksTest-college_biology | 0|acc |27.08|± | 3.72|
| | |acc_norm|28.47|± | 3.77|
|hendrycksTest-college_chemistry | 0|acc |25.00|± | 4.35|
| | |acc_norm|32.00|± | 4.69|
|hendrycksTest-college_computer_science | 0|acc |31.00|± | 4.65|
| | |acc_norm|25.00|± | 4.35|
|hendrycksTest-college_mathematics | 0|acc |22.00|± | 4.16|
| | |acc_norm|36.00|± | 4.82|
|hendrycksTest-college_medicine | 0|acc |24.28|± | 3.27|
| | |acc_norm|27.17|± | 3.39|
|hendrycksTest-college_physics | 0|acc |27.45|± | 4.44|
| | |acc_norm|26.47|± | 4.39|
|hendrycksTest-computer_security | 0|acc |25.00|± | 4.35|
| | |acc_norm|34.00|± | 4.76|
|hendrycksTest-conceptual_physics | 0|acc |24.68|± | 2.82|
| | |acc_norm|20.00|± | 2.61|
|hendrycksTest-econometrics | 0|acc |26.32|± | 4.14|
| | |acc_norm|21.05|± | 3.84|
|hendrycksTest-electrical_engineering | 0|acc |31.72|± | 3.88|
| | |acc_norm|33.10|± | 3.92|
|hendrycksTest-elementary_mathematics | 0|acc |26.72|± | 2.28|
| | |acc_norm|27.25|± | 2.29|
|hendrycksTest-formal_logic | 0|acc |30.95|± | 4.13|
| | |acc_norm|26.19|± | 3.93|
|hendrycksTest-global_facts | 0|acc |23.00|± | 4.23|
| | |acc_norm|24.00|± | 4.29|
|hendrycksTest-high_school_biology | 0|acc |28.06|± | 2.56|
| | |acc_norm|30.32|± | 2.61|
|hendrycksTest-high_school_chemistry | 0|acc |26.60|± | 3.11|
| | |acc_norm|31.03|± | 3.26|
|hendrycksTest-high_school_computer_science | 0|acc |24.00|± | 4.29|
| | |acc_norm|29.00|± | 4.56|
|hendrycksTest-high_school_european_history | 0|acc |22.42|± | 3.26|
| | |acc_norm|26.06|± | 3.43|
|hendrycksTest-high_school_geography | 0|acc |26.26|± | 3.14|
| | |acc_norm|28.79|± | 3.23|
|hendrycksTest-high_school_government_and_politics| 0|acc |24.87|± | 3.12|
| | |acc_norm|24.87|± | 3.12|
|hendrycksTest-high_school_macroeconomics | 0|acc |28.46|± | 2.29|
| | |acc_norm|28.21|± | 2.28|
|hendrycksTest-high_school_mathematics | 0|acc |25.19|± | 2.65|
| | |acc_norm|30.37|± | 2.80|
|hendrycksTest-high_school_microeconomics | 0|acc |26.89|± | 2.88|
| | |acc_norm|34.45|± | 3.09|
|hendrycksTest-high_school_physics | 0|acc |25.17|± | 3.54|
| | |acc_norm|25.17|± | 3.54|
|hendrycksTest-high_school_psychology | 0|acc |24.40|± | 1.84|
| | |acc_norm|24.59|± | 1.85|
|hendrycksTest-high_school_statistics | 0|acc |34.26|± | 3.24|
| | |acc_norm|36.11|± | 3.28|
|hendrycksTest-high_school_us_history | 0|acc |28.43|± | 3.17|
| | |acc_norm|25.98|± | 3.08|
|hendrycksTest-high_school_world_history | 0|acc |26.16|± | 2.86|
| | |acc_norm|27.00|± | 2.89|
|hendrycksTest-human_aging | 0|acc |25.56|± | 2.93|
| | |acc_norm|22.87|± | 2.82|
|hendrycksTest-human_sexuality | 0|acc |37.40|± | 4.24|
| | |acc_norm|31.30|± | 4.07|
|hendrycksTest-international_law | 0|acc |25.62|± | 3.98|
| | |acc_norm|53.72|± | 4.55|
|hendrycksTest-jurisprudence | 0|acc |35.19|± | 4.62|
| | |acc_norm|43.52|± | 4.79|
|hendrycksTest-logical_fallacies | 0|acc |30.06|± | 3.60|
| | |acc_norm|34.36|± | 3.73|
|hendrycksTest-machine_learning | 0|acc |24.11|± | 4.06|
| | |acc_norm|23.21|± | 4.01|
|hendrycksTest-management | 0|acc |27.18|± | 4.41|
| | |acc_norm|35.92|± | 4.75|
|hendrycksTest-marketing | 0|acc |25.64|± | 2.86|
| | |acc_norm|30.77|± | 3.02|
|hendrycksTest-medical_genetics | 0|acc |24.00|± | 4.29|
| | |acc_norm|39.00|± | 4.90|
|hendrycksTest-miscellaneous | 0|acc |31.16|± | 1.66|
| | |acc_norm|28.10|± | 1.61|
|hendrycksTest-moral_disputes | 0|acc |28.32|± | 2.43|
| | |acc_norm|33.53|± | 2.54|
|hendrycksTest-moral_scenarios | 0|acc |24.69|± | 1.44|
| | |acc_norm|24.47|± | 1.44|
|hendrycksTest-nutrition | 0|acc |30.39|± | 2.63|
| | |acc_norm|40.52|± | 2.81|
|hendrycksTest-philosophy | 0|acc |29.26|± | 2.58|
| | |acc_norm|33.44|± | 2.68|
|hendrycksTest-prehistory | 0|acc |24.07|± | 2.38|
| | |acc_norm|17.90|± | 2.13|
|hendrycksTest-professional_accounting | 0|acc |21.63|± | 2.46|
| | |acc_norm|25.53|± | 2.60|
|hendrycksTest-professional_law | 0|acc |27.57|± | 1.14|
| | |acc_norm|28.42|± | 1.15|
|hendrycksTest-professional_medicine | 0|acc |27.57|± | 2.71|
| | |acc_norm|27.57|± | 2.71|
|hendrycksTest-professional_psychology | 0|acc |27.94|± | 1.82|
| | |acc_norm|27.29|± | 1.80|
|hendrycksTest-public_relations | 0|acc |28.18|± | 4.31|
| | |acc_norm|20.00|± | 3.83|
|hendrycksTest-security_studies | 0|acc |37.96|± | 3.11|
| | |acc_norm|34.29|± | 3.04|
|hendrycksTest-sociology | 0|acc |25.37|± | 3.08|
| | |acc_norm|22.39|± | 2.95|
|hendrycksTest-us_foreign_policy | 0|acc |42.00|± | 4.96|
| | |acc_norm|42.00|± | 4.96|
|hendrycksTest-virology | 0|acc |32.53|± | 3.65|
| | |acc_norm|27.11|± | 3.46|
|hendrycksTest-world_religions | 0|acc |32.16|± | 3.58|
| | |acc_norm|39.18|± | 3.74|
|lambada_openai | 0|ppl | 3.63|± | 0.07|
| | |acc |71.45|± | 0.63|
|logiqa | 0|acc |21.66|± | 1.62|
| | |acc_norm|28.42|± | 1.77|
|piqa | 0|acc |77.58|± | 0.97|
| | |acc_norm|78.13|± | 0.96|
|sciq | 0|acc |91.10|± | 0.90|
| | |acc_norm|88.20|± | 1.02|
|winogrande | 0|acc |68.19|± | 1.31|
|wsc | 0|acc |59.62|± | 4.83|