# opt-13b

## opt-13b.json

| Task |Version| Metric |Value| |Stderr|
|-------------------------------------------------|------:|--------|----:|---|-----:|
|arc_challenge | 0|acc |32.94|± | 1.37|
| | |acc_norm|35.75|± | 1.40|
|arc_easy | 0|acc |67.09|± | 0.96|
| | |acc_norm|61.78|± | 1.00|
|hendrycksTest-abstract_algebra | 0|acc |23.00|± | 4.23|
| | |acc_norm|24.00|± | 4.29|
|hendrycksTest-anatomy | 0|acc |26.67|± | 3.82|
| | |acc_norm|23.70|± | 3.67|
|hendrycksTest-astronomy | 0|acc |31.58|± | 3.78|
| | |acc_norm|38.16|± | 3.95|
|hendrycksTest-business_ethics | 0|acc |29.00|± | 4.56|
| | |acc_norm|32.00|± | 4.69|
|hendrycksTest-clinical_knowledge | 0|acc |24.15|± | 2.63|
| | |acc_norm|30.19|± | 2.83|
|hendrycksTest-college_biology | 0|acc |20.83|± | 3.40|
| | |acc_norm|21.53|± | 3.44|
|hendrycksTest-college_chemistry | 0|acc |24.00|± | 4.29|
| | |acc_norm|33.00|± | 4.73|
|hendrycksTest-college_computer_science | 0|acc |30.00|± | 4.61|
| | |acc_norm|27.00|± | 4.46|
|hendrycksTest-college_mathematics | 0|acc |21.00|± | 4.09|
| | |acc_norm|30.00|± | 4.61|
|hendrycksTest-college_medicine | 0|acc |25.43|± | 3.32|
| | |acc_norm|22.54|± | 3.19|
|hendrycksTest-college_physics | 0|acc |20.59|± | 4.02|
| | |acc_norm|23.53|± | 4.22|
|hendrycksTest-computer_security | 0|acc |23.00|± | 4.23|
| | |acc_norm|34.00|± | 4.76|
|hendrycksTest-conceptual_physics | 0|acc |24.26|± | 2.80|
| | |acc_norm|20.00|± | 2.61|
|hendrycksTest-econometrics | 0|acc |35.09|± | 4.49|
| | |acc_norm|29.82|± | 4.30|
|hendrycksTest-electrical_engineering | 0|acc |31.72|± | 3.88|
| | |acc_norm|31.72|± | 3.88|
|hendrycksTest-elementary_mathematics | 0|acc |24.07|± | 2.20|
| | |acc_norm|26.72|± | 2.28|
|hendrycksTest-formal_logic | 0|acc |28.57|± | 4.04|
| | |acc_norm|23.81|± | 3.81|
|hendrycksTest-global_facts | 0|acc |24.00|± | 4.29|
| | |acc_norm|24.00|± | 4.29|
|hendrycksTest-high_school_biology | 0|acc |25.16|± | 2.47|
| | |acc_norm|28.39|± | 2.56|
|hendrycksTest-high_school_chemistry | 0|acc |23.65|± | 2.99|
| | |acc_norm|32.51|± | 3.30|
|hendrycksTest-high_school_computer_science | 0|acc |30.00|± | 4.61|
| | |acc_norm|28.00|± | 4.51|
|hendrycksTest-high_school_european_history | 0|acc |25.45|± | 3.40|
| | |acc_norm|29.70|± | 3.57|
|hendrycksTest-high_school_geography | 0|acc |28.79|± | 3.23|
| | |acc_norm|30.30|± | 3.27|
|hendrycksTest-high_school_government_and_politics| 0|acc |29.53|± | 3.29|
| | |acc_norm|25.39|± | 3.14|
|hendrycksTest-high_school_macroeconomics | 0|acc |29.49|± | 2.31|
| | |acc_norm|29.49|± | 2.31|
|hendrycksTest-high_school_mathematics | 0|acc |27.41|± | 2.72|
| | |acc_norm|30.00|± | 2.79|
|hendrycksTest-high_school_microeconomics | 0|acc |26.05|± | 2.85|
| | |acc_norm|32.77|± | 3.05|
|hendrycksTest-high_school_physics | 0|acc |24.50|± | 3.51|
| | |acc_norm|22.52|± | 3.41|
|hendrycksTest-high_school_psychology | 0|acc |27.52|± | 1.91|
| | |acc_norm|24.40|± | 1.84|
|hendrycksTest-high_school_statistics | 0|acc |29.63|± | 3.11|
| | |acc_norm|29.17|± | 3.10|
|hendrycksTest-high_school_us_history | 0|acc |23.04|± | 2.96|
| | |acc_norm|27.94|± | 3.15|
|hendrycksTest-high_school_world_history | 0|acc |25.74|± | 2.85|
| | |acc_norm|29.54|± | 2.97|
|hendrycksTest-human_aging | 0|acc |29.15|± | 3.05|
| | |acc_norm|24.66|± | 2.89|
|hendrycksTest-human_sexuality | 0|acc |32.06|± | 4.09|
| | |acc_norm|31.30|± | 4.07|
|hendrycksTest-international_law | 0|acc |28.10|± | 4.10|
| | |acc_norm|43.80|± | 4.53|
|hendrycksTest-jurisprudence | 0|acc |29.63|± | 4.41|
| | |acc_norm|43.52|± | 4.79|
|hendrycksTest-logical_fallacies | 0|acc |31.90|± | 3.66|
| | |acc_norm|30.06|± | 3.60|
|hendrycksTest-machine_learning | 0|acc |27.68|± | 4.25|
| | |acc_norm|24.11|± | 4.06|
|hendrycksTest-management | 0|acc |33.01|± | 4.66|
| | |acc_norm|32.04|± | 4.62|
|hendrycksTest-marketing | 0|acc |29.06|± | 2.97|
| | |acc_norm|29.91|± | 3.00|
|hendrycksTest-medical_genetics | 0|acc |27.00|± | 4.46|
| | |acc_norm|39.00|± | 4.90|
|hendrycksTest-miscellaneous | 0|acc |32.31|± | 1.67|
| | |acc_norm|28.99|± | 1.62|
|hendrycksTest-moral_disputes | 0|acc |30.35|± | 2.48|
| | |acc_norm|30.92|± | 2.49|
|hendrycksTest-moral_scenarios | 0|acc |27.26|± | 1.49|
| | |acc_norm|27.26|± | 1.49|
|hendrycksTest-nutrition | 0|acc |29.41|± | 2.61|
| | |acc_norm|36.93|± | 2.76|
|hendrycksTest-philosophy | 0|acc |25.40|± | 2.47|
| | |acc_norm|31.83|± | 2.65|
|hendrycksTest-prehistory | 0|acc |25.31|± | 2.42|
| | |acc_norm|20.37|± | 2.24|
|hendrycksTest-professional_accounting | 0|acc |27.30|± | 2.66|
| | |acc_norm|28.37|± | 2.69|
|hendrycksTest-professional_law | 0|acc |27.12|± | 1.14|
| | |acc_norm|28.10|± | 1.15|
|hendrycksTest-professional_medicine | 0|acc |26.47|± | 2.68|
| | |acc_norm|26.84|± | 2.69|
|hendrycksTest-professional_psychology | 0|acc |26.96|± | 1.80|
| | |acc_norm|26.96|± | 1.80|
|hendrycksTest-public_relations | 0|acc |27.27|± | 4.27|
| | |acc_norm|20.00|± | 3.83|
|hendrycksTest-security_studies | 0|acc |36.73|± | 3.09|
| | |acc_norm|28.98|± | 2.90|
|hendrycksTest-sociology | 0|acc |28.36|± | 3.19|
| | |acc_norm|28.36|± | 3.19|
|hendrycksTest-us_foreign_policy | 0|acc |31.00|± | 4.65|
| | |acc_norm|31.00|± | 4.65|
|hendrycksTest-virology | 0|acc |33.73|± | 3.68|
| | |acc_norm|24.70|± | 3.36|
|hendrycksTest-world_religions | 0|acc |33.92|± | 3.63|
| | |acc_norm|39.77|± | 3.75|
|lambada_openai | 0|ppl | 4.04|± | 0.09|
| | |acc |68.64|± | 0.65|
|logiqa | 0|acc |22.73|± | 1.64|
| | |acc_norm|28.57|± | 1.77|
|piqa | 0|acc |75.95|± | 1.00|
| | |acc_norm|76.82|± | 0.98|
|sciq | 0|acc |90.80|± | 0.91|
| | |acc_norm|86.60|± | 1.08|
|winogrande | 0|acc |65.19|± | 1.34|
|wsc | 0|acc |60.58|± | 4.82|