# opt-350m

## opt-350m.json

| Task |Version| Metric |Value| |Stderr|
|-------------------------------------------------|------:|--------|----:|---|-----:|
|arc_challenge | 0|acc |20.73|± | 1.18|
| | |acc_norm|23.89|± | 1.25|
|arc_easy | 0|acc |44.02|± | 1.02|
| | |acc_norm|40.36|± | 1.01|
|hendrycksTest-abstract_algebra | 0|acc |22.00|± | 4.16|
| | |acc_norm|24.00|± | 4.29|
|hendrycksTest-anatomy | 0|acc |22.22|± | 3.59|
| | |acc_norm|20.74|± | 3.50|
|hendrycksTest-astronomy | 0|acc |21.05|± | 3.32|
| | |acc_norm|33.55|± | 3.84|
|hendrycksTest-business_ethics | 0|acc |33.00|± | 4.73|
| | |acc_norm|31.00|± | 4.65|
|hendrycksTest-clinical_knowledge | 0|acc |21.13|± | 2.51|
| | |acc_norm|30.57|± | 2.84|
|hendrycksTest-college_biology | 0|acc |27.78|± | 3.75|
| | |acc_norm|22.92|± | 3.51|
|hendrycksTest-college_chemistry | 0|acc |26.00|± | 4.41|
| | |acc_norm|34.00|± | 4.76|
|hendrycksTest-college_computer_science | 0|acc |28.00|± | 4.51|
| | |acc_norm|24.00|± | 4.29|
|hendrycksTest-college_mathematics | 0|acc |22.00|± | 4.16|
| | |acc_norm|29.00|± | 4.56|
|hendrycksTest-college_medicine | 0|acc |24.86|± | 3.30|
| | |acc_norm|23.12|± | 3.21|
|hendrycksTest-college_physics | 0|acc |19.61|± | 3.95|
| | |acc_norm|24.51|± | 4.28|
|hendrycksTest-computer_security | 0|acc |29.00|± | 4.56|
| | |acc_norm|32.00|± | 4.69|
|hendrycksTest-conceptual_physics | 0|acc |27.66|± | 2.92|
| | |acc_norm|20.00|± | 2.61|
|hendrycksTest-econometrics | 0|acc |22.81|± | 3.95|
| | |acc_norm|26.32|± | 4.14|
|hendrycksTest-electrical_engineering | 0|acc |33.10|± | 3.92|
| | |acc_norm|31.03|± | 3.86|
|hendrycksTest-elementary_mathematics | 0|acc |23.81|± | 2.19|
| | |acc_norm|23.54|± | 2.19|
|hendrycksTest-formal_logic | 0|acc |32.54|± | 4.19|
| | |acc_norm|32.54|± | 4.19|
|hendrycksTest-global_facts | 0|acc |24.00|± | 4.29|
| | |acc_norm|24.00|± | 4.29|
|hendrycksTest-high_school_biology | 0|acc |24.19|± | 2.44|
| | |acc_norm|27.74|± | 2.55|
|hendrycksTest-high_school_chemistry | 0|acc |19.70|± | 2.80|
| | |acc_norm|26.11|± | 3.09|
|hendrycksTest-high_school_computer_science | 0|acc |23.00|± | 4.23|
| | |acc_norm|26.00|± | 4.41|
|hendrycksTest-high_school_european_history | 0|acc |24.85|± | 3.37|
| | |acc_norm|31.52|± | 3.63|
|hendrycksTest-high_school_geography | 0|acc |21.72|± | 2.94|
| | |acc_norm|27.78|± | 3.19|
|hendrycksTest-high_school_government_and_politics| 0|acc |23.83|± | 3.07|
| | |acc_norm|26.94|± | 3.20|
|hendrycksTest-high_school_macroeconomics | 0|acc |27.69|± | 2.27|
| | |acc_norm|27.95|± | 2.28|
|hendrycksTest-high_school_mathematics | 0|acc |21.85|± | 2.52|
| | |acc_norm|27.41|± | 2.72|
|hendrycksTest-high_school_microeconomics | 0|acc |19.33|± | 2.56|
| | |acc_norm|28.15|± | 2.92|
|hendrycksTest-high_school_physics | 0|acc |25.17|± | 3.54|
| | |acc_norm|24.50|± | 3.51|
|hendrycksTest-high_school_psychology | 0|acc |22.57|± | 1.79|
| | |acc_norm|23.30|± | 1.81|
|hendrycksTest-high_school_statistics | 0|acc |26.85|± | 3.02|
| | |acc_norm|28.70|± | 3.09|
|hendrycksTest-high_school_us_history | 0|acc |25.00|± | 3.04|
| | |acc_norm|28.43|± | 3.17|
|hendrycksTest-high_school_world_history | 0|acc |27.43|± | 2.90|
| | |acc_norm|29.96|± | 2.98|
|hendrycksTest-human_aging | 0|acc |35.87|± | 3.22|
| | |acc_norm|27.80|± | 3.01|
|hendrycksTest-human_sexuality | 0|acc |35.11|± | 4.19|
| | |acc_norm|32.82|± | 4.12|
|hendrycksTest-international_law | 0|acc |17.36|± | 3.46|
| | |acc_norm|44.63|± | 4.54|
|hendrycksTest-jurisprudence | 0|acc |25.00|± | 4.19|
| | |acc_norm|37.04|± | 4.67|
|hendrycksTest-logical_fallacies | 0|acc |19.02|± | 3.08|
| | |acc_norm|28.22|± | 3.54|
|hendrycksTest-machine_learning | 0|acc |29.46|± | 4.33|
| | |acc_norm|23.21|± | 4.01|
|hendrycksTest-management | 0|acc |15.53|± | 3.59|
| | |acc_norm|25.24|± | 4.30|
|hendrycksTest-marketing | 0|acc |27.78|± | 2.93|
| | |acc_norm|32.05|± | 3.06|
|hendrycksTest-medical_genetics | 0|acc |23.00|± | 4.23|
| | |acc_norm|39.00|± | 4.90|
|hendrycksTest-miscellaneous | 0|acc |28.10|± | 1.61|
| | |acc_norm|26.18|± | 1.57|
|hendrycksTest-moral_disputes | 0|acc |30.06|± | 2.47|
| | |acc_norm|31.79|± | 2.51|
|hendrycksTest-moral_scenarios | 0|acc |23.80|± | 1.42|
| | |acc_norm|26.82|± | 1.48|
|hendrycksTest-nutrition | 0|acc |28.43|± | 2.58|
| | |acc_norm|38.24|± | 2.78|
|hendrycksTest-philosophy | 0|acc |20.26|± | 2.28|
| | |acc_norm|30.87|± | 2.62|
|hendrycksTest-prehistory | 0|acc |22.84|± | 2.34|
| | |acc_norm|20.68|± | 2.25|
|hendrycksTest-professional_accounting | 0|acc |24.11|± | 2.55|
| | |acc_norm|23.40|± | 2.53|
|hendrycksTest-professional_law | 0|acc |25.62|± | 1.11|
| | |acc_norm|27.71|± | 1.14|
|hendrycksTest-professional_medicine | 0|acc |23.16|± | 2.56|
| | |acc_norm|25.74|± | 2.66|
|hendrycksTest-professional_psychology | 0|acc |24.51|± | 1.74|
| | |acc_norm|25.49|± | 1.76|
|hendrycksTest-public_relations | 0|acc |30.00|± | 4.39|
| | |acc_norm|24.55|± | 4.12|
|hendrycksTest-security_studies | 0|acc |36.73|± | 3.09|
| | |acc_norm|26.53|± | 2.83|
|hendrycksTest-sociology | 0|acc |31.34|± | 3.28|
| | |acc_norm|28.36|± | 3.19|
|hendrycksTest-us_foreign_policy | 0|acc |29.00|± | 4.56|
| | |acc_norm|29.00|± | 4.56|
|hendrycksTest-virology | 0|acc |29.52|± | 3.55|
| | |acc_norm|30.72|± | 3.59|
|hendrycksTest-world_religions | 0|acc |26.32|± | 3.38|
| | |acc_norm|33.33|± | 3.62|
|lambada_openai | 0|ppl |16.40|± | 0.56|
| | |acc |45.16|± | 0.69|
|logiqa | 0|acc |21.04|± | 1.60|
| | |acc_norm|28.57|± | 1.77|
|piqa | 0|acc |64.36|± | 1.12|
| | |acc_norm|64.74|± | 1.11|
|sciq | 0|acc |74.80|± | 1.37|
| | |acc_norm|66.90|± | 1.49|
|winogrande | 0|acc |52.33|± | 1.40|
|wsc | 0|acc |36.54|± | 4.74|