# opt-6.7b

## opt-6.7b.json

| Task |Version| Metric |Value| |Stderr|
|-------------------------------------------------|------:|--------|----:|---|-----:|
|arc_challenge | 0|acc |30.55|± | 1.35|
| | |acc_norm|34.73|± | 1.39|
|arc_easy | 0|acc |65.61|± | 0.97|
| | |acc_norm|60.10|± | 1.00|
|hendrycksTest-abstract_algebra | 0|acc |22.00|± | 4.16|
| | |acc_norm|21.00|± | 4.09|
|hendrycksTest-anatomy | 0|acc |22.22|± | 3.59|
| | |acc_norm|23.70|± | 3.67|
|hendrycksTest-astronomy | 0|acc |26.97|± | 3.61|
| | |acc_norm|32.24|± | 3.80|
|hendrycksTest-business_ethics | 0|acc |33.00|± | 4.73|
| | |acc_norm|24.00|± | 4.29|
|hendrycksTest-clinical_knowledge | 0|acc |26.04|± | 2.70|
| | |acc_norm|29.81|± | 2.82|
|hendrycksTest-college_biology | 0|acc |29.17|± | 3.80|
| | |acc_norm|24.31|± | 3.59|
|hendrycksTest-college_chemistry | 0|acc |22.00|± | 4.16|
| | |acc_norm|36.00|± | 4.82|
|hendrycksTest-college_computer_science | 0|acc |36.00|± | 4.82|
| | |acc_norm|31.00|± | 4.65|
|hendrycksTest-college_mathematics | 0|acc |22.00|± | 4.16|
| | |acc_norm|27.00|± | 4.46|
|hendrycksTest-college_medicine | 0|acc |20.81|± | 3.10|
| | |acc_norm|21.39|± | 3.13|
|hendrycksTest-college_physics | 0|acc |20.59|± | 4.02|
| | |acc_norm|23.53|± | 4.22|
|hendrycksTest-computer_security | 0|acc |24.00|± | 4.29|
| | |acc_norm|28.00|± | 4.51|
|hendrycksTest-conceptual_physics | 0|acc |27.23|± | 2.91|
| | |acc_norm|21.70|± | 2.69|
|hendrycksTest-econometrics | 0|acc |25.44|± | 4.10|
| | |acc_norm|25.44|± | 4.10|
|hendrycksTest-electrical_engineering | 0|acc |29.66|± | 3.81|
| | |acc_norm|34.48|± | 3.96|
|hendrycksTest-elementary_mathematics | 0|acc |24.60|± | 2.22|
| | |acc_norm|25.13|± | 2.23|
|hendrycksTest-formal_logic | 0|acc |29.37|± | 4.07|
| | |acc_norm|24.60|± | 3.85|
|hendrycksTest-global_facts | 0|acc |18.00|± | 3.86|
| | |acc_norm|22.00|± | 4.16|
|hendrycksTest-high_school_biology | 0|acc |25.16|± | 2.47|
| | |acc_norm|28.71|± | 2.57|
|hendrycksTest-high_school_chemistry | 0|acc |16.75|± | 2.63|
| | |acc_norm|27.59|± | 3.14|
|hendrycksTest-high_school_computer_science | 0|acc |24.00|± | 4.29|
| | |acc_norm|33.00|± | 4.73|
|hendrycksTest-high_school_european_history | 0|acc |32.12|± | 3.65|
| | |acc_norm|27.88|± | 3.50|
|hendrycksTest-high_school_geography | 0|acc |21.72|± | 2.94|
| | |acc_norm|27.27|± | 3.17|
|hendrycksTest-high_school_government_and_politics| 0|acc |24.87|± | 3.12|
| | |acc_norm|24.35|± | 3.10|
|hendrycksTest-high_school_macroeconomics | 0|acc |28.97|± | 2.30|
| | |acc_norm|27.95|± | 2.28|
|hendrycksTest-high_school_mathematics | 0|acc |24.07|± | 2.61|
| | |acc_norm|31.48|± | 2.83|
|hendrycksTest-high_school_microeconomics | 0|acc |26.89|± | 2.88|
| | |acc_norm|31.93|± | 3.03|
|hendrycksTest-high_school_physics | 0|acc |21.19|± | 3.34|
| | |acc_norm|22.52|± | 3.41|
|hendrycksTest-high_school_psychology | 0|acc |28.81|± | 1.94|
| | |acc_norm|24.95|± | 1.86|
|hendrycksTest-high_school_statistics | 0|acc |27.78|± | 3.05|
| | |acc_norm|32.41|± | 3.19|
|hendrycksTest-high_school_us_history | 0|acc |26.47|± | 3.10|
| | |acc_norm|25.98|± | 3.08|
|hendrycksTest-high_school_world_history | 0|acc |24.47|± | 2.80|
| | |acc_norm|30.38|± | 2.99|
|hendrycksTest-human_aging | 0|acc |30.49|± | 3.09|
| | |acc_norm|29.15|± | 3.05|
|hendrycksTest-human_sexuality | 0|acc |33.59|± | 4.14|
| | |acc_norm|29.77|± | 4.01|
|hendrycksTest-international_law | 0|acc |31.40|± | 4.24|
| | |acc_norm|46.28|± | 4.55|
|hendrycksTest-jurisprudence | 0|acc |25.93|± | 4.24|
| | |acc_norm|44.44|± | 4.80|
|hendrycksTest-logical_fallacies | 0|acc |20.25|± | 3.16|
| | |acc_norm|27.61|± | 3.51|
|hendrycksTest-machine_learning | 0|acc |20.54|± | 3.83|
| | |acc_norm|22.32|± | 3.95|
|hendrycksTest-management | 0|acc |29.13|± | 4.50|
| | |acc_norm|34.95|± | 4.72|
|hendrycksTest-marketing | 0|acc |28.21|± | 2.95|
| | |acc_norm|32.05|± | 3.06|
|hendrycksTest-medical_genetics | 0|acc |31.00|± | 4.65|
| | |acc_norm|35.00|± | 4.79|
|hendrycksTest-miscellaneous | 0|acc |31.03|± | 1.65|
| | |acc_norm|27.46|± | 1.60|
|hendrycksTest-moral_disputes | 0|acc |27.17|± | 2.39|
| | |acc_norm|31.50|± | 2.50|
|hendrycksTest-moral_scenarios | 0|acc |27.26|± | 1.49|
| | |acc_norm|27.26|± | 1.49|
|hendrycksTest-nutrition | 0|acc |30.72|± | 2.64|
| | |acc_norm|39.22|± | 2.80|
|hendrycksTest-philosophy | 0|acc |27.33|± | 2.53|
| | |acc_norm|31.83|± | 2.65|
|hendrycksTest-prehistory | 0|acc |25.00|± | 2.41|
| | |acc_norm|20.06|± | 2.23|
|hendrycksTest-professional_accounting | 0|acc |25.89|± | 2.61|
| | |acc_norm|25.89|± | 2.61|
|hendrycksTest-professional_law | 0|acc |26.14|± | 1.12|
| | |acc_norm|29.34|± | 1.16|
|hendrycksTest-professional_medicine | 0|acc |21.69|± | 2.50|
| | |acc_norm|24.26|± | 2.60|
|hendrycksTest-professional_psychology | 0|acc |25.16|± | 1.76|
| | |acc_norm|25.16|± | 1.76|
|hendrycksTest-public_relations | 0|acc |32.73|± | 4.49|
| | |acc_norm|18.18|± | 3.69|
|hendrycksTest-security_studies | 0|acc |42.04|± | 3.16|
| | |acc_norm|33.88|± | 3.03|
|hendrycksTest-sociology | 0|acc |28.36|± | 3.19|
| | |acc_norm|30.85|± | 3.27|
|hendrycksTest-us_foreign_policy | 0|acc |40.00|± | 4.92|
| | |acc_norm|35.00|± | 4.79|
|hendrycksTest-virology | 0|acc |33.73|± | 3.68|
| | |acc_norm|29.52|± | 3.55|
|hendrycksTest-world_religions | 0|acc |34.50|± | 3.65|
| | |acc_norm|36.26|± | 3.69|
|lambada_openai | 0|ppl | 4.25|± | 0.09|
| | |acc |67.71|± | 0.65|
|logiqa | 0|acc |23.50|± | 1.66|
| | |acc_norm|28.73|± | 1.77|
|piqa | 0|acc |76.28|± | 0.99|
| | |acc_norm|76.44|± | 0.99|
|sciq | 0|acc |90.10|± | 0.94|
| | |acc_norm|85.20|± | 1.12|
|winogrande | 0|acc |65.27|± | 1.34|
|wsc | 0|acc |42.31|± | 4.87|