Commit 4d147bdd authored by Jonathan Tow's avatar Jonathan Tow
Browse files

Merge branch 'master' of https://github.com/EleutherAI/lm-evaluation-harness into task-guide

parents 011cc891 dc937d4b
{"results": {"hendrycksTest-high_school_government_and_politics": {"acc": 0.24352331606217617, "acc_norm": 0.23834196891191708, "acc_norm_stderr": 0.03074890536390988, "acc_stderr": 0.030975436386845436}}, "versions": {"hendrycksTest-high_school_government_and_politics": 0}}
\ No newline at end of file
ce4faae2fb6628caa48f6fc74cbc848880db49e6ff51079392778a2322bcefef
\ No newline at end of file
{"results": {"hendrycksTest-high_school_macroeconomics": {"acc": 0.2230769230769231, "acc_norm": 0.22564102564102564, "acc_norm_stderr": 0.021193632525148522, "acc_stderr": 0.021107730127244}}, "versions": {"hendrycksTest-high_school_macroeconomics": 0}}
\ No newline at end of file
ab368d16fc4648ad27940f71abd266366663f51db612f732a0b9b0eea28de9f8
\ No newline at end of file
{"results": {"hendrycksTest-high_school_mathematics": {"acc": 0.22592592592592592, "acc_norm": 0.24814814814814815, "acc_norm_stderr": 0.0263357394040558, "acc_stderr": 0.025497532639609553}}, "versions": {"hendrycksTest-high_school_mathematics": 0}}
\ No newline at end of file
513b998585ebc1ebdefca6435b7c84fd73dc36fc80321a22503467f04efed23e
\ No newline at end of file
{"results": {"hendrycksTest-high_school_microeconomics": {"acc": 0.24369747899159663, "acc_norm": 0.22268907563025211, "acc_norm_stderr": 0.027025433498882378, "acc_stderr": 0.027886828078380558}}, "versions": {"hendrycksTest-high_school_microeconomics": 0}}
\ No newline at end of file
dae59e82d3d4d8dec82239d9620b72cc47bb6efbe2f1c2f9b9d23e849c9c5e32
\ No newline at end of file
{"results": {"hendrycksTest-high_school_physics": {"acc": 0.2582781456953642, "acc_norm": 0.271523178807947, "acc_norm_stderr": 0.03631329803969653, "acc_stderr": 0.035737053147634576}}, "versions": {"hendrycksTest-high_school_physics": 0}}
\ No newline at end of file
0e4c8d13806d3696167e40544d2d114c557c10c74bc61fcb9c51bbfced0266ef
\ No newline at end of file
{"results": {"hendrycksTest-high_school_psychology": {"acc": 0.24587155963302754, "acc_norm": 0.23302752293577983, "acc_norm_stderr": 0.018125669180861493, "acc_stderr": 0.018461940968708436}}, "versions": {"hendrycksTest-high_school_psychology": 0}}
\ No newline at end of file
33d1d6eaaa2c3a944bf49d3f220a4efc328d7c3b3465b7cec40ae36d8984b75f
\ No newline at end of file
{"results": {"hendrycksTest-high_school_statistics": {"acc": 0.2962962962962963, "acc_norm": 0.3055555555555556, "acc_norm_stderr": 0.03141554629402544, "acc_stderr": 0.03114144782353604}}, "versions": {"hendrycksTest-high_school_statistics": 0}}
\ No newline at end of file
8c65c1a28330dd001d395ac11f1bb80c3b33f5935f503e74067aef6e9e1d9d9b
\ No newline at end of file
{"results": {"hendrycksTest-high_school_us_history": {"acc": 0.29901960784313725, "acc_norm": 0.28431372549019607, "acc_norm_stderr": 0.03166009679399814, "acc_stderr": 0.03213325717373618}}, "versions": {"hendrycksTest-high_school_us_history": 0}}
\ No newline at end of file
1c8b994bd9a63ec874fc8d0e3a27077118b7adc472306b2fd6c55635a78b9d52
\ No newline at end of file
{"results": {"hendrycksTest-high_school_world_history": {"acc": 0.23628691983122363, "acc_norm": 0.24472573839662448, "acc_norm_stderr": 0.02798569938703642, "acc_stderr": 0.027652153144159263}}, "versions": {"hendrycksTest-high_school_world_history": 0}}
\ No newline at end of file
0880b3a78f8d7b17ffc612031427b9085367cf65dabe2a68c4b64e3171d17e88
\ No newline at end of file
{"results": {"hendrycksTest-human_aging": {"acc": 0.21524663677130046, "acc_norm": 0.17937219730941703, "acc_norm_stderr": 0.025749819569192804, "acc_stderr": 0.02758406660220827}}, "versions": {"hendrycksTest-human_aging": 0}}
\ No newline at end of file
4b07922fa1d549b655c21440b13d869263ce7dd9771d8147c450f11c91d26c10
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment