Commit 4d147bdd authored by Jonathan Tow's avatar Jonathan Tow
Browse files

Merge branch 'master' of https://github.com/EleutherAI/lm-evaluation-harness into task-guide

parents 011cc891 dc937d4b
{"results": {"hendrycksTest-human_sexuality": {"acc": 0.22137404580152673, "acc_norm": 0.22900763358778625, "acc_norm_stderr": 0.036853466317118506, "acc_stderr": 0.0364129708131373}}, "versions": {"hendrycksTest-human_sexuality": 0}}
\ No newline at end of file
ea9b2cefd27959db564168f6ad1169a5eaa012fc5a5d5b8faf9e34d94e335dc1
\ No newline at end of file
{"results": {"hendrycksTest-international_law": {"acc": 0.2396694214876033, "acc_norm": 0.3140495867768595, "acc_norm_stderr": 0.042369647530410164, "acc_stderr": 0.03896878985070417}}, "versions": {"hendrycksTest-international_law": 0}}
\ No newline at end of file
cac440189f1ec778e82f4975d88b74689553ecc5116aaa7f76587a50c1a610e0
\ No newline at end of file
{"results": {"hendrycksTest-jurisprudence": {"acc": 0.25, "acc_norm": 0.3148148148148148, "acc_norm_stderr": 0.04489931073591312, "acc_stderr": 0.04186091791394607}}, "versions": {"hendrycksTest-jurisprudence": 0}}
\ No newline at end of file
2e9449dd803f9e2334dc562d9f04031fd013ed36b883b44ab500533a5dbbface
\ No newline at end of file
{"results": {"hendrycksTest-logical_fallacies": {"acc": 0.20245398773006135, "acc_norm": 0.2147239263803681, "acc_norm_stderr": 0.03226219377286774, "acc_stderr": 0.03157065078911902}}, "versions": {"hendrycksTest-logical_fallacies": 0}}
\ No newline at end of file
7a7138821a66ef946e427b40344cf7f1a916a2926995a85ef731a3bee40cb7ce
\ No newline at end of file
{"results": {"hendrycksTest-machine_learning": {"acc": 0.23214285714285715, "acc_norm": 0.22321428571428573, "acc_norm_stderr": 0.039523019677025116, "acc_stderr": 0.04007341809755806}}, "versions": {"hendrycksTest-machine_learning": 0}}
\ No newline at end of file
355489f4bd176ab84db5ef4c03d56ddeeeb1b0ad69827122b2d800e1cdc7e5f0
\ No newline at end of file
{"results": {"hendrycksTest-management": {"acc": 0.24271844660194175, "acc_norm": 0.2621359223300971, "acc_norm_stderr": 0.043546310772605956, "acc_stderr": 0.04245022486384495}}, "versions": {"hendrycksTest-management": 0}}
\ No newline at end of file
b4fa0681fe54671a80509779d4338d744097a7206687f62977df7145dfa74a66
\ No newline at end of file
{"results": {"hendrycksTest-marketing": {"acc": 0.2863247863247863, "acc_norm": 0.2905982905982906, "acc_norm_stderr": 0.029745048572674043, "acc_stderr": 0.029614323690456648}}, "versions": {"hendrycksTest-marketing": 0}}
\ No newline at end of file
db6141246889a19dd3f6b9109f314d49c1a70f7a98795858804378b095c4a2fe
\ No newline at end of file
{"results": {"hendrycksTest-medical_genetics": {"acc": 0.27, "acc_norm": 0.29, "acc_norm_stderr": 0.04560480215720684, "acc_stderr": 0.0446196043338474}}, "versions": {"hendrycksTest-medical_genetics": 0}}
\ No newline at end of file
972dd88dbbaf09d14766e243cfc233425e7c01a26dbc61bdb9eeefa788822331
\ No newline at end of file
{"results": {"hendrycksTest-miscellaneous": {"acc": 0.23499361430395913, "acc_norm": 0.2515964240102171, "acc_norm_stderr": 0.015517322365529622, "acc_stderr": 0.015162024152278445}}, "versions": {"hendrycksTest-miscellaneous": 0}}
\ No newline at end of file
d6ef028022c02b69d1516973e08bebaa14d8debcf2589a2bb124823178202d20
\ No newline at end of file
{"results": {"hendrycksTest-moral_disputes": {"acc": 0.24855491329479767, "acc_norm": 0.27167630057803466, "acc_norm_stderr": 0.023948512905468365, "acc_stderr": 0.023267528432100174}}, "versions": {"hendrycksTest-moral_disputes": 0}}
\ No newline at end of file
a8e1882e77728b53c8b86312254d08320d8363fb606d746a8dd145b812f62cf5
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment