Commit 4d147bdd authored by Jonathan Tow's avatar Jonathan Tow
Browse files

Merge branch 'master' of https://github.com/EleutherAI/lm-evaluation-harness into task-guide

parents 011cc891 dc937d4b
{"results": {"hendrycksTest-business_ethics": {"acc": 0.29, "acc_norm": 0.27, "acc_norm_stderr": 0.044619604333847394, "acc_stderr": 0.045604802157206845}}, "versions": {"hendrycksTest-business_ethics": 0}}
\ No newline at end of file
fbcb7ce507e0675d811e71e10a67c8d05a6605e29036f46776e04a6588cefbda
\ No newline at end of file
{"results": {"hendrycksTest-clinical_knowledge": {"acc": 0.23773584905660378, "acc_norm": 0.27169811320754716, "acc_norm_stderr": 0.027377706624670713, "acc_stderr": 0.02619980880756191}}, "versions": {"hendrycksTest-clinical_knowledge": 0}}
\ No newline at end of file
c29e4e67ff91af29b9434884874414d1b1b32ccc32903c6b1639469b19907419
\ No newline at end of file
{"results": {"hendrycksTest-college_biology": {"acc": 0.24305555555555555, "acc_norm": 0.2361111111111111, "acc_norm_stderr": 0.03551446610810826, "acc_stderr": 0.03586879280080341}}, "versions": {"hendrycksTest-college_biology": 0}}
\ No newline at end of file
044752b21540db95118b8cbe7e75c4c9b8758e27df56543deaeadec7f749a28d
\ No newline at end of file
{"results": {"hendrycksTest-college_chemistry": {"acc": 0.28, "acc_norm": 0.26, "acc_norm_stderr": 0.04408440022768078, "acc_stderr": 0.04512608598542127}}, "versions": {"hendrycksTest-college_chemistry": 0}}
\ No newline at end of file
4ea26ad780290429ac5a3317559c154848d662bd40532c966458ba6f2a32d0a3
\ No newline at end of file
{"results": {"hendrycksTest-college_computer_science": {"acc": 0.22, "acc_norm": 0.24, "acc_norm_stderr": 0.04292346959909282, "acc_stderr": 0.041633319989322695}}, "versions": {"hendrycksTest-college_computer_science": 0}}
\ No newline at end of file
e9fe80752686527281f834d2397875b4580581434b94799f9de6aaa450bd73ff
\ No newline at end of file
{"results": {"hendrycksTest-college_mathematics": {"acc": 0.18, "acc_norm": 0.2, "acc_norm_stderr": 0.04020151261036844, "acc_stderr": 0.038612291966536955}}, "versions": {"hendrycksTest-college_mathematics": 0}}
\ No newline at end of file
dd6e0a9be1407890e9f8cd4434fb6aa4752ab3d2473837fd465ad99f60ad685e
\ No newline at end of file
{"results": {"hendrycksTest-college_medicine": {"acc": 0.27167630057803466, "acc_norm": 0.2543352601156069, "acc_norm_stderr": 0.0332055644308557, "acc_stderr": 0.03391750322321659}}, "versions": {"hendrycksTest-college_medicine": 0}}
\ No newline at end of file
704a7671ef981fb95594782bc446dd632e87ebdbe89436a0603b714fb5786c75
\ No newline at end of file
{"results": {"hendrycksTest-college_physics": {"acc": 0.23529411764705882, "acc_norm": 0.23529411764705882, "acc_norm_stderr": 0.04220773659171453, "acc_stderr": 0.04220773659171452}}, "versions": {"hendrycksTest-college_physics": 0}}
\ No newline at end of file
a8a1892d1906cc3e7ffd321043f0a60f3b8b69ef76e5c6ff03c6ea41dc87d0cb
\ No newline at end of file
{"results": {"hendrycksTest-computer_security": {"acc": 0.24, "acc_norm": 0.27, "acc_norm_stderr": 0.044619604333847394, "acc_stderr": 0.042923469599092816}}, "versions": {"hendrycksTest-computer_security": 0}}
\ No newline at end of file
622f191ccfc7a597d99f39897ebe3f95a9ddce0e662fcfb411aa554b289bb355
\ No newline at end of file
{"results": {"hendrycksTest-conceptual_physics": {"acc": 0.2680851063829787, "acc_norm": 0.2553191489361702, "acc_norm_stderr": 0.028504856470514185, "acc_stderr": 0.028957342788342347}}, "versions": {"hendrycksTest-conceptual_physics": 0}}
\ No newline at end of file
cde76ba2c7382b4876e17136c94f52aca2774e50342ab757b2a2d18da370dcb6
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment