Unverified Commit e53eb332 authored by Stella Biderman's avatar Stella Biderman Committed by GitHub
Browse files

Merge pull request #477 from juletx/results

Add results of various models in json and md format
parents d1327193 92a50856
This diff is collapsed.
{
"results": {
"arc_challenge": {
"acc": 0.439419795221843,
"acc_stderr": 0.014503747823580122,
"acc_norm": 0.4462457337883959,
"acc_norm_stderr": 0.014526705548539982
},
"prost": {
"acc": 0.2688941076003416,
"acc_stderr": 0.0032393206239968247,
"acc_norm": 0.3052412467976089,
"acc_norm_stderr": 0.003364432149066356
},
"swag": {
"acc": 0.5673298010596821,
"acc_stderr": 0.003502894135944166,
"acc_norm": 0.6934919524142757,
"acc_norm_stderr": 0.0032596605453371346
},
"arc_easy": {
"acc": 0.7457912457912458,
"acc_stderr": 0.008934537681141528,
"acc_norm": 0.5989057239057239,
"acc_norm_stderr": 0.010057051106534378
},
"boolq": {
"acc": 0.6850152905198776,
"acc_stderr": 0.00812432724981665
},
"wsc273": {
"acc": 0.8608058608058609,
"acc_stderr": 0.020988366070851
},
"mc_taco": {
"em": 0.10960960960960961,
"f1": 0.4753174430074593
},
"piqa": {
"acc": 0.7883569096844396,
"acc_stderr": 0.009530351270479397,
"acc_norm": 0.7910772578890098,
"acc_norm_stderr": 0.009485227030105093
},
"hellaswag": {
"acc": 0.5910177255526787,
"acc_stderr": 0.004906411984476791,
"acc_norm": 0.7623979286994622,
"acc_norm_stderr": 0.004247442237702478
},
"winogrande": {
"acc": 0.7016574585635359,
"acc_stderr": 0.012858885010030434
},
"copa": {
"acc": 0.9,
"acc_stderr": 0.030151134457776348
},
"openbookqa": {
"acc": 0.306,
"acc_stderr": 0.020629569998345403,
"acc_norm": 0.422,
"acc_norm_stderr": 0.022109039310618552
}
},
"versions": {
"arc_challenge": 0,
"prost": 0,
"swag": 0,
"arc_easy": 0,
"boolq": 1,
"wsc273": 0,
"mc_taco": 0,
"piqa": 0,
"hellaswag": 0,
"winogrande": 0,
"copa": 0,
"openbookqa": 0
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=/gaueko1/hizkuntza-ereduak/LLaMA/lm/huggingface/13B,use_accelerate=True",
"num_fewshot": 0,
"batch_size": "auto",
"device": "cuda:0",
"no_cache": true,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
{
"results": {
"mnli_mismatched": {
"acc": 0.45351912123677784,
"acc_stderr": 0.005020956265665617
},
"wnli": {
"acc": 0.4647887323943662,
"acc_stderr": 0.0596130578497224
},
"sst": {
"acc": 0.6536697247706422,
"acc_stderr": 0.01612186710508361
},
"cola": {
"mcc": 0.0,
"mcc_stderr": 0.0
},
"mnli": {
"acc": 0.43555781966377993,
"acc_stderr": 0.005005063722742048
},
"qnli": {
"acc": 0.4995423759838916,
"acc_stderr": 0.006765407718154766
},
"mrpc": {
"acc": 0.6862745098039216,
"acc_stderr": 0.022999936277943434,
"f1": 0.8134110787172011,
"f1_stderr": 0.01621238238910757
},
"rte": {
"acc": 0.6534296028880866,
"acc_stderr": 0.02864445699455754
},
"qqp": {
"acc": 0.3679198614889933,
"acc_stderr": 0.0023983700314094665,
"f1": 0.5365853658536586,
"f1_stderr": 0.0025607085094365924
}
},
"versions": {
"mnli_mismatched": 0,
"wnli": 1,
"sst": 0,
"cola": 0,
"mnli": 0,
"qnli": 0,
"mrpc": 0,
"rte": 0,
"qqp": 0
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=/gaueko1/hizkuntza-ereduak/LLaMA/lm/huggingface/13B,use_accelerate=True",
"num_fewshot": 0,
"batch_size": "auto",
"device": "cuda:0",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
{
"results": {
"gsm8k": {
"acc": 0.13570887035633056,
"acc_stderr": 0.009433577908567345
}
},
"versions": {
"gsm8k": 0
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=/gaueko1/hizkuntza-ereduak/LLaMA/lm/huggingface/13B,use_accelerate=True",
"num_fewshot": 8,
"batch_size": "auto",
"device": "cuda",
"no_cache": true,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
This diff is collapsed.
{
"results": {
"lambada_openai": {
"ppl": 1279051.053451683,
"ppl_stderr": 60995.62964377304,
"acc": 0.0,
"acc_stderr": 0.0
},
"lambada_openai_mt_de": {
"ppl": 1310285.4433720284,
"ppl_stderr": 71395.90633942866,
"acc": 0.0,
"acc_stderr": 0.0
},
"lambada_openai_mt_it": {
"ppl": 4091504.352954044,
"ppl_stderr": 218020.965277226,
"acc": 0.0,
"acc_stderr": 0.0
},
"lambada_standard": {
"ppl": 1409047.9981006894,
"ppl_stderr": 47832.883755899915,
"acc": 0.0,
"acc_stderr": 0.0
},
"lambada_standard_cloze": {
"ppl": 4235345.031433833,
"ppl_stderr": 132892.5654001927,
"acc": 0.0,
"acc_stderr": 0.0
},
"lambada_openai_mt_fr": {
"ppl": 2461448.491005768,
"ppl_stderr": 128013.98724687536,
"acc": 0.0,
"acc_stderr": 0.0
},
"lambada_openai_cloze": {
"ppl": 204515.38946166556,
"ppl_stderr": 9705.341358126625,
"acc": 0.00019406171162429653,
"acc_stderr": 0.00019406171162430135
},
"lambada_openai_mt_en": {
"ppl": 1279051.053451683,
"ppl_stderr": 60995.62964377304,
"acc": 0.0,
"acc_stderr": 0.0
},
"lambada_openai_mt_es": {
"ppl": 1980241.7718905837,
"ppl_stderr": 101614.2034914904,
"acc": 0.0,
"acc_stderr": 0.0
}
},
"versions": {
"lambada_openai": 0,
"lambada_openai_mt_de": 0,
"lambada_openai_mt_it": 0,
"lambada_standard": 0,
"lambada_standard_cloze": 0,
"lambada_openai_mt_fr": 0,
"lambada_openai_cloze": 0,
"lambada_openai_mt_en": 0,
"lambada_openai_mt_es": 0
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=/gaueko1/hizkuntza-ereduak/LLaMA/lm/huggingface/13B",
"num_fewshot": 0,
"batch_size": "auto",
"device": "cuda:0",
"no_cache": true,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment