Commit 7604b873 authored by cardy20's avatar cardy20
Browse files

conflict changed

parents 17b04444 e8f38aee
{
"results": {
"gsm8k": {
"acc": 0.012130401819560273,
"acc_stderr": 0.0030152942428909486
}
},
"versions": {
"gsm8k": 0
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=bigscience/bloom-3b,use_accelerate=True",
"num_fewshot": 8,
"batch_size": "auto",
"device": "cuda",
"no_cache": true,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
{
"results": {
"math_counting_and_prob": {
"acc": 0.002109704641350211,
"acc_stderr": 0.00210970464135021
},
"math_algebra": {
"acc": 0.0,
"acc_stderr": 0.0
},
"math_precalc": {
"acc": 0.0,
"acc_stderr": 0.0
},
"gsm8k": {
"acc": 0.0,
"acc_stderr": 0.0
},
"math_prealgebra": {
"acc": 0.001148105625717566,
"acc_stderr": 0.0011481056257175708
},
"math_geometry": {
"acc": 0.0,
"acc_stderr": 0.0
},
"drop": {
"em": 0.02097315436241611,
"em_stderr": 0.001467468637213982,
"f1": 0.04631921140939603,
"f1_stderr": 0.001664167972365937
},
"math_num_theory": {
"acc": 0.001851851851851852,
"acc_stderr": 0.0018518518518518448
},
"math_intermediate_algebra": {
"acc": 0.0,
"acc_stderr": 0.0
},
"mathqa": {
"acc": 0.2525963149078727,
"acc_stderr": 0.007954112207299597,
"acc_norm": 0.25058626465661643,
"acc_norm_stderr": 0.00793304734353984
}
},
"versions": {
"math_counting_and_prob": 1,
"math_algebra": 1,
"math_precalc": 1,
"mathqa": 0,
"gsm8k": 0,
"math_prealgebra": 1,
"math_geometry": 1,
"drop": 1,
"math_num_theory": 1,
"math_intermediate_algebra": 1
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=bigscience/bloom-3b,use_accelerate=True",
"num_fewshot": 5,
"batch_size": "auto",
"device": "cuda:0",
"no_cache": true,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
{
"results": {
"pawsx_es": {
"acc": 0.564,
"acc_stderr": 0.011091145421162657
},
"pawsx_ja": {
"acc": 0.446,
"acc_stderr": 0.011117724672834362
},
"pawsx_ko": {
"acc": 0.463,
"acc_stderr": 0.011152474561478177
},
"pawsx_zh": {
"acc": 0.471,
"acc_stderr": 0.011164310140373722
},
"pawsx_en": {
"acc": 0.568,
"acc_stderr": 0.011079231683079107
},
"pawsx_de": {
"acc": 0.546,
"acc_stderr": 0.0111357084193598
},
"pawsx_fr": {
"acc": 0.476,
"acc_stderr": 0.011170245619215438
}
},
"versions": {
"pawsx_es": 0,
"pawsx_ja": 0,
"pawsx_ko": 0,
"pawsx_zh": 0,
"pawsx_en": 0,
"pawsx_de": 0,
"pawsx_fr": 0
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=bigscience/bloom-3b",
"num_fewshot": 0,
"batch_size": "auto",
"device": "cuda",
"no_cache": true,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
{
"results": {
"squad2": {
"exact": 6.914848816642803,
"f1": 11.511512971067512,
"HasAns_exact": 11.099865047233468,
"HasAns_f1": 20.306375422652543,
"NoAns_exact": 2.7417998317914214,
"NoAns_f1": 2.7417998317914214,
"best_exact": 50.07159100480081,
"best_f1": 50.08024690773861
},
"logiqa": {
"acc": 0.2073732718894009,
"acc_stderr": 0.015902084913876333,
"acc_norm": 0.29185867895545314,
"acc_norm_stderr": 0.017831570553971925
},
"headqa_en": {
"acc": 0.2840991976659373,
"acc_stderr": 0.008614040521644994,
"acc_norm": 0.3336980306345733,
"acc_norm_stderr": 0.009006537310888562
},
"truthfulqa_mc": {
"mc1": 0.23255813953488372,
"mc1_stderr": 0.014789157531080503,
"mc2": 0.40572206357204965,
"mc2_stderr": 0.014390512893375817
},
"webqs": {
"acc": 0.01673228346456693,
"acc_stderr": 0.0028461549169432184
},
"triviaqa": {
"acc": 0.04154512507734465,
"acc_stderr": 0.0018761872163031025
},
"headqa_es": {
"acc": 0.26440554339897887,
"acc_stderr": 0.008423643607316284,
"acc_norm": 0.3099927060539752,
"acc_norm_stderr": 0.008833810133604958
}
},
"versions": {
"squad2": 1,
"logiqa": 0,
"headqa_en": 0,
"truthfulqa_mc": 1,
"webqs": 0,
"triviaqa": 1,
"headqa_es": 0
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=bigscience/bloom-3b,use_accelerate=True",
"num_fewshot": 0,
"batch_size": "auto",
"device": "cuda:0",
"no_cache": true,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment