gaoqiong / lm-evaluation-harness · Commit 9f58d1a8

Unverified commit, authored May 24, 2023 by Stella Biderman; committed by GitHub on May 24, 2023.

Merge branch 'polyglot' into feature/kold

Parents: 33e813ee, 1927699a

Changes: 203 of 203+ files displayed; this page shows 3 changed files with 184 additions and 0 deletions (+184 −0).
results/bloom/bloom-7b1/bloom-7b1_common_sense_reasoning_0-shot.json  (+91 −0)
results/bloom/bloom-7b1/bloom-7b1_gsm8k_8-shot.json  (+22 −0)
results/bloom/bloom-7b1/bloom-7b1_mathematical_reasoning_few_shot_5-shot.json  (+71 −0)
results/bloom/bloom-7b1/bloom-7b1_common_sense_reasoning_0-shot.json (new file, mode 100644)
{
  "results": {
    "copa": {
      "acc": 0.72,
      "acc_stderr": 0.04512608598542127
    },
    "winogrande": {
      "acc": 0.6432517758484609,
      "acc_stderr": 0.013463393958028726
    },
    "piqa": {
      "acc": 0.7274211099020674,
      "acc_stderr": 0.010389256803296021,
      "acc_norm": 0.7366702937976061,
      "acc_norm_stderr": 0.010276185322196764
    },
    "arc_challenge": {
      "acc": 0.3037542662116041,
      "acc_stderr": 0.013438909184778757,
      "acc_norm": 0.33532423208191126,
      "acc_norm_stderr": 0.013796182947785564
    },
    "arc_easy": {
      "acc": 0.6494107744107744,
      "acc_stderr": 0.009791003829831557,
      "acc_norm": 0.5732323232323232,
      "acc_norm_stderr": 0.010149141043955626
    },
    "boolq": {
      "acc": 0.6287461773700306,
      "acc_stderr": 0.008450174658715903
    },
    "wsc273": {
      "acc": 0.8131868131868132,
      "acc_stderr": 0.023632761722644544
    },
    "openbookqa": {
      "acc": 0.252,
      "acc_stderr": 0.019435727282249536,
      "acc_norm": 0.358,
      "acc_norm_stderr": 0.021461434862859122
    },
    "prost": {
      "acc": 0.26184884713919726,
      "acc_stderr": 0.003211967450351038,
      "acc_norm": 0.30572160546541416,
      "acc_norm_stderr": 0.003365914208405272
    },
    "mc_taco": {
      "em": 0.13588588588588588,
      "f1": 0.5052611696967991
    },
    "hellaswag": {
      "acc": 0.4623580959968134,
      "acc_stderr": 0.0049756211474061025,
      "acc_norm": 0.5967934674367655,
      "acc_norm_stderr": 0.0048953903414456264
    },
    "swag": {
      "acc": 0.5024992502249325,
      "acc_stderr": 0.0035350478846161142,
      "acc_norm": 0.6825952214335699,
      "acc_norm_stderr": 0.0032909332559412758
    }
  },
  "versions": {
    "copa": 0,
    "winogrande": 0,
    "piqa": 0,
    "arc_challenge": 0,
    "arc_easy": 0,
    "boolq": 1,
    "wsc273": 0,
    "openbookqa": 0,
    "prost": 0,
    "mc_taco": 0,
    "hellaswag": 0,
    "swag": 0
  },
  "config": {
    "model": "hf-causal-experimental",
    "model_args": "pretrained=bigscience/bloom-7b1,use_accelerate=True",
    "num_fewshot": 0,
    "batch_size": "auto",
    "device": "cuda:0",
    "no_cache": true,
    "limit": null,
    "bootstrap_iters": 100000,
    "description_dict": {}
  }
}
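This is a standard lm-evaluation-harness results file: per-task metrics and their bootstrap standard errors under "results", task versions under "versions", and the run configuration under "config". As a minimal sketch (not part of the commit, stdlib only), the snippet below loads such a file and prints each metric alongside its standard error; the path is the file added above.

import json

# Load one of the result files added in this commit.
with open("results/bloom/bloom-7b1/bloom-7b1_common_sense_reasoning_0-shot.json") as f:
    report = json.load(f)

for task, metrics in report["results"].items():
    for name, value in metrics.items():
        if name.endswith("_stderr"):
            continue  # stderrs are printed next to their metric below
        stderr = metrics.get(f"{name}_stderr")
        if stderr is not None:
            print(f"{task:25s} {name:10s} {value:.4f} ± {stderr:.4f}")
        else:
            # mc_taco reports em/f1 with no stderr fields
            print(f"{task:25s} {name:10s} {value:.4f}")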
results/bloom/bloom-7b1/bloom-7b1_gsm8k_8-shot.json (new file, mode 100644)
{
  "results": {
    "gsm8k": {
      "acc": 0.018953752843062926,
      "acc_stderr": 0.0037560783410314704
    }
  },
  "versions": {
    "gsm8k": 0
  },
  "config": {
    "model": "hf-causal-experimental",
    "model_args": "pretrained=bigscience/bloom-7b1,use_accelerate=True",
    "num_fewshot": 8,
    "batch_size": "auto",
    "device": "cuda",
    "no_cache": true,
    "limit": null,
    "bootstrap_iters": 100000,
    "description_dict": {}
  }
}
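For a quick read on how uncertain this 8-shot GSM8K score is, here is a small sketch (not from the commit) that turns acc and acc_stderr into a normal-approximation 95% confidence interval:

# Values copied from the results file above.
acc = 0.018953752843062926
stderr = 0.0037560783410314704

# Normal approximation: acc ± 1.96 * stderr.
lo, hi = acc - 1.96 * stderr, acc + 1.96 * stderr
print(f"gsm8k 8-shot: {acc:.4f} (95% CI {lo:.4f}–{hi:.4f})")
# -> gsm8k 8-shot: 0.0190 (95% CI 0.0116–0.0263)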
results/bloom/bloom-7b1/bloom-7b1_mathematical_reasoning_few_shot_5-shot.json (new file, mode 100644)
{
  "results": {
    "mathqa": {
      "acc": 0.26566164154103855,
      "acc_stderr": 0.008085616216226046,
      "acc_norm": 0.26532663316582916,
      "acc_norm_stderr": 0.008082359462649721
    },
    "math_prealgebra": {
      "acc": 0.0,
      "acc_stderr": 0.0
    },
    "drop": {
      "em": 0.02506291946308725,
      "em_stderr": 0.0016008246934367681,
      "f1": 0.05092911073825512,
      "f1_stderr": 0.0017766603696206904
    },
    "math_precalc": {
      "acc": 0.0,
      "acc_stderr": 0.0
    },
    "math_geometry": {
      "acc": 0.0,
      "acc_stderr": 0.0
    },
    "gsm8k": {
      "acc": 0.0,
      "acc_stderr": 0.0
    },
    "math_counting_and_prob": {
      "acc": 0.0,
      "acc_stderr": 0.0
    },
    "math_num_theory": {
      "acc": 0.0,
      "acc_stderr": 0.0
    },
    "math_algebra": {
      "acc": 0.0,
      "acc_stderr": 0.0
    },
    "math_intermediate_algebra": {
      "acc": 0.0,
      "acc_stderr": 0.0
    }
  },
  "versions": {
    "mathqa": 0,
    "math_prealgebra": 1,
    "drop": 1,
    "math_precalc": 1,
    "math_geometry": 1,
    "gsm8k": 0,
    "math_counting_and_prob": 1,
    "math_num_theory": 1,
    "math_algebra": 1,
    "math_intermediate_algebra": 1
  },
  "config": {
    "model": "hf-causal-experimental",
    "model_args": "pretrained=bigscience/bloom-7b1,use_accelerate=True",
    "num_fewshot": 5,
    "batch_size": "auto",
    "device": "cuda:0",
    "no_cache": true,
    "limit": null,
    "bootstrap_iters": 100000,
    "description_dict": {}
  }
}
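Aside from mathqa and drop, every task in this 5-shot file scores exactly 0.0 for bloom-7b1. A short sketch (again, not part of the commit) that macro-averages the acc entries makes that plain:

import json

with open("results/bloom/bloom-7b1/bloom-7b1_mathematical_reasoning_few_shot_5-shot.json") as f:
    report = json.load(f)

# drop reports only em/f1, so it is excluded; the other 9 tasks report acc.
accs = {task: m["acc"] for task, m in report["results"].items() if "acc" in m}
print(f"macro-average acc over {len(accs)} tasks: {sum(accs.values()) / len(accs):.4f}")
# -> macro-average acc over 9 tasks: 0.0295 (all of it from mathqa)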