Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
02249855
Commit
02249855
authored
May 10, 2021
by
Leo Gao
Browse files
Use sample stddev
parent
82a538d3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
4 deletions
+16
-4
lm_eval/metrics.py
lm_eval/metrics.py
+8
-4
tests/test_misc.py
tests/test_misc.py
+8
-0
No files found.
lm_eval/metrics.py
View file @
02249855
...
@@ -12,14 +12,18 @@ def mean(arr):
...
@@ -12,14 +12,18 @@ def mean(arr):
return
sum
(
arr
)
/
len
(
arr
)
return
sum
(
arr
)
/
len
(
arr
)
def
stddev
(
arr
):
def
pop_
stddev
(
arr
):
mu
=
mean
(
arr
)
mu
=
mean
(
arr
)
return
math
.
sqrt
(
sum
([(
x
-
mu
)
**
2
for
x
in
arr
])
/
len
(
arr
))
return
math
.
sqrt
(
sum
([(
x
-
mu
)
**
2
for
x
in
arr
])
/
len
(
arr
))
def sample_stddev(arr):
    """Return the sample standard deviation of ``arr``.

    The squared deviations from the mean are summed and divided by
    ``len(arr) - 1`` (rather than ``len(arr)``) before the square root
    is taken.

    Args:
        arr: A sized iterable of numbers.

    Returns:
        The square root of the sum of squared deviations divided by
        ``len(arr) - 1``.

    Raises:
        ZeroDivisionError: If ``arr`` has exactly one element, because the
            ``len(arr) - 1`` divisor is then zero.
    """
    mu = mean(arr)
    # A generator expression avoids building a throwaway list of squares;
    # the summation order, and therefore the result, is unchanged.
    squared_deviations = sum((x - mu) ** 2 for x in arr)
    return math.sqrt(squared_deviations / (len(arr) - 1))
def
mean_stderr
(
arr
):
def
mean_stderr
(
arr
):
print
(
stddev
(
arr
),
len
(
arr
))
return
sample_stddev
(
arr
)
/
math
.
sqrt
(
len
(
arr
))
return
stddev
(
arr
)
/
math
.
sqrt
(
len
(
arr
))
def
median
(
arr
):
def
median
(
arr
):
...
@@ -181,7 +185,7 @@ def bootstrap_stderr(f, xs, iters=10000):
...
@@ -181,7 +185,7 @@ def bootstrap_stderr(f, xs, iters=10000):
bootstrap
=
f
(
rnd
.
choices
(
xs
,
k
=
len
(
xs
)))
bootstrap
=
f
(
rnd
.
choices
(
xs
,
k
=
len
(
xs
)))
res
.
append
(
bootstrap
)
res
.
append
(
bootstrap
)
return
stddev
(
res
)
return
sample_
stddev
(
res
)
def
stderr_for_metric
(
metric
):
def
stderr_for_metric
(
metric
):
...
...
tests/test_misc.py
View file @
02249855
...
@@ -10,3 +10,11 @@ def test_bootstrapping():
...
@@ -10,3 +10,11 @@ def test_bootstrapping():
bootstrapped
=
metrics
.
bootstrap_stderr
(
metrics
.
mean
,
arr
,
iters
=
100000
)
bootstrapped
=
metrics
.
bootstrap_stderr
(
metrics
.
mean
,
arr
,
iters
=
100000
)
assert
bootstrapped
==
pytest
.
approx
(
expected
,
abs
=
1e-4
)
assert
bootstrapped
==
pytest
.
approx
(
expected
,
abs
=
1e-4
)
def test_bootstrapping_stella():
    """Compare the bootstrapped standard error of the mean with ``mean_stderr``.

    Uses a fixed seven-element sample and 100000 bootstrap iterations.
    """
    arr = [0.1, 0.3, 0.2, 0.25, 0.3, 0.1, 0.22]
    expected = metrics.mean_stderr(arr)
    bootstrapped = metrics.bootstrap_stderr(metrics.mean, arr, iters=100000)
    # NOTE(review): abs=1e-5 is a very tight tolerance for a resampling
    # estimate over only seven points — confirm this assertion is stable.
    assert bootstrapped == pytest.approx(expected, abs=1e-5)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment