Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
9c4967bc
Commit
9c4967bc
authored
May 11, 2021
by
Leo Gao
Browse files
Multithread bootstrapping
parent
c77b60c1
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
6 deletions
+19
-6
lm_eval/metrics.py
lm_eval/metrics.py
+19
-6
No files found.
lm_eval/metrics.py
View file @
9c4967bc
...
...
@@ -178,24 +178,37 @@ def _sacreformat(refs, preds):
## stderr stuff
class _bootstrap_internal:
    """Callable worker that computes one chunk of bootstrap statistics.

    This is a top-level class (rather than a closure or lambda) so that
    instances can be handed to ``multiprocessing.Pool.imap``.

    Attributes:
        f: Statistic to evaluate on every resample; called with a list.
        n: Number of resamples drawn per call.
    """

    def __init__(self, f, n):
        self.f = f
        self.n = n

    def __call__(self, v):
        """Return ``n`` values of ``f`` over resamples of ``xs``.

        Args:
            v: A ``(i, xs)`` pair. ``i`` seeds a private random generator so
                that each chunk is reproducible and independent of the
                global ``random`` state; ``xs`` is the population to
                resample from.

        Returns:
            A list of ``self.n`` results, each being ``self.f`` applied to
            ``len(xs)`` items drawn from ``xs`` with replacement.
        """
        i, xs = v
        # A dedicated generator per chunk keeps results deterministic no
        # matter which worker process ends up handling the chunk.
        rnd = random.Random(i)
        size = len(xs)
        return [self.f(rnd.choices(xs, k=size)) for _ in range(self.n)]
def bootstrap_stderr(f, xs, iters=100000):
    """Estimate the standard error of statistic ``f`` over ``xs`` by bootstrap.

    The resampling is split into chunks that are evaluated in parallel by a
    ``multiprocessing`` pool; each chunk is handled by ``_bootstrap_internal``
    with the chunk index as its random seed, so the result is reproducible.

    Args:
        f: Statistic to evaluate on each resample. It is sent to worker
            processes, so it has to be picklable, and it must expose
            ``__name__`` (used in the progress message).
        xs: Population to resample from (with replacement).
        iters: Total number of bootstrap resamples to draw. Must be >= 1.

    Returns:
        ``sample_stddev`` of the ``iters`` bootstrap statistics.

    Raises:
        ValueError: If ``iters`` is smaller than 1.
    """
    import multiprocessing as mp

    from tqdm import tqdm

    if iters < 1:
        raise ValueError("iters must be a positive integer")

    # NOTE (from the original author): this gives a biased estimate of the
    # stderr, i.e. for the mean it is equivalent to a stderr computed without
    # Bessel's correction in the stddev. Multiplying by sqrt(n / (n - 1)) was
    # considered, but that would be ad hoc and has not been shown to yield an
    # unbiased estimator. It should not matter much in practice because the
    # samples are usually large.

    # Never use a chunk larger than the requested number of resamples,
    # otherwise small ``iters`` values would produce no chunks at all.
    chunk_size = min(1000, iters)
    num_chunks = iters // chunk_size
    leftover = iters - num_chunks * chunk_size

    res = []
    print("bootstrapping for stddev:", f.__name__)

    # The context manager releases the worker processes even if ``f`` raises.
    # All results are consumed inside the block, so shutting the pool down on
    # exit cannot discard pending work.
    with mp.Pool(mp.cpu_count()) as pool:
        chunks = pool.imap(
            _bootstrap_internal(f, chunk_size),
            [(i, xs) for i in range(num_chunks)],
        )
        for bootstrap in tqdm(chunks, total=num_chunks):
            # each chunk is a list of statistics sampled w/ replacement
            res.extend(bootstrap)

    if leftover:
        # Draw the remaining resamples in-process so that exactly ``iters``
        # statistics are produced; ``num_chunks`` is a seed no chunk used.
        res.extend(_bootstrap_internal(f, leftover)((num_chunks, xs)))

    return sample_stddev(res)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment