Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
c417292d
Commit
c417292d
authored
Sep 05, 2023
by
lintangsutawika
Browse files
Merge branch 'big-refactor' of
https://github.com/EleutherAI/lm-evaluation-harness
into wmt
parents
3b4fa26e
1a02d9df
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
3 deletions
+16
-3
lm_eval/models/huggingface.py
lm_eval/models/huggingface.py
+3
-3
lm_eval/tasks/wikitext/preprocess_wikitext.py
lm_eval/tasks/wikitext/preprocess_wikitext.py
+12
-0
lm_eval/tasks/wikitext/wikitext.yaml
lm_eval/tasks/wikitext/wikitext.yaml
+1
-0
No files found.
lm_eval/models/huggingface.py
View file @
c417292d
...
...
@@ -296,14 +296,14 @@ class HFLM(LM):
)
else
:
assert
accelerator
.
distributed_type
in
[
DistributedType
.
FSDP
,
DistributedType
.
MULTI_GPU
DistributedType
.
FSDP
,
DistributedType
.
MULTI_GPU
,
],
"Unsupported distributed type provided. Only DDP and FSDP are supported."
if
accelerator
.
distributed_type
==
DistributedType
.
FSDP
:
self
.
_model
=
accelerator
.
prepare
(
self
.
model
)
else
:
self
.
_model
=
accelerator
.
prepare_model
(
self
.
model
,
evaluation_mode
=
True
self
.
model
,
evaluation_mode
=
True
)
self
.
_device
=
torch
.
device
(
f
"cuda:
{
accelerator
.
local_process_index
}
"
)
self
.
accelerator
=
accelerator
...
...
lm_eval/tasks/wikitext/preprocess_wikitext.py
View file @
c417292d
...
...
@@ -34,3 +34,15 @@ def wikitext_detokenizer(doc):
string
=
string
.
replace
(
" 's"
,
"'s"
)
return
string
def process_results(doc, results):
    """Pair a document's log-likelihood with its word and byte counts.

    Args:
        doc: Mapping with a ``"page"`` key holding the page text as a string.
        results: One-element sequence; its single item is unpacked as
            ``loglikelihood``. Any other length raises ``ValueError`` from
            the unpacking.

    Returns:
        A dict with the keys ``"word_perplexity"``, ``"byte_perplexity"`` and
        ``"bits_per_byte"``. Each value is a ``(loglikelihood, count)`` tuple:
        the count is the number of whitespace-separated pieces of the page
        for the first key and the UTF-8 byte length of the page for the
        other two.
    """
    (loglikelihood,) = results
    # IMPORTANT: wikitext counts number of words in *original doc before detokenization*
    # NOTE: re.split yields an empty string for leading/trailing whitespace,
    # and those empty pieces are included in the count; that behavior is
    # left unchanged here.
    _words = len(re.split(r"\s+", doc["page"]))
    _bytes = len(doc["page"].encode("utf-8"))
    return {
        "word_perplexity": (loglikelihood, _words),
        "byte_perplexity": (loglikelihood, _bytes),
        "bits_per_byte": (loglikelihood, _bytes),
    }
lm_eval/tasks/wikitext/wikitext.yaml
View file @
c417292d
...
...
@@ -7,6 +7,7 @@ validation_split: validation
test_split
:
test
doc_to_text
:
"
"
doc_to_target
:
!function
preprocess_wikitext.wikitext_detokenizer
process_results
:
!function
preprocess_wikitext.process_results
should_decontaminate
:
true
doc_to_decontamination_query
:
"
{{page}}"
metric_list
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment