Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
eddd627a
Commit
eddd627a
authored
May 05, 2023
by
Benjamin Fattori
Browse files
add utility func for slicing iterators, only tqdm on main process
parent
629bcfba
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
11 deletions
+17
-11
lm_eval/api/task.py
lm_eval/api/task.py
+1
-2
lm_eval/evaluator.py
lm_eval/evaluator.py
+4
-6
lm_eval/models/gpt2.py
lm_eval/models/gpt2.py
+2
-2
lm_eval/utils.py
lm_eval/utils.py
+10
-1
No files found.
lm_eval/api/task.py
View file @
eddd627a
...
...
@@ -260,8 +260,7 @@ class Task(abc.ABC):
),
f
"Task dataset (path=
{
self
.
DATASET_PATH
}
, name=
{
self
.
DATASET_NAME
}
) must have valid or test docs!"
instances
=
[]
# for doc_id, doc in enumerate(itertools.islice(docs, 0, limit) if limit else docs):
for
doc_id
,
doc
in
itertools
.
islice
(
enumerate
(
docs
),
rank
,
None
,
world_size
):
for
doc_id
,
doc
in
utils
.
create_iterator
(
enumerate
(
docs
),
rank
,
world_size
,
limit
):
# sample fewshot context #TODO: need to offset doc_id by rank now!
fewshot_ctx
=
self
.
fewshot_context
(
doc
,
self
.
_config
.
num_fewshot
,
rnd
=
random
.
Random
()
...
...
lm_eval/evaluator.py
View file @
eddd627a
...
...
@@ -6,7 +6,7 @@ import lm_eval.api.metrics
import
lm_eval.models
import
lm_eval.tasks
import
lm_eval.api
from
lm_eval.utils
import
positional_deprecated
,
run_task_tests
,
make_table
from
lm_eval.utils
import
positional_deprecated
,
run_task_tests
,
make_table
,
create_iterator
import
torch
@
positional_deprecated
...
...
@@ -146,7 +146,6 @@ def evaluate(
# rnd.seed(42)
# rnd.shuffle(task_docs)
# for doc_id, doc in enumerate(itertools.islice(task_docs, 0, limit)):
task
.
build_all_requests
(
limit
=
limit
,
rank
=
lm
.
rank
,
world_size
=
lm
.
world_size
)
# aggregate Instances by LM method requested to get output.
reqtype
=
"loglikelihood"
if
task
.
OUTPUT_TYPE
==
"multiple_choice"
else
task
.
OUTPUT_TYPE
#TODO: this is hacky, fix in task.py
...
...
@@ -156,11 +155,9 @@ def evaluate(
instances_rnk
=
torch
.
tensor
(
len
(
task
.
_instances
),
device
=
lm
.
device
)
gathered_item
=
lm
.
accelerator
.
gather
(
instances_rnk
).
cpu
().
detach
().
numpy
().
tolist
()
# compute number of pseudobatches to pad with (FSDP/DDP require even batches
+ can't use join
)
# compute number of pseudobatches to pad with (FSDP/DDP require even batches
among ranks
)
# we assume rank 0 always has largest iterator
numpad
=
gathered_item
[
0
]
-
gathered_item
[
lm
.
rank
]
if
numpad
>
0
:
print
(
f
"
{
task_name
}
/ balancing iterators across ranks / rank:
{
lm
.
rank
}
/ +
{
numpad
}
sample"
)
### Run LM on inputs, get all outputs ###
# execute each type of request
...
...
@@ -200,7 +197,8 @@ def evaluate(
# calculate values for each filter setup (TODO: make getting list of keys cleaner)
# TODO: make it possible to use a different metric per key
for
key
in
task
.
instances
[
0
].
filtered_resps
.
keys
():
for
doc_id
,
doc
in
itertools
.
islice
(
enumerate
(
task
.
test_docs
()),
lm
.
rank
,
None
,
lm
.
world_size
)
if
task
.
has_test_docs
()
else
itertools
.
islice
(
enumerate
(
task
.
validation_docs
()),
lm
.
rank
,
None
,
lm
.
world_size
):
doc_iterator
=
itertools
.
islice
(
enumerate
(
task
.
test_docs
()),
lm
.
rank
,
None
,
lm
.
world_size
)
if
task
.
has_test_docs
()
else
itertools
.
islice
(
enumerate
(
task
.
validation_docs
()),
lm
.
rank
,
None
,
lm
.
world_size
)
for
doc_id
,
doc
in
doc_iterator
:
# subset instances to only this document id ; sort by idx
requests
=
list
(
filter
(
lambda
x
:
x
.
doc_id
==
doc_id
,
task
.
instances
))
requests
.
sort
(
key
=
lambda
x
:
x
.
idx
)
...
...
lm_eval/models/gpt2.py
View file @
eddd627a
...
...
@@ -73,7 +73,7 @@ class HFLM(LM):
self
.
accelerator
=
accelerator
if
self
.
accelerator
.
is_local_main_process
:
print
(
f
"Using
{
gpus
}
GPUs with
FullySharded
DataParal
ell and accelerate
"
)
print
(
f
"Using
{
gpus
}
GPUs with Data
Paral
lelism
"
)
self
.
_rank
=
self
.
accelerator
.
local_process_index
self
.
_world_size
=
gpus
...
...
@@ -202,7 +202,7 @@ class HFLM(LM):
# TODO: automatic (variable) batch size detection for vectorization
re_ord
=
utils
.
Reorderer
(
requests
,
_collate
)
for
chunk
in
utils
.
chunks
(
tqdm
(
re_ord
.
get_reordered
(),
disable
=
disable_tqdm
),
self
.
batch_size
tqdm
(
re_ord
.
get_reordered
(),
disable
=
(
disable_tqdm
or
not
(
self
.
rank
==
0
))
),
self
.
batch_size
):
inps
=
[]
cont_toks_list
=
[]
...
...
lm_eval/utils.py
View file @
eddd627a
...
...
@@ -9,7 +9,7 @@ from typing import List
from
omegaconf
import
OmegaConf
from
jinja2
import
BaseLoader
,
Environment
,
StrictUndefined
from
itertools
import
islice
class ExitCodeError(Exception):
    # NOTE(review): no raise-site is visible in this chunk; presumably signals
    # a nonzero exit status from an external command -- confirm against callers.
    pass
...
...
@@ -246,3 +246,12 @@ env = Environment(loader=BaseLoader, undefined=StrictUndefined)
def apply_template(template, doc):
    """Render the Jinja2 template string ``template`` using ``doc``'s
    items as template variables, returning the rendered text."""
    compiled = env.from_string(template)
    rendered = compiled.render(**doc)
    return rendered
def create_iterator(raw_iterator, rank, world_size, limit=None):
    """Return a rank-local slice of ``raw_iterator``.

    Yields every ``world_size``-th item starting at offset ``rank``, so each
    rank in a multi-GPU run sees a disjoint shard of the documents. ``limit``
    caps consumption of the *raw* iterator (``None`` means no cap), which also
    serves to pull only a sample of documents in single-process runs.
    """
    # islice(it, start, stop, step): start=rank offsets into the stream,
    # step=world_size strides across ranks, stop=limit bounds the raw stream.
    sliced = islice(raw_iterator, rank, limit, world_size)
    return sliced
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment