gaoqiong / lm-evaluation-harness · Commits

Commit 5075de60, authored Nov 29, 2023 by baberabb

fix chunking of inputs

Parent: 9c0d4e93
Showing 1 changed file with 13 additions and 10 deletions.

lm_eval/models/vllm_causallms.py (+13, -10)
 from collections import defaultdict
-from typing import List, Tuple, Optional, Literal, Union
+from itertools import islice
+from typing import List, Tuple, Optional, Literal, Union, Any

 from transformers import AutoTokenizer

 from lm_eval.api.instance import Instance
 from lm_eval.api.model import LM
...

@@ -24,6 +25,11 @@ def run_inference_one_gpu(model_args: dict, sampling_params, requests: List[int]
     return llm.generate(prompt_token_ids=requests, sampling_params=sampling_params)
+
+def chunk_list(my_list: List[Any], chunk_size: int):
+    for i in range(0, len(my_list), chunk_size):
+        yield list(islice(my_list, i, i + chunk_size))
+

 @register_model("vllm")
 class VLLM(LM):
     _DEFAULT_MAX_LENGTH = 2048
...
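The new chunk_list helper is self-contained, so it can be sanity-checked in isolation. A minimal sketch (the sample data and print call are illustrative only, not part of the commit):

from itertools import islice
from typing import Any, List

def chunk_list(my_list: List[Any], chunk_size: int):
    # Yield successive chunk_size-sized pieces of my_list; the last
    # chunk is shorter when len(my_list) % chunk_size != 0.
    for i in range(0, len(my_list), chunk_size):
        yield list(islice(my_list, i, i + chunk_size))

print(list(chunk_list(list(range(7)), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]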
@@ -137,16 +143,13 @@ please install vllm via `pip install lm-eval[vllm]` or `pip install -e .[vllm]`"
             temperature=0, prompt_logprobs=2, max_tokens=1)
         if self.data_parallel > 1:
-            req_list = []
-            for replicas in range(self.data_parallel):
-                reqs = utils.create_iterator(
-                    requests, rank=replicas, world_size=self.data_parallel
-                )
-                req_list.append(reqs)
-            inputs = [(self.model_args, sampling_params, req) for req in req_list]
+            requests = chunk_list(requests, self.data_parallel)
+            inputs = [(self.model_args, sampling_params, req) for req in requests]
-            with Pool(processes=self.data_parallel) as pool:
-                results = pool.starmap(run_inference_one_gpu, inputs)
+            with Pool() as pool:
+                results = pool.starmap(
+                    run_inference_one_gpu, inputs, self.data_parallel
+                )
             # flatten results
             return [item for sublist in results for item in sublist]
...
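In effect, the new path groups the requests into chunks of size self.data_parallel and fans each (model_args, sampling_params, chunk) tuple out to a worker process via Pool.starmap. Note that Pool() with no argument sizes the pool to os.cpu_count(), and the third positional argument to starmap is its chunksize. A minimal standalone sketch of that pattern, with a hypothetical fake_inference worker standing in for run_inference_one_gpu (which in the real file calls llm.generate):

from itertools import islice
from multiprocessing import Pool
from typing import Any, List

def chunk_list(my_list: List[Any], chunk_size: int):
    for i in range(0, len(my_list), chunk_size):
        yield list(islice(my_list, i, i + chunk_size))

def fake_inference(model_args: dict, sampling_params: dict, requests: List[Any]) -> List[str]:
    # Stand-in for run_inference_one_gpu: one output per request in the chunk.
    return [f"output-for-{r}" for r in requests]

if __name__ == "__main__":
    data_parallel = 2
    requests = list(range(5))
    inputs = [({}, {}, req) for req in chunk_list(requests, data_parallel)]
    with Pool() as pool:
        results = pool.starmap(fake_inference, inputs, data_parallel)
    # flatten results, mirroring the commit's final step
    print([item for sublist in results for item in sublist])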