Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2efce05d
Unverified
Commit
2efce05d
authored
Mar 05, 2024
by
Nick Hill
Committed by
GitHub
Mar 06, 2024
Browse files
[Fix] Avoid pickling entire LLMEngine for Ray workers (#3207)
Co-authored-by: Antoni Baum <antoni.baum@protonmail.com>
parent
8999ec3c
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
7 deletions
+14
-7
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+14
-7
No files found.
vllm/engine/llm_engine.py
View file @
2efce05d
...
...
@@ -158,6 +158,11 @@ class LLMEngine:
if
USE_RAY_COMPILED_DAG
:
self
.
forward_dag
=
self
.
_compiled_ray_dag
()
def
__reduce__
(
self
):
# This is to ensure that the LLMEngine is not referenced in
# the closure used to initialize Ray worker actors
raise
RuntimeError
(
"LLMEngine should not be pickled!"
)
def
get_tokenizer_for_seq
(
self
,
sequence
:
Sequence
):
return
self
.
tokenizer
.
get_lora_tokenizer
(
sequence
.
lora_request
)
...
...
@@ -280,6 +285,8 @@ class LLMEngine:
parallel_config
=
copy
.
deepcopy
(
self
.
parallel_config
)
scheduler_config
=
copy
.
deepcopy
(
self
.
scheduler_config
)
device_config
=
copy
.
deepcopy
(
self
.
device_config
)
lora_config
=
copy
.
deepcopy
(
self
.
lora_config
)
kv_cache_dtype
=
self
.
cache_config
.
cache_dtype
for
rank
,
(
worker
,
(
node_id
,
_
))
in
enumerate
(
zip
(
self
.
workers
,
...
...
@@ -295,22 +302,22 @@ class LLMEngine:
local_rank
,
rank
,
distributed_init_method
,
lora_config
=
self
.
lora_config
,
kv_cache_dtype
=
self
.
cache_config
.
cache_dtype
,
lora_config
=
lora_config
,
kv_cache_dtype
=
kv_cache_dtype
,
))
driver_rank
=
0
driver_local_rank
=
node_workers
[
driver_node_id
].
index
(
driver_rank
)
self
.
driver_worker
=
Worker
(
model_config
,
parallel_config
,
scheduler_config
,
device_config
,
self
.
model_config
,
self
.
parallel_config
,
self
.
scheduler_config
,
self
.
device_config
,
driver_local_rank
,
driver_rank
,
distributed_init_method
,
lora_config
=
self
.
lora_config
,
kv_cache_dtype
=
self
.
cache_config
.
cache_dtype
,
kv_cache_dtype
=
kv_cache_dtype
,
is_driver_worker
=
True
,
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment