Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
28b3a1c7
Unverified
Commit
28b3a1c7
authored
Dec 10, 2024
by
Tyler Michael Smith
Committed by
GitHub
Dec 10, 2024
Browse files
[V1] Multiprocessing Tensor Parallel Support for v1 (#9856)
Signed-off-by:
Tyler Michael Smith
<
tyler@neuralmagic.com
>
parent
bc192a2b
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
2 deletions
+9
-2
vllm/v1/worker/gpu_worker.py
vllm/v1/worker/gpu_worker.py
+9
-2
No files found.
vllm/v1/worker/gpu_worker.py
View file @
28b3a1c7
...
@@ -15,6 +15,7 @@ from vllm.logger import init_logger
...
@@ -15,6 +15,7 @@ from vllm.logger import init_logger
from
vllm.model_executor
import
set_random_seed
from
vllm.model_executor
import
set_random_seed
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
,
get_dtype_size
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
,
get_dtype_size
from
vllm.v1.core.scheduler
import
SchedulerOutput
from
vllm.v1.outputs
import
ModelRunnerOutput
from
vllm.v1.outputs
import
ModelRunnerOutput
from
vllm.v1.worker.gpu_model_runner
import
GPUModelRunner
from
vllm.v1.worker.gpu_model_runner
import
GPUModelRunner
...
@@ -56,7 +57,6 @@ class Worker:
...
@@ -56,7 +57,6 @@ class Worker:
from
vllm.utils
import
init_cached_hf_modules
from
vllm.utils
import
init_cached_hf_modules
init_cached_hf_modules
()
init_cached_hf_modules
()
self
.
model_runner
=
GPUModelRunner
(
vllm_config
)
# Torch profiler. Enabled and configured through env vars:
# Torch profiler. Enabled and configured through env vars:
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
if
envs
.
VLLM_TORCH_PROFILER_DIR
:
if
envs
.
VLLM_TORCH_PROFILER_DIR
:
...
@@ -103,6 +103,9 @@ class Worker:
...
@@ -103,6 +103,9 @@ class Worker:
# Set random seed.
# Set random seed.
set_random_seed
(
self
.
model_config
.
seed
)
set_random_seed
(
self
.
model_config
.
seed
)
# Construct the model runner
self
.
model_runner
=
GPUModelRunner
(
self
.
vllm_config
,
self
.
device
)
def
load_model
(
self
)
->
None
:
def
load_model
(
self
)
->
None
:
self
.
model_runner
.
load_model
()
self
.
model_runner
.
load_model
()
...
@@ -198,7 +201,7 @@ class Worker:
...
@@ -198,7 +201,7 @@ class Worker:
scheduler_output
:
"SchedulerOutput"
,
scheduler_output
:
"SchedulerOutput"
,
)
->
ModelRunnerOutput
:
)
->
ModelRunnerOutput
:
output
=
self
.
model_runner
.
execute_model
(
scheduler_output
)
output
=
self
.
model_runner
.
execute_model
(
scheduler_output
)
# TODO(woosuk): Send the output to the engine process.
return
output
if
self
.
rank
==
0
else
None
return
output
return
output
def
profile
(
self
,
is_start
=
True
):
def
profile
(
self
,
is_start
=
True
):
...
@@ -209,6 +212,10 @@ class Worker:
...
@@ -209,6 +212,10 @@ class Worker:
else
:
else
:
self
.
profiler
.
stop
()
self
.
profiler
.
stop
()
def
check_health
(
self
)
->
None
:
# worker will always be healthy as long as it's running.
return
def
init_worker_distributed_environment
(
def
init_worker_distributed_environment
(
parallel_config
:
ParallelConfig
,
parallel_config
:
ParallelConfig
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment