Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7f8d612d
"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "a47e6ffe9366516ea5ca28e27fc87367a869e854"
Unverified
Commit
7f8d612d
authored
Jul 30, 2024
by
Earthwalker
Committed by
GitHub
Jul 29, 2024
Browse files
[TPU] Support tensor parallelism in async llm engine (#6891)
parent
60d1c6e5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
2 deletions
+11
-2
Dockerfile.tpu
Dockerfile.tpu
+3
-0
vllm/engine/async_llm_engine.py
vllm/engine/async_llm_engine.py
+8
-2
No files found.
Dockerfile.tpu
View file @
7f8d612d
...
@@ -12,6 +12,9 @@ RUN pip install "numpy<2"
...
@@ -12,6 +12,9 @@ RUN pip install "numpy<2"
RUN pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html
RUN pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html
RUN pip install torch_xla[pallas] -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
RUN pip install torch_xla[pallas] -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
# Fix FastAPI dependence
RUN pip install "starlette<0.38.0"
# Build vLLM.
# Build vLLM.
COPY . /workspace/vllm
COPY . /workspace/vllm
ENV VLLM_TARGET_DEVICE="tpu"
ENV VLLM_TARGET_DEVICE="tpu"
...
...
vllm/engine/async_llm_engine.py
View file @
7f8d612d
...
@@ -407,8 +407,14 @@ class AsyncLLMEngine:
...
@@ -407,8 +407,14 @@ class AsyncLLMEngine:
from
vllm.executor.neuron_executor
import
NeuronExecutorAsync
from
vllm.executor.neuron_executor
import
NeuronExecutorAsync
executor_class
=
NeuronExecutorAsync
executor_class
=
NeuronExecutorAsync
elif
engine_config
.
device_config
.
device_type
==
"tpu"
:
elif
engine_config
.
device_config
.
device_type
==
"tpu"
:
from
vllm.executor.tpu_executor
import
TPUExecutorAsync
if
distributed_executor_backend
==
"ray"
:
executor_class
=
TPUExecutorAsync
initialize_ray_cluster
(
engine_config
.
parallel_config
)
from
vllm.executor.ray_tpu_executor
import
RayTPUExecutorAsync
executor_class
=
RayTPUExecutorAsync
else
:
assert
distributed_executor_backend
is
None
from
vllm.executor.tpu_executor
import
TPUExecutorAsync
executor_class
=
TPUExecutorAsync
elif
engine_config
.
device_config
.
device_type
==
"cpu"
:
elif
engine_config
.
device_config
.
device_type
==
"cpu"
:
from
vllm.executor.cpu_executor
import
CPUExecutorAsync
from
vllm.executor.cpu_executor
import
CPUExecutorAsync
executor_class
=
CPUExecutorAsync
executor_class
=
CPUExecutorAsync
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment