Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
e94f4647
Unverified
Commit
e94f4647
authored
Feb 24, 2026
by
Ayush Agarwal
Committed by
GitHub
Feb 24, 2026
Browse files
chore: install vllm-omni in vllm container (#6458)
Signed-off-by:
ayushag
<
ayushag@nvidia.com
>
parent
efa89448
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
73 additions
and
32 deletions
+73
-32
components/src/dynamo/vllm/tests/test_vllm_omni_handler.py
components/src/dynamo/vllm/tests/test_vllm_omni_handler.py
+32
-25
components/src/dynamo/vllm/tests/test_vllm_renderer_api.py
components/src/dynamo/vllm/tests/test_vllm_renderer_api.py
+8
-4
container/context.yaml
container/context.yaml
+1
-0
container/deps/vllm/install_vllm.sh
container/deps/vllm/install_vllm.sh
+23
-3
container/templates/args.Dockerfile
container/templates/args.Dockerfile
+1
-0
container/templates/vllm_framework.Dockerfile
container/templates/vllm_framework.Dockerfile
+2
-0
container/templates/vllm_runtime.Dockerfile
container/templates/vllm_runtime.Dockerfile
+6
-0
No files found.
components/src/dynamo/vllm/tests/test_vllm_omni_handler.py
View file @
e94f4647
...
@@ -5,16 +5,11 @@ from unittest.mock import MagicMock, patch
...
@@ -5,16 +5,11 @@ from unittest.mock import MagicMock, patch
import
pytest
import
pytest
from
dynamo.common.protocols.image_protocol
import
NvCreateImageRequest
from
dynamo.common.protocols.video_protocol
import
NvCreateVideoRequest
from
dynamo.common.utils.output_modalities
import
RequestType
try
:
try
:
from
dynamo.vllm.omni.omni_handler
import
(
from
dynamo.common.protocols.image_protocol
import
NvCreateImageRequest
EngineInputs
,
from
dynamo.common.protocols.video_protocol
import
NvCreateVideoRequest
OmniHandler
,
from
dynamo.common.utils.output_modalities
import
RequestType
prepare_image_output
,
from
dynamo.vllm.omni.omni_handler
import
EngineInputs
,
OmniHandler
)
except
ImportError
:
except
ImportError
:
pytest
.
skip
(
"vLLM omni dependencies not available"
,
allow_module_level
=
True
)
pytest
.
skip
(
"vLLM omni dependencies not available"
,
allow_module_level
=
True
)
...
@@ -51,32 +46,40 @@ class TestEngineInputs:
...
@@ -51,32 +46,40 @@ class TestEngineInputs:
class
TestPrepareImageOutput
:
class
TestPrepareImageOutput
:
def
test_b64_json
(
self
):
@
pytest
.
mark
.
asyncio
async
def
test_b64_json
(
self
):
"""b64_json format returns data URI with base64 prefix."""
"""b64_json format returns data URI with base64 prefix."""
handler
=
_make_handler
()
img
=
MagicMock
()
img
=
MagicMock
()
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"fake_png_data"
)
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"fake_png_data"
)
results
=
prepare_image_output
([
img
],
"b64_json"
)
results
=
await
handler
.
_
prepare_image_output
([
img
],
"req-1"
,
"b64_json"
)
assert
len
(
results
)
==
1
assert
len
(
results
)
==
1
assert
results
[
0
].
startswith
(
"data:image/png;base64,"
)
assert
results
[
0
].
startswith
(
"data:image/png;base64,"
)
def
test_b64_default_when_none
(
self
):
@
pytest
.
mark
.
asyncio
async
def
test_b64_default_when_none
(
self
):
"""None response_format defaults to base64 encoding."""
"""None response_format defaults to base64 encoding."""
handler
=
_make_handler
()
img
=
MagicMock
()
img
=
MagicMock
()
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"data"
)
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"data"
)
results
=
prepare_image_output
([
img
],
None
)
results
=
await
handler
.
_
prepare_image_output
([
img
],
"req-1"
,
None
)
assert
results
[
0
].
startswith
(
"data:image/png;base64,"
)
assert
results
[
0
].
startswith
(
"data:image/png;base64,"
)
def
test_invalid_format
(
self
):
@
pytest
.
mark
.
asyncio
async
def
test_invalid_format
(
self
):
"""Unsupported response_format raises ValueError."""
"""Unsupported response_format raises ValueError."""
handler
=
_make_handler
()
with
pytest
.
raises
(
ValueError
,
match
=
"Invalid response format"
):
with
pytest
.
raises
(
ValueError
,
match
=
"Invalid response format"
):
prepare_image_output
([
MagicMock
()],
"invalid"
)
await
handler
.
_
prepare_image_output
([
MagicMock
()],
"req-1"
,
"invalid"
)
def
test_multiple_images
(
self
):
@
pytest
.
mark
.
asyncio
async
def
test_multiple_images
(
self
):
"""Multiple input images produce one output entry each."""
"""Multiple input images produce one output entry each."""
handler
=
_make_handler
()
imgs
=
[
MagicMock
()
for
_
in
range
(
3
)]
imgs
=
[
MagicMock
()
for
_
in
range
(
3
)]
for
img
in
imgs
:
for
img
in
imgs
:
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"px"
)
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"px"
)
results
=
prepare_image_output
(
imgs
,
"b64_json"
)
results
=
await
handler
.
_
prepare_image_output
(
imgs
,
"req-1"
,
"b64_json"
)
assert
len
(
results
)
==
3
assert
len
(
results
)
==
3
...
@@ -160,23 +163,25 @@ class TestFormatTextChunk:
...
@@ -160,23 +163,25 @@ class TestFormatTextChunk:
class
TestFormatImageChunk
:
class
TestFormatImageChunk
:
def
test_chat_completion_format
(
self
):
@
pytest
.
mark
.
asyncio
async
def
test_chat_completion_format
(
self
):
"""Chat completion route returns image_url content parts."""
"""Chat completion route returns image_url content parts."""
handler
=
_make_handler
()
handler
=
_make_handler
()
img
=
MagicMock
()
img
=
MagicMock
()
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"px"
)
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"px"
)
chunk
=
handler
.
_format_image_chunk
(
chunk
=
await
handler
.
_format_image_chunk
(
[
img
],
"req-1"
,
request_type
=
RequestType
.
CHAT_COMPLETION
[
img
],
"req-1"
,
request_type
=
RequestType
.
CHAT_COMPLETION
)
)
assert
chunk
[
"object"
]
==
"chat.completion.chunk"
assert
chunk
[
"object"
]
==
"chat.completion.chunk"
assert
chunk
[
"choices"
][
0
][
"delta"
][
"content"
][
0
][
"type"
]
==
"image_url"
assert
chunk
[
"choices"
][
0
][
"delta"
][
"content"
][
0
][
"type"
]
==
"image_url"
def
test_image_generation_b64_format
(
self
):
@
pytest
.
mark
.
asyncio
async
def
test_image_generation_b64_format
(
self
):
"""Image generation with b64_json format returns base64 data."""
"""Image generation with b64_json format returns base64 data."""
handler
=
_make_handler
()
handler
=
_make_handler
()
img
=
MagicMock
()
img
=
MagicMock
()
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"px"
)
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"px"
)
chunk
=
handler
.
_format_image_chunk
(
chunk
=
await
handler
.
_format_image_chunk
(
[
img
],
[
img
],
"req-1"
,
"req-1"
,
response_format
=
"b64_json"
,
response_format
=
"b64_json"
,
...
@@ -184,12 +189,13 @@ class TestFormatImageChunk:
...
@@ -184,12 +189,13 @@ class TestFormatImageChunk:
)
)
assert
chunk
[
"data"
][
0
][
"b64_json"
]
is
not
None
assert
chunk
[
"data"
][
0
][
"b64_json"
]
is
not
None
def
test_image_generation_default_format_returns_b64
(
self
):
@
pytest
.
mark
.
asyncio
async
def
test_image_generation_default_format_returns_b64
(
self
):
"""Image generation with response_format=None defaults to b64_json."""
"""Image generation with response_format=None defaults to b64_json."""
handler
=
_make_handler
()
handler
=
_make_handler
()
img
=
MagicMock
()
img
=
MagicMock
()
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"px"
)
img
.
save
=
lambda
b
,
format
:
b
.
write
(
b
"px"
)
chunk
=
handler
.
_format_image_chunk
(
chunk
=
await
handler
.
_format_image_chunk
(
[
img
],
[
img
],
"req-1"
,
"req-1"
,
response_format
=
None
,
response_format
=
None
,
...
@@ -197,10 +203,11 @@ class TestFormatImageChunk:
...
@@ -197,10 +203,11 @@ class TestFormatImageChunk:
)
)
assert
chunk
[
"data"
][
0
][
"b64_json"
]
is
not
None
assert
chunk
[
"data"
][
0
][
"b64_json"
]
is
not
None
def
test_empty_images_returns_error
(
self
):
@
pytest
.
mark
.
asyncio
async
def
test_empty_images_returns_error
(
self
):
"""Empty image list produces an error chunk."""
"""Empty image list produces an error chunk."""
handler
=
_make_handler
()
handler
=
_make_handler
()
chunk
=
handler
.
_format_image_chunk
([],
"req-1"
)
chunk
=
await
handler
.
_format_image_chunk
([],
"req-1"
)
assert
"Error"
in
chunk
[
"choices"
][
0
][
"delta"
][
"content"
]
assert
"Error"
in
chunk
[
"choices"
][
0
][
"delta"
][
"content"
]
...
...
components/src/dynamo/vllm/tests/test_vllm_renderer_api.py
View file @
e94f4647
...
@@ -358,7 +358,7 @@ class TestVllmRendererApi:
...
@@ -358,7 +358,7 @@ class TestVllmRendererApi:
position. vllm_processor.py constructs EngineCoreOutput by keyword
position. vllm_processor.py constructs EngineCoreOutput by keyword
and reads fields from EngineCoreRequest positionally.
and reads fields from EngineCoreRequest positionally.
"""
"""
expected
_request_fields
=
(
base
_request_fields
=
(
"request_id"
,
"request_id"
,
"prompt_token_ids"
,
"prompt_token_ids"
,
"mm_features"
,
"mm_features"
,
...
@@ -377,11 +377,15 @@ class TestVllmRendererApi:
...
@@ -377,11 +377,15 @@ class TestVllmRendererApi:
"resumable"
,
"resumable"
,
"external_req_id"
,
"external_req_id"
,
)
)
# vllm-omni monkey-patches EngineCoreRequest with an extra field
# (only installed on amd64, not arm64)
omni_fields
=
base_request_fields
+
(
"additional_information"
,)
actual_request_fields
=
EngineCoreRequest
.
__struct_fields__
actual_request_fields
=
EngineCoreRequest
.
__struct_fields__
assert
actual_request_fields
==
expected
_request_fields
,
(
assert
actual_request_fields
in
(
base
_request_fields
,
omni_fields
),
(
"EngineCoreRequest fields changed!
\n
"
"EngineCoreRequest fields changed!
\n
"
f
"Expected:
{
expected_request_fields
}
\n
"
f
"Expected (base):
{
base_request_fields
}
\n
"
f
"Actual:
{
actual_request_fields
}
\n
"
f
"Expected (omni):
{
omni_fields
}
\n
"
f
"Actual:
{
actual_request_fields
}
\n
"
"Update request construction in components/src/dynamo/frontend/vllm_processor.py"
"Update request construction in components/src/dynamo/frontend/vllm_processor.py"
)
)
...
...
container/context.yaml
View file @
e94f4647
...
@@ -45,6 +45,7 @@ vllm:
...
@@ -45,6 +45,7 @@ vllm:
vllm_ref
:
v0.15.1
vllm_ref
:
v0.15.1
flashinf_ref
:
v0.6.1
flashinf_ref
:
v0.6.1
lmcache_ref
:
0.3.13
lmcache_ref
:
0.3.13
vllm_omni_ref
:
"
0.14.0"
max_jobs
:
"
10"
max_jobs
:
"
10"
enable_media_ffmpeg
:
"
true"
enable_media_ffmpeg
:
"
true"
enable_gpu_memory_service
:
"
true"
enable_gpu_memory_service
:
"
true"
...
...
container/deps/vllm/install_vllm.sh
View file @
e94f4647
...
@@ -6,8 +6,9 @@
...
@@ -6,8 +6,9 @@
# Installation order:
# Installation order:
# 1. LMCache (installed first so vLLM's dependencies take precedence)
# 1. LMCache (installed first so vLLM's dependencies take precedence)
# 2. vLLM
# 2. vLLM
# 3. DeepGEMM
# 3. vLLM-Omni
# 4. EP kernels
# 4. DeepGEMM
# 5. EP kernels
set
-euo
pipefail
set
-euo
pipefail
...
@@ -25,6 +26,7 @@ DEEPGEMM_REF=""
...
@@ -25,6 +26,7 @@ DEEPGEMM_REF=""
CUDA_VERSION
=
"12.9"
CUDA_VERSION
=
"12.9"
FLASHINF_REF
=
"v0.6.1"
FLASHINF_REF
=
"v0.6.1"
LMCACHE_REF
=
"0.3.13"
LMCACHE_REF
=
"0.3.13"
VLLM_OMNI_REF
=
"0.14.0"
while
[[
$#
-gt
0
]]
;
do
while
[[
$#
-gt
0
]]
;
do
case
$1
in
case
$1
in
...
@@ -56,6 +58,10 @@ while [[ $# -gt 0 ]]; do
...
@@ -56,6 +58,10 @@ while [[ $# -gt 0 ]]; do
LMCACHE_REF
=
"
$2
"
LMCACHE_REF
=
"
$2
"
shift
2
shift
2
;;
;;
--vllm-omni-ref
)
VLLM_OMNI_REF
=
"
$2
"
shift
2
;;
--torch-cuda-arch-list
)
--torch-cuda-arch-list
)
TORCH_CUDA_ARCH_LIST
=
"
$2
"
TORCH_CUDA_ARCH_LIST
=
"
$2
"
shift
2
shift
2
...
@@ -65,7 +71,7 @@ while [[ $# -gt 0 ]]; do
...
@@ -65,7 +71,7 @@ while [[ $# -gt 0 ]]; do
shift
2
shift
2
;;
;;
-h
|
--help
)
-h
|
--help
)
echo
"Usage:
$0
[--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--lmcache-ref REF] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
echo
"Usage:
$0
[--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--lmcache-ref REF]
[--vllm-omni-ref REF]
[--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
echo
"Options:"
echo
"Options:"
echo
" --vllm-ref REF vLLM release version (default:
${
VLLM_REF
}
)"
echo
" --vllm-ref REF vLLM release version (default:
${
VLLM_REF
}
)"
echo
" --max-jobs NUM Maximum parallel jobs (default:
${
MAX_JOBS
}
)"
echo
" --max-jobs NUM Maximum parallel jobs (default:
${
MAX_JOBS
}
)"
...
@@ -74,6 +80,7 @@ while [[ $# -gt 0 ]]; do
...
@@ -74,6 +80,7 @@ while [[ $# -gt 0 ]]; do
echo
" --deepgemm-ref REF DeepGEMM git ref (default:
${
DEEPGEMM_REF
}
)"
echo
" --deepgemm-ref REF DeepGEMM git ref (default:
${
DEEPGEMM_REF
}
)"
echo
" --flashinf-ref REF FlashInfer version (default:
${
FLASHINF_REF
}
)"
echo
" --flashinf-ref REF FlashInfer version (default:
${
FLASHINF_REF
}
)"
echo
" --lmcache-ref REF LMCache version (default:
${
LMCACHE_REF
}
)"
echo
" --lmcache-ref REF LMCache version (default:
${
LMCACHE_REF
}
)"
echo
" --vllm-omni-ref REF vLLM-Omni version (default:
${
VLLM_OMNI_REF
}
)"
echo
" --torch-cuda-arch-list LIST CUDA architectures (default:
${
TORCH_CUDA_ARCH_LIST
}
)"
echo
" --torch-cuda-arch-list LIST CUDA architectures (default:
${
TORCH_CUDA_ARCH_LIST
}
)"
echo
" --cuda-version VERSION CUDA version (default:
${
CUDA_VERSION
}
)"
echo
" --cuda-version VERSION CUDA version (default:
${
CUDA_VERSION
}
)"
exit
0
exit
0
...
@@ -160,6 +167,19 @@ else
...
@@ -160,6 +167,19 @@ else
fi
fi
echo
"✓ vLLM installation completed"
echo
"✓ vLLM installation completed"
echo
"
\n
=== Installing vLLM-Omni ==="
if
[
-n
"
$VLLM_OMNI_REF
"
]
&&
[
"
$ARCH
"
=
"amd64"
]
;
then
# Save original vllm entrypoint before vllm-omni overwrites it
VLLM_BIN
=
$(
which vllm
)
cp
"
$VLLM_BIN
"
/tmp/vllm-entrypoint-backup
uv pip
install
vllm-omni
==
${
VLLM_OMNI_REF
}
# Restore original vllm CLI entrypoint (vllm-omni replaces it with its own)
cp
/tmp/vllm-entrypoint-backup
"
$VLLM_BIN
"
echo
"✓ vLLM-Omni
${
VLLM_OMNI_REF
}
installed (original vllm entrypoint preserved)"
else
echo
"⚠ Skipping vLLM-Omni (no ref provided or ARM64 not supported)"
fi
echo
"
\n
=== Installing DeepGEMM ==="
echo
"
\n
=== Installing DeepGEMM ==="
cd
$INSTALLATION_DIR
/vllm/tools
cd
$INSTALLATION_DIR
/vllm/tools
if
[
-n
"
$DEEPGEMM_REF
"
]
;
then
if
[
-n
"
$DEEPGEMM_REF
"
]
;
then
...
...
container/templates/args.Dockerfile
View file @
e94f4647
...
@@ -71,6 +71,7 @@ ARG MAX_JOBS={{ context.vllm.max_jobs }}
...
@@ -71,6 +71,7 @@ ARG MAX_JOBS={{ context.vllm.max_jobs }}
# FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
# FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
ARG
FLASHINF_REF={{ context.vllm.flashinf_ref }}
ARG
FLASHINF_REF={{ context.vllm.flashinf_ref }}
ARG
LMCACHE_REF={{ context.vllm.lmcache_ref }}
ARG
LMCACHE_REF={{ context.vllm.lmcache_ref }}
ARG
VLLM_OMNI_REF={{ context.vllm.vllm_omni_ref }}
# If left blank, then we will fallback to vLLM defaults
# If left blank, then we will fallback to vLLM defaults
ARG
DEEPGEMM_REF=""
ARG
DEEPGEMM_REF=""
...
...
container/templates/vllm_framework.Dockerfile
View file @
e94f4647
...
@@ -68,6 +68,7 @@ ARG VLLM_GIT_URL
...
@@ -68,6 +68,7 @@ ARG VLLM_GIT_URL
ARG
DEEPGEMM_REF
ARG
DEEPGEMM_REF
ARG
FLASHINF_REF
ARG
FLASHINF_REF
ARG
LMCACHE_REF
ARG
LMCACHE_REF
ARG
VLLM_OMNI_REF
ARG
CUDA_VERSION
ARG
CUDA_VERSION
ARG
MAX_JOBS
ARG
MAX_JOBS
...
@@ -88,6 +89,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
...
@@ -88,6 +89,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
${
DEEPGEMM_REF
:+--deepgemm-ref
"
$DEEPGEMM_REF
"
}
\
${
DEEPGEMM_REF
:+--deepgemm-ref
"
$DEEPGEMM_REF
"
}
\
${
FLASHINF_REF
:+--flashinf-ref
"
$FLASHINF_REF
"
}
\
${
FLASHINF_REF
:+--flashinf-ref
"
$FLASHINF_REF
"
}
\
${
LMCACHE_REF
:+--lmcache-ref
"
$LMCACHE_REF
"
}
\
${
LMCACHE_REF
:+--lmcache-ref
"
$LMCACHE_REF
"
}
\
${
VLLM_OMNI_REF
:+--vllm-omni-ref
"
$VLLM_OMNI_REF
"
}
\
--cuda-version
$CUDA_VERSION
--cuda-version
$CUDA_VERSION
ENV
LD_LIBRARY_PATH=\
ENV
LD_LIBRARY_PATH=\
...
...
container/templates/vllm_runtime.Dockerfile
View file @
e94f4647
...
@@ -140,6 +140,9 @@ COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/nvidia ${SIT
...
@@ -140,6 +140,9 @@ COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/nvidia ${SIT
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/flashinfer_jit_cache ${SITE_PACKAGES}/flashinfer_jit_cache
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/flashinfer_jit_cache ${SITE_PACKAGES}/flashinfer_jit_cache
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/torch ${SITE_PACKAGES}/torch
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/torch ${SITE_PACKAGES}/torch
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/vllm ${SITE_PACKAGES}/vllm
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/vllm ${SITE_PACKAGES}/vllm
{% if platform == "amd64" -%}
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/vllm_omni ${SITE_PACKAGES}/vllm_omni
{% endif -%}
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/triton ${SITE_PACKAGES}/triton
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/triton ${SITE_PACKAGES}/triton
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/flashinfer_cubin ${SITE_PACKAGES}/flashinfer_cubin
COPY
--chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/flashinfer_cubin ${SITE_PACKAGES}/flashinfer_cubin
# Remaining packages and venv structure (bin/, include/, share/, etc.)
# Remaining packages and venv structure (bin/, include/, share/, etc.)
...
@@ -148,6 +151,9 @@ COPY --chmod=775 --chown=dynamo:0 --from=framework \
...
@@ -148,6 +151,9 @@ COPY --chmod=775 --chown=dynamo:0 --from=framework \
--exclude=lib/python*/site-packages/flashinfer_jit_cache \
--exclude=lib/python*/site-packages/flashinfer_jit_cache \
--exclude=lib/python*/site-packages/torch \
--exclude=lib/python*/site-packages/torch \
--exclude=lib/python*/site-packages/vllm \
--exclude=lib/python*/site-packages/vllm \
{%- if platform == "amd64" %}
--exclude=lib/python*/site-packages/vllm_omni \
{%- endif %}
--exclude=lib/python*/site-packages/triton \
--exclude=lib/python*/site-packages/triton \
--exclude=lib/python*/site-packages/flashinfer_cubin \
--exclude=lib/python*/site-packages/flashinfer_cubin \
${VIRTUAL_ENV} ${VIRTUAL_ENV}
${VIRTUAL_ENV} ${VIRTUAL_ENV}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment