Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
7e55a734
"lib/parsers/src/vscode:/vscode.git/clone" did not exist on "6e0d62d74f5b58f1bd0bbb078134f79ad9350366"
Unverified
Commit
7e55a734
authored
Mar 05, 2026
by
Indrajit Bhosale
Committed by
GitHub
Mar 05, 2026
Browse files
chore: Remove llava-hf recipes (#6954)
parent
dc14ce05
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
4 additions
and
70 deletions
+4
-70
examples/backends/trtllm/launch/disagg_multimodal.sh
examples/backends/trtllm/launch/disagg_multimodal.sh
+4
-7
examples/backends/trtllm/launch/epd_multimodal_image.sh
examples/backends/trtllm/launch/epd_multimodal_image.sh
+0
-63
No files found.
examples/backends/trtllm/launch/disagg_multimodal.sh
View file @
7e55a734
...
@@ -4,14 +4,13 @@
...
@@ -4,14 +4,13 @@
# Environment variables with defaults
# Environment variables with defaults
export
DYNAMO_HOME
=
${
DYNAMO_HOME
:-
"/workspace"
}
export
DYNAMO_HOME
=
${
DYNAMO_HOME
:-
"/workspace"
}
export
MODEL_PATH
=
${
MODEL_PATH
:-
"
llava-hf/llava-v1.6-mistral-7b-hf
"
}
export
MODEL_PATH
=
${
MODEL_PATH
:-
"
Qwen/Qwen3-VL-2B-Instruct
"
}
export
SERVED_MODEL_NAME
=
${
SERVED_MODEL_NAME
:-
"
llava-v1.6-mistral-7b-hf
"
}
export
SERVED_MODEL_NAME
=
${
SERVED_MODEL_NAME
:-
"
Qwen3-VL-2B-Instruct
"
}
export
PREFILL_ENGINE_ARGS
=
${
PREFILL_ENGINE_ARGS
:-
"
$DYNAMO_HOME
/examples/backends/trtllm/engine_configs/
llava-v1.6-mistral-7b-hf
/prefill.yaml"
}
export
PREFILL_ENGINE_ARGS
=
${
PREFILL_ENGINE_ARGS
:-
"
$DYNAMO_HOME
/examples/backends/trtllm/engine_configs/
qwen3-vl-2b-instruct
/prefill.yaml"
}
export
DECODE_ENGINE_ARGS
=
${
DECODE_ENGINE_ARGS
:-
"
$DYNAMO_HOME
/examples/backends/trtllm/engine_configs/
llava-v1.6-mistral-7b-hf
/decode.yaml"
}
export
DECODE_ENGINE_ARGS
=
${
DECODE_ENGINE_ARGS
:-
"
$DYNAMO_HOME
/examples/backends/trtllm/engine_configs/
qwen3-vl-2b-instruct
/decode.yaml"
}
export
PREFILL_CUDA_VISIBLE_DEVICES
=
${
PREFILL_CUDA_VISIBLE_DEVICES
:-
"0"
}
export
PREFILL_CUDA_VISIBLE_DEVICES
=
${
PREFILL_CUDA_VISIBLE_DEVICES
:-
"0"
}
export
DECODE_CUDA_VISIBLE_DEVICES
=
${
DECODE_CUDA_VISIBLE_DEVICES
:-
"1"
}
export
DECODE_CUDA_VISIBLE_DEVICES
=
${
DECODE_CUDA_VISIBLE_DEVICES
:-
"1"
}
export
MODALITY
=
${
MODALITY
:-
"multimodal"
}
export
MODALITY
=
${
MODALITY
:-
"multimodal"
}
export
CUSTOM_TEMPLATE
=
${
CUSTOM_TEMPLATE
:-
"
$DYNAMO_HOME
/examples/backends/trtllm/templates/llava_multimodal.jinja"
}
# Setup cleanup trap
# Setup cleanup trap
cleanup
()
{
cleanup
()
{
...
@@ -34,7 +33,6 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
...
@@ -34,7 +33,6 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
--modality
"
$MODALITY
"
\
--modality
"
$MODALITY
"
\
--custom-jinja-template
"
$CUSTOM_TEMPLATE
"
\
--disaggregation-mode
prefill &
--disaggregation-mode
prefill &
PREFILL_PID
=
$!
PREFILL_PID
=
$!
...
@@ -44,5 +42,4 @@ CUDA_VISIBLE_DEVICES=$DECODE_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
...
@@ -44,5 +42,4 @@ CUDA_VISIBLE_DEVICES=$DECODE_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
--modality
"
$MODALITY
"
\
--modality
"
$MODALITY
"
\
--custom-jinja-template
"
$CUSTOM_TEMPLATE
"
\
--disaggregation-mode
decode
--disaggregation-mode
decode
examples/backends/trtllm/launch/epd_multimodal_image.sh
deleted
100755 → 0
View file @
dc14ce05
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Environment variables with defaults.
# Each variable keeps a value already present in the caller's environment;
# the fallback after ':-' is used only when the variable is unset or empty.
export DYNAMO_HOME="${DYNAMO_HOME:-/workspace}"

# Model to load and the name it is served under.
export MODEL_PATH="${MODEL_PATH:-llava-hf/llava-v1.6-mistral-7b-hf}"
export SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-llava-v1.6-mistral-7b-hf}"

# Per-worker engine config files; the defaults are resolved against
# DYNAMO_HOME, so DYNAMO_HOME must be assigned before these lines.
export PREFILL_ENGINE_ARGS="${PREFILL_ENGINE_ARGS:-${DYNAMO_HOME}/examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/prefill.yaml}"
export DECODE_ENGINE_ARGS="${DECODE_ENGINE_ARGS:-${DYNAMO_HOME}/examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/decode.yaml}"
export ENCODE_ENGINE_ARGS="${ENCODE_ENGINE_ARGS:-${DYNAMO_HOME}/examples/backends/trtllm/engine_configs/llava-v1.6-mistral-7b-hf/encode.yaml}"

# Value assigned to CUDA_VISIBLE_DEVICES for each worker process.
export PREFILL_CUDA_VISIBLE_DEVICES="${PREFILL_CUDA_VISIBLE_DEVICES:-0}"
export DECODE_CUDA_VISIBLE_DEVICES="${DECODE_CUDA_VISIBLE_DEVICES:-1}"
export ENCODE_CUDA_VISIBLE_DEVICES="${ENCODE_CUDA_VISIBLE_DEVICES:-2}"

# Passed to the prefill worker as --encode-endpoint.
export ENCODE_ENDPOINT="${ENCODE_ENDPOINT:-dyn://dynamo.tensorrt_llm_encode.generate}"
export MODALITY="${MODALITY:-multimodal}"
# Passed to the prefill and decode workers as --custom-jinja-template.
export CUSTOM_TEMPLATE="${CUSTOM_TEMPLATE:-${DYNAMO_HOME}/examples/backends/trtllm/templates/llava_multimodal.jinja}"
# Setup cleanup trap
#######################################
# Stop the background processes started by this script and reap them.
# Globals:   DYNAMO_PID, PREFILL_PID, DECODE_PID, ENCODE_PID (read; any of
#            them may be unset or empty if that process was never started)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0; kill/wait failures are deliberately ignored (best effort)
#######################################
cleanup() {
  echo "Cleaning up background processes..."

  # Collect only the PIDs that were actually recorded. The ':-' fallback
  # keeps this safe under 'set -u' when a variable was never assigned.
  local pid
  local -a pids=()
  for pid in "${DYNAMO_PID:-}" "${PREFILL_PID:-}" "${DECODE_PID:-}" "${ENCODE_PID:-}"; do
    if [[ -n "$pid" ]]; then
      pids+=("$pid")
    fi
  done

  # With no recorded PIDs, skip kill/wait entirely: a bare 'wait' would
  # otherwise block on every child of the shell.
  if (( ${#pids[@]} > 0 )); then
    # Processes may already have exited; errors are intentionally silenced.
    kill "${pids[@]}" 2>/dev/null || true
    wait "${pids[@]}" 2>/dev/null || true
  fi

  echo "Cleanup complete."
}
# Run cleanup exactly once, on every exit path. INT and TERM are converted
# into a normal exit (128 + signal number) so that the EXIT trap performs the
# cleanup; registering cleanup directly on INT/TERM as well would run it
# twice and let the script keep launching workers after an early signal.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# run frontend
python3 -m dynamo.frontend --http-port 8000 &
DYNAMO_PID=$!

# run encode worker
CUDA_VISIBLE_DEVICES="$ENCODE_CUDA_VISIBLE_DEVICES" python3 -m dynamo.trtllm \
  --model-path "$MODEL_PATH" \
  --served-model-name "$SERVED_MODEL_NAME" \
  --extra-engine-args "$ENCODE_ENGINE_ARGS" \
  --modality "$MODALITY" \
  --disaggregation-mode encode &
ENCODE_PID=$!

# run prefill worker
CUDA_VISIBLE_DEVICES="$PREFILL_CUDA_VISIBLE_DEVICES" python3 -m dynamo.trtllm \
  --model-path "$MODEL_PATH" \
  --served-model-name "$SERVED_MODEL_NAME" \
  --extra-engine-args "$PREFILL_ENGINE_ARGS" \
  --modality "$MODALITY" \
  --disaggregation-mode prefill \
  --encode-endpoint "$ENCODE_ENDPOINT" \
  --custom-jinja-template "$CUSTOM_TEMPLATE" &
PREFILL_PID=$!

# run decode worker
CUDA_VISIBLE_DEVICES="$DECODE_CUDA_VISIBLE_DEVICES" python3 -m dynamo.trtllm \
  --model-path "$MODEL_PATH" \
  --served-model-name "$SERVED_MODEL_NAME" \
  --extra-engine-args "$DECODE_ENGINE_ARGS" \
  --modality "$MODALITY" \
  --disaggregation-mode decode \
  --custom-jinja-template "$CUSTOM_TEMPLATE" &
DECODE_PID=$!

# Block on the frontend; when it exits (or a signal arrives) the script
# ends and the EXIT trap stops the workers.
wait "$DYNAMO_PID"
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment