Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1282bd81
Unverified
Commit
1282bd81
authored
Jun 03, 2025
by
汪志鹏
Committed by
GitHub
Jun 03, 2025
Browse files
Add tarsier model support (#18985)
Signed-off-by:
汪志鹏
<
wangzhipeng628@gmail.com
>
parent
bdce64f2
Changes
7
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
689 additions
and
0 deletions
+689
-0
docs/models/supported_models.md
docs/models/supported_models.md
+1
-0
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+20
-0
examples/offline_inference/vision_language_multi_image.py
examples/offline_inference/vision_language_multi_image.py
+21
-0
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+1
-0
tests/models/registry.py
tests/models/registry.py
+2
-0
vllm/model_executor/models/registry.py
vllm/model_executor/models/registry.py
+1
-0
vllm/model_executor/models/tarsier.py
vllm/model_executor/models/tarsier.py
+643
-0
No files found.
docs/models/supported_models.md
View file @
1282bd81
...
@@ -550,6 +550,7 @@ Specified using `--task generate`.
...
@@ -550,6 +550,7 @@ Specified using `--task generate`.
|
`Qwen2_5OmniThinkerForConditionalGeneration`
| Qwen2.5-Omni | T + I
<sup>
E+
</sup>
+ V
<sup>
E+
</sup>
+ A
<sup>
+
</sup>
|
`Qwen/Qwen2.5-Omni-7B`
| | ✅︎ | ✅︎
\*
|
|
`Qwen2_5OmniThinkerForConditionalGeneration`
| Qwen2.5-Omni | T + I
<sup>
E+
</sup>
+ V
<sup>
E+
</sup>
+ A
<sup>
+
</sup>
|
`Qwen/Qwen2.5-Omni-7B`
| | ✅︎ | ✅︎
\*
|
|
`SkyworkR1VChatModel`
| Skywork-R1V-38B | T + I |
`Skywork/Skywork-R1V-38B`
| | ✅︎ | ✅︎ |
|
`SkyworkR1VChatModel`
| Skywork-R1V-38B | T + I |
`Skywork/Skywork-R1V-38B`
| | ✅︎ | ✅︎ |
|
`SmolVLMForConditionalGeneration`
| SmolVLM2 | T + I |
`SmolVLM2-2.2B-Instruct`
| ✅︎ | | ✅︎ |
|
`SmolVLMForConditionalGeneration`
| SmolVLM2 | T + I |
`SmolVLM2-2.2B-Instruct`
| ✅︎ | | ✅︎ |
|
`TarsierForConditionalGeneration`
| Tarsier | T + I
<sup>
E+
</sup>
|
`omni-research/Tarsier-7b`
,
`omni-research/Tarsier-34b`
| | ✅︎ | ✅︎ |
<sup>
^
</sup>
You need to set the architecture name via
`--hf-overrides`
to match the one in vLLM.
<sup>
^
</sup>
You need to set the architecture name via
`--hf-overrides`
to match the one in vLLM.
• For example, to use DeepSeek-VL2 series models:
• For example, to use DeepSeek-VL2 series models:
...
...
examples/offline_inference/vision_language.py
View file @
1282bd81
...
@@ -333,6 +333,25 @@ def run_smolvlm(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -333,6 +333,25 @@ def run_smolvlm(questions: list[str], modality: str) -> ModelRequestData:
)
)
# omni-research/Tarsier-7b
def
run_tarsier
(
questions
:
list
[
str
],
modality
:
str
)
->
ModelRequestData
:
assert
modality
==
"image"
model_name
=
"omni-research/Tarsier-7b"
engine_args
=
EngineArgs
(
model
=
model_name
,
trust_remote_code
=
True
,
max_model_len
=
4096
,
limit_mm_per_prompt
=
{
modality
:
1
},
)
prompts
=
[(
f
"USER: <image>
\n
{
question
}
ASSISTANT:"
)
for
question
in
questions
]
return
ModelRequestData
(
engine_args
=
engine_args
,
prompts
=
prompts
,
)
# InternVL
# InternVL
def
run_internvl
(
questions
:
list
[
str
],
modality
:
str
)
->
ModelRequestData
:
def
run_internvl
(
questions
:
list
[
str
],
modality
:
str
)
->
ModelRequestData
:
model_name
=
"OpenGVLab/InternVL3-2B"
model_name
=
"OpenGVLab/InternVL3-2B"
...
@@ -1091,6 +1110,7 @@ model_example_map = {
...
@@ -1091,6 +1110,7 @@ model_example_map = {
"qwen2_5_omni"
:
run_qwen2_5_omni
,
"qwen2_5_omni"
:
run_qwen2_5_omni
,
"skywork_chat"
:
run_skyworkr1v
,
"skywork_chat"
:
run_skyworkr1v
,
"smolvlm"
:
run_smolvlm
,
"smolvlm"
:
run_smolvlm
,
"tarsier"
:
run_tarsier
,
}
}
...
...
examples/offline_inference/vision_language_multi_image.py
View file @
1282bd81
...
@@ -691,6 +691,26 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
...
@@ -691,6 +691,26 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
)
)
def
load_tarsier
(
question
:
str
,
image_urls
:
list
[
str
])
->
ModelRequestData
:
model_name
=
"omni-research/Tarsier-7b"
engine_args
=
EngineArgs
(
model
=
model_name
,
trust_remote_code
=
True
,
max_model_len
=
4096
,
limit_mm_per_prompt
=
{
"image"
:
len
(
image_urls
)},
)
prompt
=
f
"USER:
{
'<image>'
*
len
(
image_urls
)
}
\n
{
question
}
\n
ASSISTANT:"
image_data
=
[
fetch_image
(
url
)
for
url
in
image_urls
]
return
ModelRequestData
(
engine_args
=
engine_args
,
prompt
=
prompt
,
image_data
=
image_data
,
)
model_example_map
=
{
model_example_map
=
{
"aria"
:
load_aria
,
"aria"
:
load_aria
,
"aya_vision"
:
load_aya_vision
,
"aya_vision"
:
load_aya_vision
,
...
@@ -712,6 +732,7 @@ model_example_map = {
...
@@ -712,6 +732,7 @@ model_example_map = {
"qwen2_vl"
:
load_qwen2_vl
,
"qwen2_vl"
:
load_qwen2_vl
,
"qwen2_5_vl"
:
load_qwen2_5_vl
,
"qwen2_5_vl"
:
load_qwen2_5_vl
,
"smolvlm"
:
load_smolvlm
,
"smolvlm"
:
load_smolvlm
,
"tarsier"
:
load_tarsier
,
}
}
...
...
tests/models/multimodal/processing/test_common.py
View file @
1282bd81
...
@@ -282,6 +282,7 @@ def _test_processing_correctness_one(
...
@@ -282,6 +282,7 @@ def _test_processing_correctness_one(
"Skywork/Skywork-R1V-38B"
,
"Skywork/Skywork-R1V-38B"
,
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
"openai/whisper-large-v3"
,
"openai/whisper-large-v3"
,
"omni-research/Tarsier-7b"
,
])
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
@
pytest
.
mark
.
parametrize
(
"num_batches"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"num_batches"
,
[
32
])
...
...
tests/models/registry.py
View file @
1282bd81
...
@@ -406,6 +406,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -406,6 +406,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
),
# noqa: E501
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
),
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
# noqa: E501
trust_remote_code
=
True
),
trust_remote_code
=
True
),
"TarsierForConditionalGeneration"
:
_HfExamplesInfo
(
"omni-research/Tarsier-7b"
,
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"TarsierForConditionalGeneration"
]}),
# noqa: E501
# [Encoder-decoder]
# [Encoder-decoder]
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Therefore, we borrow the BartTokenizer from the original Bart model
# Therefore, we borrow the BartTokenizer from the original Bart model
...
...
vllm/model_executor/models/registry.py
View file @
1282bd81
...
@@ -211,6 +211,7 @@ _MULTIMODAL_MODELS = {
...
@@ -211,6 +211,7 @@ _MULTIMODAL_MODELS = {
"Qwen2_5OmniForConditionalGeneration"
:
(
"qwen2_5_omni_thinker"
,
"Qwen2_5OmniThinkerForConditionalGeneration"
),
# noqa: E501
"Qwen2_5OmniForConditionalGeneration"
:
(
"qwen2_5_omni_thinker"
,
"Qwen2_5OmniThinkerForConditionalGeneration"
),
# noqa: E501
"UltravoxModel"
:
(
"ultravox"
,
"UltravoxModel"
),
"UltravoxModel"
:
(
"ultravox"
,
"UltravoxModel"
),
"Phi4MMForCausalLM"
:
(
"phi4mm"
,
"Phi4MMForCausalLM"
),
"Phi4MMForCausalLM"
:
(
"phi4mm"
,
"Phi4MMForCausalLM"
),
"TarsierForConditionalGeneration"
:
(
"tarsier"
,
"TarsierForConditionalGeneration"
),
# noqa: E501
# [Encoder-decoder]
# [Encoder-decoder]
"Florence2ForConditionalGeneration"
:
(
"florence2"
,
"Florence2ForConditionalGeneration"
),
# noqa: E501
"Florence2ForConditionalGeneration"
:
(
"florence2"
,
"Florence2ForConditionalGeneration"
),
# noqa: E501
"MllamaForConditionalGeneration"
:
(
"mllama"
,
"MllamaForConditionalGeneration"
),
# noqa: E501
"MllamaForConditionalGeneration"
:
(
"mllama"
,
"MllamaForConditionalGeneration"
),
# noqa: E501
...
...
vllm/model_executor/models/tarsier.py
0 → 100644
View file @
1282bd81
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment