Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5b8c3907
Unverified
Commit
5b8c3907
authored
May 07, 2025
by
Cyrus Leung
Committed by
GitHub
May 06, 2025
Browse files
[Bugfix] Fix modality limits in vision language example (#17721)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent
7525d5f3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 36 additions and 36 deletions
+36
-36
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+36
-36
No files found.
examples/offline_inference/vision_language.py
View file @
5b8c3907
...
@@ -45,7 +45,7 @@ def run_aria(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -45,7 +45,7 @@ def run_aria(questions: list[str], modality: str) -> ModelRequestData:
max_model_len
=
4096
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[(
f
"<|im_start|>user
\n
<fim_prefix><|img|><fim_suffix>
{
question
}
"
prompts
=
[(
f
"<|im_start|>user
\n
<fim_prefix><|img|><fim_suffix>
{
question
}
"
...
@@ -71,7 +71,7 @@ def run_aya_vision(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -71,7 +71,7 @@ def run_aya_vision(questions: list[str], modality: str) -> ModelRequestData:
max_model_len
=
2048
,
max_model_len
=
2048
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
mm_processor_kwargs
=
{
"crop_to_patches"
:
True
},
mm_processor_kwargs
=
{
"crop_to_patches"
:
True
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
prompts
=
[
f
"<|START_OF_TURN_TOKEN|><|USER_TOKEN|><image>
{
question
}
<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
f
"<|START_OF_TURN_TOKEN|><|USER_TOKEN|><image>
{
question
}
<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
...
@@ -92,7 +92,7 @@ def run_blip2(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -92,7 +92,7 @@ def run_blip2(questions: list[str], modality: str) -> ModelRequestData:
prompts
=
[
f
"Question:
{
question
}
Answer:"
for
question
in
questions
]
prompts
=
[
f
"Question:
{
question
}
Answer:"
for
question
in
questions
]
engine_args
=
EngineArgs
(
engine_args
=
EngineArgs
(
model
=
"Salesforce/blip2-opt-6.7b"
,
model
=
"Salesforce/blip2-opt-6.7b"
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -110,7 +110,7 @@ def run_chameleon(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -110,7 +110,7 @@ def run_chameleon(questions: list[str], modality: str) -> ModelRequestData:
model
=
"facebook/chameleon-7b"
,
model
=
"facebook/chameleon-7b"
,
max_model_len
=
4096
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -130,7 +130,7 @@ def run_deepseek_vl2(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -130,7 +130,7 @@ def run_deepseek_vl2(questions: list[str], modality: str) -> ModelRequestData:
max_model_len
=
4096
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
hf_overrides
=
{
"architectures"
:
[
"DeepseekVLV2ForCausalLM"
]},
hf_overrides
=
{
"architectures"
:
[
"DeepseekVLV2ForCausalLM"
]},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
prompts
=
[
...
@@ -155,7 +155,7 @@ def run_florence2(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -155,7 +155,7 @@ def run_florence2(questions: list[str], modality: str) -> ModelRequestData:
max_num_seqs
=
2
,
max_num_seqs
=
2
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
"<MORE_DETAILED_CAPTION>"
for
_
in
questions
]
prompts
=
[
"<MORE_DETAILED_CAPTION>"
for
_
in
questions
]
...
@@ -175,7 +175,7 @@ def run_fuyu(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -175,7 +175,7 @@ def run_fuyu(questions: list[str], modality: str) -> ModelRequestData:
model
=
"adept/fuyu-8b"
,
model
=
"adept/fuyu-8b"
,
max_model_len
=
2048
,
max_model_len
=
2048
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -194,7 +194,7 @@ def run_gemma3(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -194,7 +194,7 @@ def run_gemma3(questions: list[str], modality: str) -> ModelRequestData:
max_model_len
=
2048
,
max_model_len
=
2048
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
mm_processor_kwargs
=
{
"do_pan_and_scan"
:
True
},
mm_processor_kwargs
=
{
"do_pan_and_scan"
:
True
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[(
"<bos><start_of_turn>user
\n
"
prompts
=
[(
"<bos><start_of_turn>user
\n
"
...
@@ -219,7 +219,7 @@ def run_glm4v(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -219,7 +219,7 @@ def run_glm4v(questions: list[str], modality: str) -> ModelRequestData:
trust_remote_code
=
True
,
trust_remote_code
=
True
,
enforce_eager
=
True
,
enforce_eager
=
True
,
hf_overrides
=
{
"architectures"
:
[
"GLM4VForCausalLM"
]},
hf_overrides
=
{
"architectures"
:
[
"GLM4VForCausalLM"
]},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
prompts
=
[
...
@@ -246,7 +246,7 @@ def run_h2ovl(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -246,7 +246,7 @@ def run_h2ovl(questions: list[str], modality: str) -> ModelRequestData:
model
=
model_name
,
model
=
model_name
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
max_model_len
=
8192
,
max_model_len
=
8192
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
,
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
,
...
@@ -287,7 +287,7 @@ def run_idefics3(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -287,7 +287,7 @@ def run_idefics3(questions: list[str], modality: str) -> ModelRequestData:
"longest_edge"
:
3
*
364
"longest_edge"
:
3
*
364
},
},
},
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[(
prompts
=
[(
f
"<|begin_of_text|>User:<image>
{
question
}
<end_of_utterance>
\n
Assistant:"
f
"<|begin_of_text|>User:<image>
{
question
}
<end_of_utterance>
\n
Assistant:"
...
@@ -314,7 +314,7 @@ def run_smolvlm(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -314,7 +314,7 @@ def run_smolvlm(questions: list[str], modality: str) -> ModelRequestData:
"longest_edge"
:
384
"longest_edge"
:
384
},
},
},
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
prompts
=
[
(
f
"<|im_start|>User:<image>
{
question
}
<end_of_utterance>
\n
Assistant:"
)
(
f
"<|im_start|>User:<image>
{
question
}
<end_of_utterance>
\n
Assistant:"
)
...
@@ -337,7 +337,7 @@ def run_internvl(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -337,7 +337,7 @@ def run_internvl(questions: list[str], modality: str) -> ModelRequestData:
model
=
model_name
,
model
=
model_name
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
max_model_len
=
4096
,
max_model_len
=
4096
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
,
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
,
...
@@ -378,7 +378,7 @@ def run_kimi_vl(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -378,7 +378,7 @@ def run_kimi_vl(questions: list[str], modality: str) -> ModelRequestData:
model
=
"moonshotai/Kimi-VL-A3B-Instruct"
,
model
=
"moonshotai/Kimi-VL-A3B-Instruct"
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
max_model_len
=
4096
,
max_model_len
=
4096
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -398,7 +398,7 @@ def run_llava(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -398,7 +398,7 @@ def run_llava(questions: list[str], modality: str) -> ModelRequestData:
engine_args
=
EngineArgs
(
engine_args
=
EngineArgs
(
model
=
"llava-hf/llava-1.5-7b-hf"
,
model
=
"llava-hf/llava-1.5-7b-hf"
,
max_model_len
=
4096
,
max_model_len
=
4096
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -415,7 +415,7 @@ def run_llava_next(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -415,7 +415,7 @@ def run_llava_next(questions: list[str], modality: str) -> ModelRequestData:
engine_args
=
EngineArgs
(
engine_args
=
EngineArgs
(
model
=
"llava-hf/llava-v1.6-mistral-7b-hf"
,
model
=
"llava-hf/llava-v1.6-mistral-7b-hf"
,
max_model_len
=
8192
,
max_model_len
=
8192
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -437,7 +437,7 @@ def run_llava_next_video(questions: list[str],
...
@@ -437,7 +437,7 @@ def run_llava_next_video(questions: list[str],
model
=
"llava-hf/LLaVA-NeXT-Video-7B-hf"
,
model
=
"llava-hf/LLaVA-NeXT-Video-7B-hf"
,
max_model_len
=
8192
,
max_model_len
=
8192
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -465,7 +465,7 @@ def run_llava_onevision(questions: list[str],
...
@@ -465,7 +465,7 @@ def run_llava_onevision(questions: list[str],
engine_args
=
EngineArgs
(
engine_args
=
EngineArgs
(
model
=
"llava-hf/llava-onevision-qwen2-7b-ov-hf"
,
model
=
"llava-hf/llava-onevision-qwen2-7b-ov-hf"
,
max_model_len
=
16384
,
max_model_len
=
16384
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -488,7 +488,7 @@ def run_mantis(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -488,7 +488,7 @@ def run_mantis(questions: list[str], modality: str) -> ModelRequestData:
model
=
"TIGER-Lab/Mantis-8B-siglip-llama3"
,
model
=
"TIGER-Lab/Mantis-8B-siglip-llama3"
,
max_model_len
=
4096
,
max_model_len
=
4096
,
hf_overrides
=
{
"architectures"
:
[
"MantisForConditionalGeneration"
]},
hf_overrides
=
{
"architectures"
:
[
"MantisForConditionalGeneration"
]},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
stop_token_ids
=
[
128009
]
stop_token_ids
=
[
128009
]
...
@@ -529,7 +529,7 @@ def run_minicpmv_base(questions: list[str], modality: str, model_name):
...
@@ -529,7 +529,7 @@ def run_minicpmv_base(questions: list[str], modality: str, model_name):
max_model_len
=
4096
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
# NOTE The stop_token_ids are different for various versions of MiniCPM-V
# NOTE The stop_token_ids are different for various versions of MiniCPM-V
# 2.0
# 2.0
...
@@ -584,7 +584,7 @@ def run_mistral3(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -584,7 +584,7 @@ def run_mistral3(questions: list[str], modality: str) -> ModelRequestData:
max_model_len
=
8192
,
max_model_len
=
8192
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
tensor_parallel_size
=
2
,
tensor_parallel_size
=
2
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
f
"<s>[INST]
{
question
}
\n
[IMG][/INST]"
for
question
in
questions
]
prompts
=
[
f
"<s>[INST]
{
question
}
\n
[IMG][/INST]"
for
question
in
questions
]
...
@@ -610,7 +610,7 @@ def run_mllama(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -610,7 +610,7 @@ def run_mllama(questions: list[str], modality: str) -> ModelRequestData:
model
=
model_name
,
model
=
model_name
,
max_model_len
=
8192
,
max_model_len
=
8192
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
...
@@ -645,7 +645,7 @@ def run_llama4(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -645,7 +645,7 @@ def run_llama4(questions: list[str], modality: str) -> ModelRequestData:
max_num_seqs
=
4
,
max_num_seqs
=
4
,
tensor_parallel_size
=
8
,
tensor_parallel_size
=
8
,
gpu_memory_utilization
=
0.4
,
gpu_memory_utilization
=
0.4
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
...
@@ -680,7 +680,7 @@ def run_molmo(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -680,7 +680,7 @@ def run_molmo(questions: list[str], modality: str) -> ModelRequestData:
model
=
model_name
,
model
=
model_name
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
prompts
=
[
...
@@ -706,7 +706,7 @@ def run_nvlm_d(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -706,7 +706,7 @@ def run_nvlm_d(questions: list[str], modality: str) -> ModelRequestData:
trust_remote_code
=
True
,
trust_remote_code
=
True
,
max_model_len
=
4096
,
max_model_len
=
4096
,
tensor_parallel_size
=
4
,
tensor_parallel_size
=
4
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
,
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
,
...
@@ -738,7 +738,7 @@ def run_ovis2(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -738,7 +738,7 @@ def run_ovis2(questions: list[str], modality: str) -> ModelRequestData:
trust_remote_code
=
True
,
trust_remote_code
=
True
,
dtype
=
"half"
,
dtype
=
"half"
,
hf_overrides
=
{
"architectures"
:
[
"Ovis2ForConditionalGeneration"
]},
hf_overrides
=
{
"architectures"
:
[
"Ovis2ForConditionalGeneration"
]},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
placeholder
=
"<image>
\n
"
placeholder
=
"<image>
\n
"
...
@@ -761,7 +761,7 @@ def run_paligemma(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -761,7 +761,7 @@ def run_paligemma(questions: list[str], modality: str) -> ModelRequestData:
prompts
=
[
"caption en"
for
_
in
questions
]
prompts
=
[
"caption en"
for
_
in
questions
]
engine_args
=
EngineArgs
(
engine_args
=
EngineArgs
(
model
=
"google/paligemma-3b-mix-224"
,
model
=
"google/paligemma-3b-mix-224"
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -778,7 +778,7 @@ def run_paligemma2(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -778,7 +778,7 @@ def run_paligemma2(questions: list[str], modality: str) -> ModelRequestData:
prompts
=
[
"caption en"
for
_
in
questions
]
prompts
=
[
"caption en"
for
_
in
questions
]
engine_args
=
EngineArgs
(
engine_args
=
EngineArgs
(
model
=
"google/paligemma2-3b-ft-docci-448"
,
model
=
"google/paligemma2-3b-ft-docci-448"
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -815,7 +815,7 @@ def run_phi3v(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -815,7 +815,7 @@ def run_phi3v(questions: list[str], modality: str) -> ModelRequestData:
max_num_seqs
=
2
,
max_num_seqs
=
2
,
# Note - mm_processor_kwargs can also be passed to generate/chat calls
# Note - mm_processor_kwargs can also be passed to generate/chat calls
mm_processor_kwargs
=
{
"num_crops"
:
16
},
mm_processor_kwargs
=
{
"num_crops"
:
16
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -849,7 +849,7 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -849,7 +849,7 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData:
max_lora_rank
=
320
,
max_lora_rank
=
320
,
# Note - mm_processor_kwargs can also be passed to generate/chat calls
# Note - mm_processor_kwargs can also be passed to generate/chat calls
mm_processor_kwargs
=
{
"dynamic_hd"
:
16
},
mm_processor_kwargs
=
{
"dynamic_hd"
:
16
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
return
ModelRequestData
(
return
ModelRequestData
(
...
@@ -870,7 +870,7 @@ def run_pixtral_hf(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -870,7 +870,7 @@ def run_pixtral_hf(questions: list[str], modality: str) -> ModelRequestData:
model
=
model_name
,
model
=
model_name
,
max_model_len
=
6144
,
max_model_len
=
6144
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
f
"<s>[INST]
{
question
}
\n
[IMG][/INST]"
for
question
in
questions
]
prompts
=
[
f
"<s>[INST]
{
question
}
\n
[IMG][/INST]"
for
question
in
questions
]
...
@@ -891,7 +891,7 @@ def run_qwen_vl(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -891,7 +891,7 @@ def run_qwen_vl(questions: list[str], modality: str) -> ModelRequestData:
max_model_len
=
1024
,
max_model_len
=
1024
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
hf_overrides
=
{
"architectures"
:
[
"QwenVLForConditionalGeneration"
]},
hf_overrides
=
{
"architectures"
:
[
"QwenVLForConditionalGeneration"
]},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
prompts
=
[
f
"
{
question
}
Picture 1: <img></img>
\n
"
for
question
in
questions
]
prompts
=
[
f
"
{
question
}
Picture 1: <img></img>
\n
"
for
question
in
questions
]
...
@@ -916,7 +916,7 @@ def run_qwen2_vl(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -916,7 +916,7 @@ def run_qwen2_vl(questions: list[str], modality: str) -> ModelRequestData:
"min_pixels"
:
28
*
28
,
"min_pixels"
:
28
*
28
,
"max_pixels"
:
1280
*
28
*
28
,
"max_pixels"
:
1280
*
28
*
28
,
},
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
if
modality
==
"image"
:
if
modality
==
"image"
:
...
@@ -951,7 +951,7 @@ def run_qwen2_5_vl(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -951,7 +951,7 @@ def run_qwen2_5_vl(questions: list[str], modality: str) -> ModelRequestData:
"max_pixels"
:
1280
*
28
*
28
,
"max_pixels"
:
1280
*
28
*
28
,
"fps"
:
1
,
"fps"
:
1
,
},
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
if
modality
==
"image"
:
if
modality
==
"image"
:
...
@@ -985,7 +985,7 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
...
@@ -985,7 +985,7 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
"max_pixels"
:
1280
*
28
*
28
,
"max_pixels"
:
1280
*
28
*
28
,
"fps"
:
[
1
],
"fps"
:
[
1
],
},
},
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
if
modality
==
"image"
:
if
modality
==
"image"
:
...
@@ -1018,7 +1018,7 @@ def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData:
...
@@ -1018,7 +1018,7 @@ def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData:
model
=
model_name
,
model
=
model_name
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
max_model_len
=
4096
,
max_model_len
=
4096
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
modality
:
1
},
)
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
,
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
,
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.