Unverified commit 40a87562, authored by Isotr0py and committed by GitHub
Browse files

[Chore]: Remove HF format Phi4-MM examples (#31405)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent 3d024985
...@@ -213,37 +213,6 @@ def run_phi4mm(question: str, audio_count: int) -> ModelRequestData: ...@@ -213,37 +213,6 @@ def run_phi4mm(question: str, audio_count: int) -> ModelRequestData:
) )
def run_phi4_multimodal(question: str, audio_count: int) -> ModelRequestData:
    """
    Build the request for HF-format Phi-4-multimodal-instruct with audio inputs.

    The prompt carries one ``<|audio|>`` placeholder per audio clip, and the
    speech LoRA found inside the downloaded snapshot is attached to the
    request.
    """
    snapshot_dir = snapshot_download(
        "microsoft/Phi-4-multimodal-instruct", revision="refs/pr/70"
    )
    # The vision and speech adapters sit alongside the base model in the
    # snapshot, so the adapter directory has to be spelled out by hand.
    speech_adapter_dir = os.path.join(snapshot_dir, "speech-lora")

    placeholders = "<|audio|>" * audio_count
    audio_prompt = f"<|user|>{placeholders}{question}<|end|><|assistant|>"

    return ModelRequestData(
        engine_args=EngineArgs(
            model=snapshot_dir,
            max_model_len=12800,
            max_num_seqs=2,
            enable_lora=True,
            max_lora_rank=320,
            limit_mm_per_prompt={"audio": audio_count},
        ),
        prompt=audio_prompt,
        lora_requests=[LoRARequest("speech", 1, speech_adapter_dir)],
    )
# Qwen2-Audio # Qwen2-Audio
def run_qwen2_audio(question: str, audio_count: int) -> ModelRequestData: def run_qwen2_audio(question: str, audio_count: int) -> ModelRequestData:
model_name = "Qwen/Qwen2-Audio-7B-Instruct" model_name = "Qwen/Qwen2-Audio-7B-Instruct"
...@@ -416,7 +385,6 @@ model_example_map = { ...@@ -416,7 +385,6 @@ model_example_map = {
"midashenglm": run_midashenglm, "midashenglm": run_midashenglm,
"minicpmo": run_minicpmo, "minicpmo": run_minicpmo,
"phi4_mm": run_phi4mm, "phi4_mm": run_phi4mm,
"phi4_multimodal": run_phi4_multimodal,
"qwen2_audio": run_qwen2_audio, "qwen2_audio": run_qwen2_audio,
"qwen2_5_omni": run_qwen2_5_omni, "qwen2_5_omni": run_qwen2_5_omni,
"ultravox": run_ultravox, "ultravox": run_ultravox,
......
...@@ -1424,41 +1424,6 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData: ...@@ -1424,41 +1424,6 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData:
) )
# HF format Phi-4-multimodal-instruct
def run_phi4_multimodal(questions: list[str], modality: str) -> ModelRequestData:
    """
    Build the request for HF-format Phi-4-multimodal-instruct with image inputs.

    Only the ``"image"`` modality is accepted. Each question is wrapped in a
    single-image chat prompt, and the vision LoRA found inside the downloaded
    snapshot is attached to the request.
    """
    assert modality == "image"

    snapshot_dir = snapshot_download(
        "microsoft/Phi-4-multimodal-instruct", revision="refs/pr/70"
    )
    # The vision and speech adapters sit alongside the base model in the
    # snapshot, so the adapter directory has to be spelled out by hand.
    vision_adapter_dir = os.path.join(snapshot_dir, "vision-lora")

    image_prompts = [
        f"<|user|><|image|>{question}<|end|><|assistant|>" for question in questions
    ]

    phi4_engine_args = EngineArgs(
        model=snapshot_dir,
        max_model_len=5120,
        max_num_seqs=2,
        max_num_batched_tokens=12800,
        enable_lora=True,
        max_lora_rank=320,
        # Note - mm_processor_kwargs can also be passed to generate/chat calls
        mm_processor_kwargs={"dynamic_hd": 16},
        limit_mm_per_prompt={"image": 1},
    )
    vision_lora = LoRARequest("vision", 1, vision_adapter_dir)

    return ModelRequestData(
        engine_args=phi4_engine_args,
        prompts=image_prompts,
        lora_requests=[vision_lora],
    )
# Pixtral HF-format # Pixtral HF-format
def run_pixtral_hf(questions: list[str], modality: str) -> ModelRequestData: def run_pixtral_hf(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image" assert modality == "image"
...@@ -1904,7 +1869,6 @@ model_example_map = { ...@@ -1904,7 +1869,6 @@ model_example_map = {
"paligemma2": run_paligemma2, "paligemma2": run_paligemma2,
"phi3_v": run_phi3v, "phi3_v": run_phi3v,
"phi4_mm": run_phi4mm, "phi4_mm": run_phi4mm,
"phi4_multimodal": run_phi4_multimodal,
"pixtral_hf": run_pixtral_hf, "pixtral_hf": run_pixtral_hf,
"qwen_vl": run_qwen_vl, "qwen_vl": run_qwen_vl,
"qwen2_vl": run_qwen2_vl, "qwen2_vl": run_qwen2_vl,
......
...@@ -932,40 +932,6 @@ def load_phi4mm(question: str, image_urls: list[str]) -> ModelRequestData: ...@@ -932,40 +932,6 @@ def load_phi4mm(question: str, image_urls: list[str]) -> ModelRequestData:
) )
def load_phi4_multimodal(question: str, image_urls: list[str]) -> ModelRequestData:
    """
    Build the request for HF-format Phi-4-multimodal-instruct with several images.

    The prompt carries one ``<|image|>`` placeholder per URL, every URL is
    fetched into image data, and the vision LoRA found inside the downloaded
    snapshot is attached to the request.
    """
    snapshot_dir = snapshot_download(
        "microsoft/Phi-4-multimodal-instruct", revision="refs/pr/70"
    )
    # The vision and speech adapters sit alongside the base model in the
    # snapshot, so the adapter directory has to be spelled out by hand.
    vision_adapter_dir = os.path.join(snapshot_dir, "vision-lora")

    image_count = len(image_urls)
    phi4_engine_args = EngineArgs(
        model=snapshot_dir,
        max_model_len=4096,
        max_num_seqs=2,
        limit_mm_per_prompt={"image": image_count},
        enable_lora=True,
        max_lora_rank=320,
        # Note - mm_processor_kwargs can also be passed to generate/chat calls
        mm_processor_kwargs={"dynamic_hd": 4},
    )

    placeholders = "<|image|>" * image_count
    chat_prompt = f"<|user|>{placeholders}{question}<|end|><|assistant|>"

    # Fetch the images before building the LoRA request, matching the order
    # in which the original keyword arguments were evaluated.
    fetched_images = [fetch_image(url) for url in image_urls]
    vision_lora = LoRARequest("vision", 1, vision_adapter_dir)

    return ModelRequestData(
        engine_args=phi4_engine_args,
        prompt=chat_prompt,
        image_data=fetched_images,
        lora_requests=[vision_lora],
    )
def load_qwen_vl_chat(question: str, image_urls: list[str]) -> ModelRequestData: def load_qwen_vl_chat(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "Qwen/Qwen-VL-Chat" model_name = "Qwen/Qwen-VL-Chat"
engine_args = EngineArgs( engine_args = EngineArgs(
...@@ -1363,7 +1329,6 @@ model_example_map = { ...@@ -1363,7 +1329,6 @@ model_example_map = {
"paddleocr_vl": load_paddleocr_vl, "paddleocr_vl": load_paddleocr_vl,
"phi3_v": load_phi3v, "phi3_v": load_phi3v,
"phi4_mm": load_phi4mm, "phi4_mm": load_phi4mm,
"phi4_multimodal": load_phi4_multimodal,
"pixtral_hf": load_pixtral_hf, "pixtral_hf": load_pixtral_hf,
"qwen_vl_chat": load_qwen_vl_chat, "qwen_vl_chat": load_qwen_vl_chat,
"qwen2_vl": load_qwen2_vl, "qwen2_vl": load_qwen2_vl,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment