Unverified Commit 43e20c06 authored by Xinyuan Tong's avatar Xinyuan Tong Committed by GitHub
Browse files

Support Mimo-VL (#7579)


Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
parent 4bab50a6
......@@ -38,3 +38,4 @@ in the GitHub search bar.
| **Kimi-VL** (A3B) | `moonshotai/Kimi-VL-A3B-Instruct` | `kimi-vl` | Kimi-VL is a multimodal model that can understand and generate text from images. |
| **Mistral-Small-3.1-24B** | `mistralai/Mistral-Small-3.1-24B-Instruct-2503` | `mistral` | Mistral 3.1 is a multimodal model that can generate text from text or image input. It also supports tool calling and structured output. |
| **Phi-4-multimodal-instruct** | `microsoft/Phi-4-multimodal-instruct` | `phi-4-mm` | Phi-4-multimodal-instruct is the multimodal variant of the Phi-4-mini model, enhanced with LoRA for improved multimodal capabilities. Currently, it supports only text and vision modalities in SGLang. |
| **MiMo-VL** (7B) | `XiaomiMiMo/MiMo-VL-7B-RL` | `mimo-vl` | Xiaomi's compact yet powerful vision-language model featuring a native resolution ViT encoder for fine-grained visual details, an MLP projector for cross-modal alignment, and the MiMo-7B language model optimized for complex reasoning tasks. |
......@@ -921,6 +921,19 @@ register_conv_template(
)
)
# Conversation template registered under the name "mimo-vl". Each turn is
# opened with an "<|im_start|>" role marker and separated by "<|im_end|>\n".
register_conv_template(
    Conversation(
        name="mimo-vl",
        # Default system prompt; it is rendered through system_template below.
        system_message="You are MiMo, an AI assistant developed by Xiaomi.",
        system_template="<|im_start|>system\n{system_message}",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        sep="<|im_end|>\n",
        sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE,
        # NOTE(review): presumably generation is cut off at this marker --
        # confirm against how Conversation.stop_str is consumed.
        stop_str=["<|im_end|>"],
        # NOTE(review): presumably the placeholder text inserted into the
        # prompt for each image -- confirm against Conversation's handling.
        image_token="<|vision_start|><|image_pad|><|vision_end|>",
    )
)
register_conv_template(
Conversation(
......@@ -1049,3 +1062,9 @@ def match_phi_4_mm(model_path: str):
def match_vila(model_path: str):
    """Return the template name "chatml" for model paths that mention VILA.

    The check is a case-insensitive substring search for "vila" anywhere in
    ``model_path``. Returns ``None`` when the path does not match.
    """
    is_vila = re.search(r"vila", model_path, re.IGNORECASE) is not None
    if not is_vila:
        return None
    return "chatml"
@register_conv_template_matching_function
def match_mimo_vl(model_path: str):
    """Return the template name "mimo-vl" for MiMo-VL model paths.

    A path matches when it contains "mimo" followed later by "vl", compared
    case-insensitively (e.g. "XiaomiMiMo/MiMo-VL-7B-RL"). Returns ``None``
    when the path does not match.
    """
    found = re.search(r"mimo.*vl", model_path, re.IGNORECASE)
    return "mimo-vl" if found else None
......@@ -185,5 +185,25 @@ class TestMinicpmoServer(TestOpenAIVisionServer):
self._test_audio_ambient_completion()
class TestMimoVLServer(TestOpenAIVisionServer):
    """TestOpenAIVisionServer subclass configured for XiaomiMiMo/MiMo-VL-7B-RL."""

    @classmethod
    def setUpClass(cls):
        """Launch a server for the MiMo-VL model and store its handle on the class.

        The server is launched against the bare test URL; afterwards
        ``cls.base_url`` is extended with the "/v1" suffix.
        """
        cls.model = "XiaomiMiMo/MiMo-VL-7B-RL"
        cls.api_key = "sk-123456"
        cls.base_url = DEFAULT_URL_FOR_TEST

        # Extra command-line flags forwarded to the launched server.
        launch_flags = ["--trust-remote-code", "--mem-fraction-static", "0.6"]

        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=launch_flags,
        )

        # Only after launch does base_url gain the "/v1" path prefix.
        cls.base_url = cls.base_url + "/v1"
# Run this module's test cases through unittest when executed as a script.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment