Merge pull request #220 from InfiniTensor/issue/219

issue/219: support vllm bench

Merge pull request #220 from InfiniTensor/issue/219
issue/219: support vllm bench
a940a967 · thatPepe · GitHub · 1a408e1a · 9e64b06c · a940a967
Unverified Commit a940a967 authored Feb 12, 2026 by thatPepe Committed by GitHub Feb 12, 2026
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 36 additions and 0 deletions

python/infinilm/server/inference_server.py python/infinilm/server/inference_server.py +36 -0

No files found.
--- a/python/infinilm/server/inference_server.py
+++ b/python/infinilm/server/inference_server.py
@@ -210,6 +210,9 @@ class InferenceServer:
                else:
                    data["messages"] = [{"role": "user", "content": data.get("prompt")}]

+            # Normalize messages to handle multimodal content (list format)
+            data["messages"] = self._normalize_messages(data.get("messages", []))
+
            stream = data.get("stream", False)
            request_id = f"cmpl-{uuid.uuid4().hex}"

@@ -257,6 +260,39 @@ class InferenceServer:
        async def list_models_legacy():
            return _models_payload()

+    def _normalize_messages(self, messages: list) -> list:
+        """Normalize messages to handle multimodal content (list format).
+
+        Converts content from list format [{"type": "text", "text": "..."}]
+        to string format for chat template compatibility.
+        """
+        normalized = []
+        for msg in messages:
+            if not isinstance(msg, dict):
+                normalized.append(msg)
+                continue
+
+            content = msg.get("content")
+            if isinstance(content, list):
+                # Extract text from multimodal content list
+                text_parts = []
+                for part in content:
+                    if isinstance(part, dict):
+                        if part.get("type") == "text" and "text" in part:
+                            text_parts.append(part["text"])
+                        elif isinstance(part, str):
+                            text_parts.append(part)
+                    elif isinstance(part, str):
+                        text_parts.append(part)
+                # Join all text parts
+                normalized_msg = msg.copy()
+                normalized_msg["content"] = "".join(text_parts) if text_parts else ""
+                normalized.append(normalized_msg)
+            else:
+                normalized.append(msg)
+
+        return normalized
+
    def _build_sampling_params(self, data: dict) -> SamplingParams:
        """Build SamplingParams from request data."""
        # Support both: