Unverified commit e2716073, authored by Kris Hung and committed by GitHub
Browse files

fix: Handle model not found error for multimodal example (#1545)

parent 636bac0e
......@@ -18,8 +18,9 @@ import logging
from components.processor import Processor
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from fastapi.responses import JSONResponse, StreamingResponse
from utils.protocol import MultiModalRequest
from utils.vllm import parse_vllm_args
from dynamo.sdk import DYNAMO_IMAGE, api, depends, service
......@@ -38,8 +39,18 @@ logger = logging.getLogger(__name__)
class Frontend:
processor = depends(Processor)
def __init__(self):
    """Load the engine arguments for this component.

    Passes this instance's class name and an empty string to
    ``parse_vllm_args`` and keeps the result on ``self.engine_args``.
    ``generate`` compares ``self.engine_args.model`` against the model
    named in each incoming request.
    """
    self.engine_args = parse_vllm_args(self.__class__.__name__, "")
@api(name="v1/chat/completions")
async def generate(self, request: MultiModalRequest):
if self.engine_args.model != request.model:
return JSONResponse(
{"error": f"Model '{request.model}' not found"},
status_code=404,
)
async def content_generator():
async for response in self.processor.generate(request.model_dump_json()):
try:
......
......@@ -18,8 +18,9 @@ import logging
from components.video_processor import Processor
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from fastapi.responses import JSONResponse, StreamingResponse
from utils.protocol import MultiModalRequest
from utils.vllm import parse_vllm_args
from dynamo.sdk import DYNAMO_IMAGE, api, depends, service
......@@ -38,8 +39,18 @@ logger = logging.getLogger(__name__)
class Frontend:
processor = depends(Processor)
def __init__(self):
    """Load the engine arguments for this component.

    Passes this instance's class name and an empty string to
    ``parse_vllm_args`` and keeps the result on ``self.engine_args``.
    ``generate`` compares ``self.engine_args.model`` against the model
    named in each incoming request.
    """
    self.engine_args = parse_vllm_args(self.__class__.__name__, "")
@api(name="v1/chat/completions")
async def generate(self, request: MultiModalRequest):
if self.engine_args.model != request.model:
return JSONResponse(
{"error": f"Model '{request.model}' not found"},
status_code=404,
)
async def content_generator():
async for response in self.processor.generate(request.model_dump_json()):
try:
......
......@@ -17,6 +17,9 @@ Common:
block-size: 64
max-model-len: 4096
Frontend:
common-configs: [model]
Processor:
router: round-robin
prompt-template: "USER: <image>\n<prompt> ASSISTANT:"
......
......@@ -18,6 +18,9 @@ Common:
max-model-len: 4096
trust-remote-code: true
Frontend:
common-configs: [model]
Processor:
router: round-robin
prompt-template: "<|user|>\n<|image_1|>\n<prompt><|end|>\n<|assistant|>\n"
......
......@@ -17,6 +17,9 @@ Common:
block-size: 64
max-model-len: 4096
Frontend:
common-configs: [model]
Processor:
router: round-robin
prompt-template: "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|><prompt><|im_end|>\n<|im_start|>assistant\n"
......
......@@ -24,6 +24,9 @@ Common:
video-token-id: 32000
dummy-tokens-per-frame: 144
Frontend:
common-configs: [model]
Processor:
router: round-robin
common-configs: [model, block-size, max-model-len]
......
......@@ -20,6 +20,9 @@ Common:
num-patches: 576
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend:
common-configs: [model]
Processor:
router: round-robin
prompt-template: "USER: <image>\n<prompt> ASSISTANT:"
......
......@@ -25,6 +25,9 @@ Common:
video-token-id: 32000
dummy-tokens-per-frame: 144
Frontend:
common-configs: [model]
Processor:
router: round-robin
common-configs: [model, block-size]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment