Unverified commit e2716073, authored by Kris Hung and committed by GitHub
Browse files

fix: Handle model not found error for multimodal example (#1545)

parent 636bac0e
...@@ -18,8 +18,9 @@ import logging ...@@ -18,8 +18,9 @@ import logging
from components.processor import Processor from components.processor import Processor
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.responses import StreamingResponse from fastapi.responses import JSONResponse, StreamingResponse
from utils.protocol import MultiModalRequest from utils.protocol import MultiModalRequest
from utils.vllm import parse_vllm_args
from dynamo.sdk import DYNAMO_IMAGE, api, depends, service from dynamo.sdk import DYNAMO_IMAGE, api, depends, service
...@@ -38,8 +39,18 @@ logger = logging.getLogger(__name__) ...@@ -38,8 +39,18 @@ logger = logging.getLogger(__name__)
class Frontend: class Frontend:
processor = depends(Processor) processor = depends(Processor)
def __init__(self):
class_name = self.__class__.__name__
self.engine_args = parse_vllm_args(class_name, "")
@api(name="v1/chat/completions") @api(name="v1/chat/completions")
async def generate(self, request: MultiModalRequest): async def generate(self, request: MultiModalRequest):
if self.engine_args.model != request.model:
return JSONResponse(
{"error": f"Model '{request.model}' not found"},
status_code=404,
)
async def content_generator(): async def content_generator():
async for response in self.processor.generate(request.model_dump_json()): async for response in self.processor.generate(request.model_dump_json()):
try: try:
......
...@@ -18,8 +18,9 @@ import logging ...@@ -18,8 +18,9 @@ import logging
from components.video_processor import Processor from components.video_processor import Processor
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.responses import StreamingResponse from fastapi.responses import JSONResponse, StreamingResponse
from utils.protocol import MultiModalRequest from utils.protocol import MultiModalRequest
from utils.vllm import parse_vllm_args
from dynamo.sdk import DYNAMO_IMAGE, api, depends, service from dynamo.sdk import DYNAMO_IMAGE, api, depends, service
...@@ -38,8 +39,18 @@ logger = logging.getLogger(__name__) ...@@ -38,8 +39,18 @@ logger = logging.getLogger(__name__)
class Frontend: class Frontend:
processor = depends(Processor) processor = depends(Processor)
def __init__(self):
class_name = self.__class__.__name__
self.engine_args = parse_vllm_args(class_name, "")
@api(name="v1/chat/completions") @api(name="v1/chat/completions")
async def generate(self, request: MultiModalRequest): async def generate(self, request: MultiModalRequest):
if self.engine_args.model != request.model:
return JSONResponse(
{"error": f"Model '{request.model}' not found"},
status_code=404,
)
async def content_generator(): async def content_generator():
async for response in self.processor.generate(request.model_dump_json()): async for response in self.processor.generate(request.model_dump_json()):
try: try:
......
...@@ -17,6 +17,9 @@ Common: ...@@ -17,6 +17,9 @@ Common:
block-size: 64 block-size: 64
max-model-len: 4096 max-model-len: 4096
Frontend:
common-configs: [model]
Processor: Processor:
router: round-robin router: round-robin
prompt-template: "USER: <image>\n<prompt> ASSISTANT:" prompt-template: "USER: <image>\n<prompt> ASSISTANT:"
......
...@@ -18,6 +18,9 @@ Common: ...@@ -18,6 +18,9 @@ Common:
max-model-len: 4096 max-model-len: 4096
trust-remote-code: true trust-remote-code: true
Frontend:
common-configs: [model]
Processor: Processor:
router: round-robin router: round-robin
prompt-template: "<|user|>\n<|image_1|>\n<prompt><|end|>\n<|assistant|>\n" prompt-template: "<|user|>\n<|image_1|>\n<prompt><|end|>\n<|assistant|>\n"
......
...@@ -17,6 +17,9 @@ Common: ...@@ -17,6 +17,9 @@ Common:
block-size: 64 block-size: 64
max-model-len: 4096 max-model-len: 4096
Frontend:
common-configs: [model]
Processor: Processor:
router: round-robin router: round-robin
prompt-template: "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|><prompt><|im_end|>\n<|im_start|>assistant\n" prompt-template: "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|><prompt><|im_end|>\n<|im_start|>assistant\n"
......
...@@ -24,6 +24,9 @@ Common: ...@@ -24,6 +24,9 @@ Common:
video-token-id: 32000 video-token-id: 32000
dummy-tokens-per-frame: 144 dummy-tokens-per-frame: 144
Frontend:
common-configs: [model]
Processor: Processor:
router: round-robin router: round-robin
common-configs: [model, block-size, max-model-len] common-configs: [model, block-size, max-model-len]
......
...@@ -20,6 +20,9 @@ Common: ...@@ -20,6 +20,9 @@ Common:
num-patches: 576 num-patches: 576
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}' kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend:
common-configs: [model]
Processor: Processor:
router: round-robin router: round-robin
prompt-template: "USER: <image>\n<prompt> ASSISTANT:" prompt-template: "USER: <image>\n<prompt> ASSISTANT:"
......
...@@ -25,6 +25,9 @@ Common: ...@@ -25,6 +25,9 @@ Common:
video-token-id: 32000 video-token-id: 32000
dummy-tokens-per-frame: 144 dummy-tokens-per-frame: 144
Frontend:
common-configs: [model]
Processor: Processor:
router: round-robin router: round-robin
common-configs: [model, block-size] common-configs: [model, block-size]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment