Unverified commit 033b75f5, authored by Lianmin Zheng, committed by GitHub
Browse files

[Auto Sync] Update serving_base.py, serving_chat.py, servin... (20250910) (#10282)


Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: cctry <shiyang@x.ai>
parent f3b5db6e
from __future__ import annotations
import json import json
import logging import logging
import uuid import uuid
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Any, Optional, Union from typing import TYPE_CHECKING, Any, Optional, Union
from fastapi import HTTPException, Request from fastapi import HTTPException, Request
from fastapi.responses import ORJSONResponse, StreamingResponse from fastapi.responses import ORJSONResponse, StreamingResponse
from sglang.srt.entrypoints.openai.protocol import ErrorResponse, OpenAIServingRequest from sglang.srt.entrypoints.openai.protocol import ErrorResponse, OpenAIServingRequest
from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.tokenizer_manager import TokenizerManager
if TYPE_CHECKING:
from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
from __future__ import annotations
import copy import copy
import json import json
import logging import logging
import time import time
import uuid import uuid
from typing import Any, AsyncGenerator, Dict, List, Optional, Union from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union
from fastapi import Request from fastapi import Request
from fastapi.responses import ORJSONResponse, StreamingResponse from fastapi.responses import ORJSONResponse, StreamingResponse
...@@ -33,13 +35,15 @@ from sglang.srt.entrypoints.openai.utils import ( ...@@ -33,13 +35,15 @@ from sglang.srt.entrypoints.openai.utils import (
) )
from sglang.srt.function_call.function_call_parser import FunctionCallParser from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.conversation import generate_chat_conv from sglang.srt.parser.conversation import generate_chat_conv
from sglang.srt.parser.jinja_template_utils import process_content_for_template_format from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.utils import convert_json_schema_to_str from sglang.utils import convert_json_schema_to_str
if TYPE_CHECKING:
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
from __future__ import annotations
import logging import logging
import time import time
from typing import Any, AsyncGenerator, Dict, List, Optional, Union from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union
from fastapi import Request from fastapi import Request
from fastapi.responses import ORJSONResponse, StreamingResponse from fastapi.responses import ORJSONResponse, StreamingResponse
...@@ -20,13 +22,15 @@ from sglang.srt.entrypoints.openai.utils import ( ...@@ -20,13 +22,15 @@ from sglang.srt.entrypoints.openai.utils import (
to_openai_style_logprobs, to_openai_style_logprobs,
) )
from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.code_completion_parser import ( from sglang.srt.parser.code_completion_parser import (
generate_completion_prompt_from_request, generate_completion_prompt_from_request,
) )
from sglang.utils import convert_json_schema_to_str from sglang.utils import convert_json_schema_to_str
if TYPE_CHECKING:
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
from typing import Any, Dict, List, Optional, Union from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from fastapi import Request from fastapi import Request
from fastapi.responses import ORJSONResponse from fastapi.responses import ORJSONResponse
...@@ -13,10 +15,12 @@ from sglang.srt.entrypoints.openai.protocol import ( ...@@ -13,10 +15,12 @@ from sglang.srt.entrypoints.openai.protocol import (
) )
from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
from sglang.srt.managers.io_struct import EmbeddingReqInput from sglang.srt.managers.io_struct import EmbeddingReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.conversation import generate_embedding_convs from sglang.srt.parser.conversation import generate_embedding_convs
if TYPE_CHECKING:
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
class OpenAIServingEmbedding(OpenAIServingBase): class OpenAIServingEmbedding(OpenAIServingBase):
"""Handler for v1/embeddings requests""" """Handler for v1/embeddings requests"""
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# Adapted from vLLM's OpenAIServingResponses # Adapted from vLLM's OpenAIServingResponses
"""Handler for /v1/responses requests""" """Handler for /v1/responses requests"""
from __future__ import annotations
import asyncio import asyncio
import copy import copy
...@@ -9,7 +10,7 @@ import logging ...@@ -9,7 +10,7 @@ import logging
import time import time
from contextlib import AsyncExitStack from contextlib import AsyncExitStack
from http import HTTPStatus from http import HTTPStatus
from typing import Any, AsyncGenerator, AsyncIterator, Optional, Union from typing import TYPE_CHECKING, Any, AsyncGenerator, AsyncIterator, Optional, Union
import jinja2 import jinja2
import openai.types.responses as openai_responses_types import openai.types.responses as openai_responses_types
...@@ -54,11 +55,13 @@ from sglang.srt.entrypoints.openai.protocol import ( ...@@ -54,11 +55,13 @@ from sglang.srt.entrypoints.openai.protocol import (
from sglang.srt.entrypoints.openai.serving_chat import OpenAIServingChat from sglang.srt.entrypoints.openai.serving_chat import OpenAIServingChat
from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.utils import random_uuid from sglang.srt.utils import random_uuid
if TYPE_CHECKING:
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment