"docs/source/vscode:/vscode.git/clone" did not exist on "a8f563dbf8520020054aa01f5ae169999775fd19"
Unverified commit 033b75f5, authored by Lianmin Zheng and committed by GitHub
Browse files

[Auto Sync] Update serving_base.py, serving_chat.py, servin... (20250910) (#10282)


Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: cctry <shiyang@x.ai>
parent f3b5db6e
from __future__ import annotations
import json
import logging
import uuid
from abc import ABC, abstractmethod
from typing import Any, Optional, Union
from typing import TYPE_CHECKING, Any, Optional, Union
from fastapi import HTTPException, Request
from fastapi.responses import ORJSONResponse, StreamingResponse
from sglang.srt.entrypoints.openai.protocol import ErrorResponse, OpenAIServingRequest
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.tokenizer_manager import TokenizerManager
if TYPE_CHECKING:
from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__)
......
from __future__ import annotations
import copy
import json
import logging
import time
import uuid
from typing import Any, AsyncGenerator, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse, StreamingResponse
......@@ -33,13 +35,15 @@ from sglang.srt.entrypoints.openai.utils import (
)
from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.conversation import generate_chat_conv
from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.utils import convert_json_schema_to_str
if TYPE_CHECKING:
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__)
......
from __future__ import annotations
import logging
import time
from typing import Any, AsyncGenerator, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse, StreamingResponse
......@@ -20,13 +22,15 @@ from sglang.srt.entrypoints.openai.utils import (
to_openai_style_logprobs,
)
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.code_completion_parser import (
generate_completion_prompt_from_request,
)
from sglang.utils import convert_json_schema_to_str
if TYPE_CHECKING:
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__)
......
from typing import Any, Dict, List, Optional, Union
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse
......@@ -13,10 +15,12 @@ from sglang.srt.entrypoints.openai.protocol import (
)
from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
from sglang.srt.managers.io_struct import EmbeddingReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.conversation import generate_embedding_convs
if TYPE_CHECKING:
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
class OpenAIServingEmbedding(OpenAIServingBase):
"""Handler for v1/embeddings requests"""
......
# SPDX-License-Identifier: Apache-2.0
# Adapted from vLLM's OpenAIServingResponses
"""Handler for /v1/responses requests"""
from __future__ import annotations
import asyncio
import copy
......@@ -9,7 +10,7 @@ import logging
import time
from contextlib import AsyncExitStack
from http import HTTPStatus
from typing import Any, AsyncGenerator, AsyncIterator, Optional, Union
from typing import TYPE_CHECKING, Any, AsyncGenerator, AsyncIterator, Optional, Union
import jinja2
import openai.types.responses as openai_responses_types
......@@ -54,11 +55,13 @@ from sglang.srt.entrypoints.openai.protocol import (
from sglang.srt.entrypoints.openai.serving_chat import OpenAIServingChat
from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.utils import random_uuid
if TYPE_CHECKING:
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment