Unverified commit 60e37f80, authored by Lianmin Zheng and committed by GitHub
Browse files

Move parsers under a single folder (#9912)

parent 369b1433
......@@ -313,7 +313,7 @@
"outputs": [],
"source": [
"import sglang as sgl\n",
"from sglang.srt.reasoning_parser import ReasoningParser\n",
"from sglang.srt.parser.reasoning_parser import ReasoningParser\n",
"from sglang.utils import print_highlight\n",
"\n",
"llm = sgl.Engine(model_path=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\")\n",
......
......@@ -44,7 +44,7 @@
"import requests\n",
"from PIL import Image\n",
"\n",
"from sglang.srt.conversation import chat_templates\n",
"from sglang.srt.parser.conversation import chat_templates\n",
"\n",
"image = Image.open(\n",
" BytesIO(\n",
......@@ -182,7 +182,7 @@
"import requests\n",
"from PIL import Image\n",
"\n",
"from sglang.srt.conversation import chat_templates\n",
"from sglang.srt.parser.conversation import chat_templates\n",
"\n",
"image = Image.open(\n",
" BytesIO(\n",
......
......@@ -7,7 +7,7 @@ import argparse
import dataclasses
import sglang as sgl
from sglang.srt.conversation import chat_templates
from sglang.srt.parser.conversation import chat_templates
from sglang.srt.server_args import ServerArgs
......
......@@ -740,7 +740,7 @@ class StreamExecutor:
# Execute the stored lazy generation calls
self.backend.role_end_generate(self)
from sglang.srt.reasoning_parser import ReasoningParser
from sglang.srt.parser.reasoning_parser import ReasoningParser
reasoning_parser = ReasoningParser(expr.model_type)
other = expr.expr
......
......@@ -102,7 +102,7 @@ from sglang.srt.managers.multi_tokenizer_mixin import (
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import ServerStatus, TokenizerManager
from sglang.srt.metrics.func_timer import enable_func_timer
from sglang.srt.reasoning_parser import ReasoningParser
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import (
add_api_key_middleware,
......
......@@ -8,7 +8,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse, StreamingResponse
from sglang.srt.conversation import generate_chat_conv
from sglang.srt.entrypoints.openai.protocol import (
ChatCompletionRequest,
ChatCompletionResponse,
......@@ -33,11 +32,12 @@ from sglang.srt.entrypoints.openai.utils import (
to_openai_style_logprobs,
)
from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.jinja_template_utils import process_content_for_template_format
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.reasoning_parser import ReasoningParser
from sglang.srt.parser.conversation import generate_chat_conv
from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.utils import convert_json_schema_to_str
logger = logging.getLogger(__name__)
......
......@@ -5,7 +5,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse, StreamingResponse
from sglang.srt.code_completion_parser import generate_completion_prompt_from_request
from sglang.srt.entrypoints.openai.protocol import (
CompletionRequest,
CompletionResponse,
......@@ -23,6 +22,9 @@ from sglang.srt.entrypoints.openai.utils import (
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.code_completion_parser import (
generate_completion_prompt_from_request,
)
from sglang.utils import convert_json_schema_to_str
logger = logging.getLogger(__name__)
......
......@@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse
from sglang.srt.conversation import generate_embedding_convs
from sglang.srt.entrypoints.openai.protocol import (
EmbeddingObject,
EmbeddingRequest,
......@@ -16,6 +15,7 @@ from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
from sglang.srt.managers.io_struct import EmbeddingReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.parser.conversation import generate_embedding_convs
class OpenAIServingEmbedding(OpenAIServingBase):
......
......@@ -56,7 +56,7 @@ from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.reasoning_parser import ReasoningParser
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.utils import random_uuid
logger = logging.getLogger(__name__)
......
......@@ -10,7 +10,7 @@ from sglang.srt.function_call.core_types import (
ToolCallItem,
_GetInfoFunc,
)
from sglang.srt.harmony_parser import HarmonyParser
from sglang.srt.parser.harmony_parser import HarmonyParser
logger = logging.getLogger(__name__)
......
......@@ -162,7 +162,6 @@ def get_deepep_config() -> str:
def is_tbo_enabled() -> bool:
global IS_TBO_ENABLED
if IS_TBO_ENABLED is None:
logger.warning("IS_TBO_ENABLED is not initialized, using False")
IS_TBO_ENABLED = False
return IS_TBO_ENABLED
......
......@@ -141,7 +141,7 @@ from sglang.srt.mem_cache.lora_radix_cache import LoRARadixCache
from sglang.srt.mem_cache.radix_cache import RadixCache
from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache
from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
from sglang.srt.reasoning_parser import ReasoningParser
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
......
......@@ -24,20 +24,20 @@ import os
import re
from typing import Optional
from sglang.srt.code_completion_parser import (
from sglang.srt.parser.code_completion_parser import (
CompletionTemplate,
FimPosition,
completion_template_exists,
register_completion_template,
)
from sglang.srt.conversation import (
from sglang.srt.parser.conversation import (
Conversation,
SeparatorStyle,
chat_template_exists,
get_conv_template_by_model_path,
register_conv_template,
)
from sglang.srt.jinja_template_utils import detect_jinja_template_content_format
from sglang.srt.parser.jinja_template_utils import detect_jinja_template_content_format
logger = logging.getLogger(__name__)
......
......@@ -1655,7 +1655,7 @@ class ModelRunner:
def apply_torch_tp(self):
logger.info(f"Enabling torch tensor parallelism on {self.tp_size} devices.")
from sglang.srt.model_parallel import tensor_parallel
from sglang.srt.layers.model_parallel import tensor_parallel
device_mesh = torch.distributed.init_device_mesh(self.device, (self.tp_size,))
tensor_parallel(self.model, device_mesh)
......
......@@ -22,7 +22,7 @@ Reference: https://pytorch.org/docs/stable/distributed.tensor.parallel.html
Here is a quick example to enable TP:
```python
from sglang.srt.model_parallel import tensor_parallel
from sglang.srt.layers.model_parallel import tensor_parallel
device_mesh = torch.distributed.init_device_mesh("cuda", (tp_size,))
tensor_parallel(model, device_mesh)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment