Unverified commit 60e37f80, authored by Lianmin Zheng and committed by GitHub

Move parsers under a single folder (#9912)

parent 369b1433
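In short, every parser-related module that used to be imported directly from `sglang.srt` (the reasoning parser, conversation templates, code-completion parser, Harmony parser, and Jinja template utilities) now lives under the new `sglang.srt.parser` package. Separately, the diff also updates `sglang.srt.model_parallel` to its new home at `sglang.srt.layers.model_parallel`. A minimal before/after sketch of the import mapping, using only module names that appear in the hunks below:

```python
# Before this commit: parser modules sat directly under sglang.srt.
# from sglang.srt.reasoning_parser import ReasoningParser
# from sglang.srt.conversation import chat_templates, generate_chat_conv
# from sglang.srt.code_completion_parser import generate_completion_prompt_from_request
# from sglang.srt.harmony_parser import HarmonyParser
# from sglang.srt.jinja_template_utils import process_content_for_template_format

# After this commit: the same modules, grouped under sglang.srt.parser.
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.parser.conversation import chat_templates, generate_chat_conv
from sglang.srt.parser.code_completion_parser import generate_completion_prompt_from_request
from sglang.srt.parser.harmony_parser import HarmonyParser
from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
```

The call sites themselves are untouched; only the import paths change.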
@@ -313,7 +313,7 @@
 "outputs": [],
 "source": [
 "import sglang as sgl\n",
-"from sglang.srt.reasoning_parser import ReasoningParser\n",
+"from sglang.srt.parser.reasoning_parser import ReasoningParser\n",
 "from sglang.utils import print_highlight\n",
 "\n",
 "llm = sgl.Engine(model_path=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\")\n",
......
@@ -44,7 +44,7 @@
 "import requests\n",
 "from PIL import Image\n",
 "\n",
-"from sglang.srt.conversation import chat_templates\n",
+"from sglang.srt.parser.conversation import chat_templates\n",
 "\n",
 "image = Image.open(\n",
 " BytesIO(\n",
@@ -182,7 +182,7 @@
 "import requests\n",
 "from PIL import Image\n",
 "\n",
-"from sglang.srt.conversation import chat_templates\n",
+"from sglang.srt.parser.conversation import chat_templates\n",
 "\n",
 "image = Image.open(\n",
 " BytesIO(\n",
......
@@ -7,7 +7,7 @@ import argparse
 import dataclasses
 import sglang as sgl
-from sglang.srt.conversation import chat_templates
+from sglang.srt.parser.conversation import chat_templates
 from sglang.srt.server_args import ServerArgs
......
@@ -740,7 +740,7 @@ class StreamExecutor:
         # Execute the stored lazy generation calls
         self.backend.role_end_generate(self)
-        from sglang.srt.reasoning_parser import ReasoningParser
+        from sglang.srt.parser.reasoning_parser import ReasoningParser
         reasoning_parser = ReasoningParser(expr.model_type)
         other = expr.expr
......
@@ -102,7 +102,7 @@ from sglang.srt.managers.multi_tokenizer_mixin import (
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import ServerStatus, TokenizerManager
 from sglang.srt.metrics.func_timer import enable_func_timer
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import (
     add_api_key_middleware,
......
@@ -8,7 +8,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
 from fastapi import Request
 from fastapi.responses import ORJSONResponse, StreamingResponse
-from sglang.srt.conversation import generate_chat_conv
 from sglang.srt.entrypoints.openai.protocol import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -33,11 +32,12 @@ from sglang.srt.entrypoints.openai.utils import (
     to_openai_style_logprobs,
 )
 from sglang.srt.function_call.function_call_parser import FunctionCallParser
-from sglang.srt.jinja_template_utils import process_content_for_template_format
 from sglang.srt.managers.io_struct import GenerateReqInput
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.conversation import generate_chat_conv
+from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
+from sglang.srt.parser.reasoning_parser import ReasoningParser
 from sglang.utils import convert_json_schema_to_str

 logger = logging.getLogger(__name__)
......
@@ -5,7 +5,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
 from fastapi import Request
 from fastapi.responses import ORJSONResponse, StreamingResponse
-from sglang.srt.code_completion_parser import generate_completion_prompt_from_request
 from sglang.srt.entrypoints.openai.protocol import (
     CompletionRequest,
     CompletionResponse,
@@ -23,6 +22,9 @@ from sglang.srt.entrypoints.openai.utils import (
 from sglang.srt.managers.io_struct import GenerateReqInput
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
+from sglang.srt.parser.code_completion_parser import (
+    generate_completion_prompt_from_request,
+)
 from sglang.utils import convert_json_schema_to_str

 logger = logging.getLogger(__name__)
......
@@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Union
 from fastapi import Request
 from fastapi.responses import ORJSONResponse
-from sglang.srt.conversation import generate_embedding_convs
 from sglang.srt.entrypoints.openai.protocol import (
     EmbeddingObject,
     EmbeddingRequest,
@@ -16,6 +15,7 @@ from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
 from sglang.srt.managers.io_struct import EmbeddingReqInput
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
+from sglang.srt.parser.conversation import generate_embedding_convs


 class OpenAIServingEmbedding(OpenAIServingBase):
......
@@ -56,7 +56,7 @@ from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
 from sglang.srt.managers.io_struct import GenerateReqInput
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
 from sglang.srt.utils import random_uuid

 logger = logging.getLogger(__name__)
......
@@ -10,7 +10,7 @@ from sglang.srt.function_call.core_types import (
     ToolCallItem,
     _GetInfoFunc,
 )
-from sglang.srt.harmony_parser import HarmonyParser
+from sglang.srt.parser.harmony_parser import HarmonyParser

 logger = logging.getLogger(__name__)
......
@@ -162,7 +162,6 @@ def get_deepep_config() -> str:
 def is_tbo_enabled() -> bool:
     global IS_TBO_ENABLED
     if IS_TBO_ENABLED is None:
-        logger.warning("IS_TBO_ENABLED is not initialized, using False")
         IS_TBO_ENABLED = False
     return IS_TBO_ENABLED
......
@@ -141,7 +141,7 @@ from sglang.srt.mem_cache.lora_radix_cache import LoRARadixCache
 from sglang.srt.mem_cache.radix_cache import RadixCache
 from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache
 from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
 from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
......
@@ -24,20 +24,20 @@ import os
 import re
 from typing import Optional

-from sglang.srt.code_completion_parser import (
+from sglang.srt.parser.code_completion_parser import (
     CompletionTemplate,
     FimPosition,
     completion_template_exists,
     register_completion_template,
 )
-from sglang.srt.conversation import (
+from sglang.srt.parser.conversation import (
     Conversation,
     SeparatorStyle,
     chat_template_exists,
     get_conv_template_by_model_path,
     register_conv_template,
 )
-from sglang.srt.jinja_template_utils import detect_jinja_template_content_format
+from sglang.srt.parser.jinja_template_utils import detect_jinja_template_content_format

 logger = logging.getLogger(__name__)
......
@@ -1655,7 +1655,7 @@ class ModelRunner:
     def apply_torch_tp(self):
         logger.info(f"Enabling torch tensor parallelism on {self.tp_size} devices.")
-        from sglang.srt.model_parallel import tensor_parallel
+        from sglang.srt.layers.model_parallel import tensor_parallel

         device_mesh = torch.distributed.init_device_mesh(self.device, (self.tp_size,))
         tensor_parallel(self.model, device_mesh)
......
@@ -22,7 +22,7 @@ Reference: https://pytorch.org/docs/stable/distributed.tensor.parallel.html
 Here is a quick example to enable TP:
 ```python
-from sglang.srt.model_parallel import tensor_parallel
+from sglang.srt.layers.model_parallel import tensor_parallel

 device_mesh = torch.distributed.init_device_mesh("cuda", (tp_size,))
 tensor_parallel(model, device_mesh)
......
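For convenience, here is a slightly fuller version of the docstring example above, using the new module path. The `torchrun` launch assumption, the placeholder `nn.Linear` model, and the way `tp_size` and the local rank are derived are illustrative and not part of this commit:

```python
import os

import torch
import torch.distributed as dist
import torch.nn as nn

# New import path after this commit (previously sglang.srt.model_parallel).
from sglang.srt.layers.model_parallel import tensor_parallel

# Assumes the script is launched with `torchrun --nproc_per_node=<tp_size>`,
# so the default process group can be created from the environment.
dist.init_process_group(backend="nccl")
local_rank = int(os.environ.get("LOCAL_RANK", 0))
torch.cuda.set_device(local_rank)
tp_size = dist.get_world_size()

# Placeholder model; any torch.nn.Module can be used here.
model = nn.Linear(1024, 1024, device="cuda")

# Build a 1-D device mesh over all ranks and apply tensor parallelism,
# mirroring the quick example in the docstring above.
device_mesh = torch.distributed.init_device_mesh("cuda", (tp_size,))
tensor_parallel(model, device_mesh)
```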