sglang commit 60e37f80 (unverified)
Authored Sep 02, 2025 by Lianmin Zheng; committed via GitHub on Sep 02, 2025

Move parsers under a single folder (#9912)

Parent: 369b1433

Changes: 28 changed files in the commit; this page shows 20 of them, with 21 additions and 20 deletions (+21, -20). The old-to-new module paths are summarized in a short sketch after the file list.
Changed files on this page:

docs/advanced_features/separate_reasoning.ipynb               +1  -1
docs/advanced_features/vlm_query.ipynb                        +2  -2
examples/runtime/engine/offline_batch_inference_vlm.py        +1  -1
python/sglang/lang/interpreter.py                             +1  -1
python/sglang/srt/entrypoints/http_server.py                  +1  -1
python/sglang/srt/entrypoints/openai/serving_chat.py          +3  -3
python/sglang/srt/entrypoints/openai/serving_completions.py   +3  -1
python/sglang/srt/entrypoints/openai/serving_embedding.py     +1  -1
python/sglang/srt/entrypoints/openai/serving_responses.py     +1  -1
python/sglang/srt/function_call/gpt_oss_detector.py           +1  -1
python/sglang/srt/layers/model_parallel.py                    +0  -0
python/sglang/srt/layers/moe/utils.py                         +0  -1
python/sglang/srt/managers/scheduler.py                       +1  -1
python/sglang/srt/managers/template_manager.py                +3  -3
python/sglang/srt/model_executor/model_runner.py              +1  -1
python/sglang/srt/models/torch_native_llama.py                +1  -1
python/sglang/srt/parser/code_completion_parser.py            +0  -0
python/sglang/srt/parser/conversation.py                      +0  -0
python/sglang/srt/parser/harmony_parser.py                    +0  -0
python/sglang/srt/parser/jinja_template_utils.py              +0  -0
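The renames in this commit amount to a simple path mapping. The sketch below restates that mapping as a Python dict for quick reference; the dict and its name are illustrative only and are not part of the commit.

```python
# Old import path -> new import path, as renamed by commit 60e37f80 (#9912).
# OLD_TO_NEW_MODULES is an illustrative name, not something defined in sglang.
OLD_TO_NEW_MODULES = {
    "sglang.srt.code_completion_parser": "sglang.srt.parser.code_completion_parser",
    "sglang.srt.conversation": "sglang.srt.parser.conversation",
    "sglang.srt.harmony_parser": "sglang.srt.parser.harmony_parser",
    "sglang.srt.jinja_template_utils": "sglang.srt.parser.jinja_template_utils",
    "sglang.srt.reasoning_parser": "sglang.srt.parser.reasoning_parser",
    "sglang.srt.model_parallel": "sglang.srt.layers.model_parallel",
}
```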
docs/advanced_features/separate_reasoning.ipynb (view file @ 60e37f80)

@@ -313,7 +313,7 @@
    "outputs": [],
    "source": [
     "import sglang as sgl\n",
-    "from sglang.srt.reasoning_parser import ReasoningParser\n",
+    "from sglang.srt.parser.reasoning_parser import ReasoningParser\n",
     "from sglang.utils import print_highlight\n",
     "\n",
     "llm = sgl.Engine(model_path=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\")\n",
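Code that has to run against sglang versions both before and after this commit can guard the import. This is a minimal sketch assuming only the two import paths shown in the diff above; the try/except fallback pattern itself is not part of the commit.

```python
# Prefer the new location introduced by this commit; fall back to the
# pre-move path for older sglang releases. Only the two import paths are
# taken from the diff; the try/except shim is illustrative.
try:
    from sglang.srt.parser.reasoning_parser import ReasoningParser
except ImportError:
    from sglang.srt.reasoning_parser import ReasoningParser
```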
docs/advanced_features/vlm_query.ipynb (view file @ 60e37f80)

@@ -44,7 +44,7 @@
    "import requests\n",
    "from PIL import Image\n",
    "\n",
-    "from sglang.srt.conversation import chat_templates\n",
+    "from sglang.srt.parser.conversation import chat_templates\n",
    "\n",
    "image = Image.open(\n",
    " BytesIO(\n",
@@ -182,7 +182,7 @@
    "import requests\n",
    "from PIL import Image\n",
    "\n",
-    "from sglang.srt.conversation import chat_templates\n",
+    "from sglang.srt.parser.conversation import chat_templates\n",
    "\n",
    "image = Image.open(\n",
    " BytesIO(\n",
examples/runtime/engine/offline_batch_inference_vlm.py (view file @ 60e37f80)

@@ -7,7 +7,7 @@ import argparse
 import dataclasses
 import sglang as sgl
-from sglang.srt.conversation import chat_templates
+from sglang.srt.parser.conversation import chat_templates
 from sglang.srt.server_args import ServerArgs
python/sglang/lang/interpreter.py (view file @ 60e37f80)

@@ -740,7 +740,7 @@ class StreamExecutor:
         # Execute the stored lazy generation calls
         self.backend.role_end_generate(self)
-        from sglang.srt.reasoning_parser import ReasoningParser
+        from sglang.srt.parser.reasoning_parser import ReasoningParser
         reasoning_parser = ReasoningParser(expr.model_type)
         other = expr.expr
python/sglang/srt/entrypoints/http_server.py (view file @ 60e37f80)

@@ -102,7 +102,7 @@ from sglang.srt.managers.multi_tokenizer_mixin import (
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import ServerStatus, TokenizerManager
 from sglang.srt.metrics.func_timer import enable_func_timer
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import (
     add_api_key_middleware,
python/sglang/srt/entrypoints/openai/serving_chat.py (view file @ 60e37f80)

@@ -8,7 +8,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
 from fastapi import Request
 from fastapi.responses import ORJSONResponse, StreamingResponse
-from sglang.srt.conversation import generate_chat_conv
 from sglang.srt.entrypoints.openai.protocol import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -33,11 +32,12 @@ from sglang.srt.entrypoints.openai.utils import (
     to_openai_style_logprobs,
 )
 from sglang.srt.function_call.function_call_parser import FunctionCallParser
-from sglang.srt.jinja_template_utils import process_content_for_template_format
 from sglang.srt.managers.io_struct import GenerateReqInput
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.conversation import generate_chat_conv
+from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
+from sglang.srt.parser.reasoning_parser import ReasoningParser
 from sglang.utils import convert_json_schema_to_str

 logger = logging.getLogger(__name__)
python/sglang/srt/entrypoints/openai/serving_completions.py (view file @ 60e37f80)

@@ -5,7 +5,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
 from fastapi import Request
 from fastapi.responses import ORJSONResponse, StreamingResponse
-from sglang.srt.code_completion_parser import generate_completion_prompt_from_request
 from sglang.srt.entrypoints.openai.protocol import (
     CompletionRequest,
     CompletionResponse,
@@ -23,6 +22,9 @@ from sglang.srt.entrypoints.openai.utils import (
 from sglang.srt.managers.io_struct import GenerateReqInput
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
+from sglang.srt.parser.code_completion_parser import (
+    generate_completion_prompt_from_request,
+)
 from sglang.utils import convert_json_schema_to_str

 logger = logging.getLogger(__name__)
python/sglang/srt/entrypoints/openai/serving_embedding.py (view file @ 60e37f80)

@@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Union
 from fastapi import Request
 from fastapi.responses import ORJSONResponse
-from sglang.srt.conversation import generate_embedding_convs
 from sglang.srt.entrypoints.openai.protocol import (
     EmbeddingObject,
     EmbeddingRequest,
@@ -16,6 +15,7 @@ from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
 from sglang.srt.managers.io_struct import EmbeddingReqInput
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
+from sglang.srt.parser.conversation import generate_embedding_convs


 class OpenAIServingEmbedding(OpenAIServingBase):
python/sglang/srt/entrypoints/openai/serving_responses.py (view file @ 60e37f80)

@@ -56,7 +56,7 @@ from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
 from sglang.srt.managers.io_struct import GenerateReqInput
 from sglang.srt.managers.template_manager import TemplateManager
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
 from sglang.srt.utils import random_uuid

 logger = logging.getLogger(__name__)
python/sglang/srt/function_call/gpt_oss_detector.py (view file @ 60e37f80)

@@ -10,7 +10,7 @@ from sglang.srt.function_call.core_types import (
     ToolCallItem,
     _GetInfoFunc,
 )
-from sglang.srt.harmony_parser import HarmonyParser
+from sglang.srt.parser.harmony_parser import HarmonyParser

 logger = logging.getLogger(__name__)
python/sglang/srt/model_parallel.py → python/sglang/srt/layers/model_parallel.py (view file @ 60e37f80)
File moved.
python/sglang/srt/layers/moe/utils.py (view file @ 60e37f80)

@@ -162,7 +162,6 @@ def get_deepep_config() -> str:
 def is_tbo_enabled() -> bool:
     global IS_TBO_ENABLED
     if IS_TBO_ENABLED is None:
-        logger.warning("IS_TBO_ENABLED is not initialized, using False")
         IS_TBO_ENABLED = False
     return IS_TBO_ENABLED
python/sglang/srt/managers/scheduler.py (view file @ 60e37f80)

@@ -141,7 +141,7 @@ from sglang.srt.mem_cache.lora_radix_cache import LoRARadixCache
 from sglang.srt.mem_cache.radix_cache import RadixCache
 from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache
 from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
 from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
python/sglang/srt/managers/template_manager.py (view file @ 60e37f80)

@@ -24,20 +24,20 @@ import os
 import re
 from typing import Optional

-from sglang.srt.code_completion_parser import (
+from sglang.srt.parser.code_completion_parser import (
     CompletionTemplate,
     FimPosition,
     completion_template_exists,
     register_completion_template,
 )
-from sglang.srt.conversation import (
+from sglang.srt.parser.conversation import (
     Conversation,
     SeparatorStyle,
     chat_template_exists,
     get_conv_template_by_model_path,
     register_conv_template,
 )
-from sglang.srt.jinja_template_utils import detect_jinja_template_content_format
+from sglang.srt.parser.jinja_template_utils import detect_jinja_template_content_format

 logger = logging.getLogger(__name__)
python/sglang/srt/model_executor/model_runner.py (view file @ 60e37f80)

@@ -1655,7 +1655,7 @@ class ModelRunner:
     def apply_torch_tp(self):
         logger.info(f"Enabling torch tensor parallelism on {self.tp_size} devices.")
-        from sglang.srt.model_parallel import tensor_parallel
+        from sglang.srt.layers.model_parallel import tensor_parallel

         device_mesh = torch.distributed.init_device_mesh(self.device, (self.tp_size,))
         tensor_parallel(self.model, device_mesh)
python/sglang/srt/models/torch_native_llama.py (view file @ 60e37f80)

@@ -22,7 +22,7 @@ Reference: https://pytorch.org/docs/stable/distributed.tensor.parallel.html
 Here is a quick example to enable TP:
 ```python
-from sglang.srt.model_parallel import tensor_parallel
+from sglang.srt.layers.model_parallel import tensor_parallel
 device_mesh = torch.distributed.init_device_mesh("cuda", (tp_size,))
 tensor_parallel(model, device_mesh)
python/sglang/srt/code_completion_parser.py → python/sglang/srt/parser/code_completion_parser.py (view file @ 60e37f80)
File moved.

python/sglang/srt/conversation.py → python/sglang/srt/parser/conversation.py (view file @ 60e37f80)
File moved.

python/sglang/srt/harmony_parser.py → python/sglang/srt/parser/harmony_parser.py (view file @ 60e37f80)
File moved.

python/sglang/srt/jinja_template_utils.py → python/sglang/srt/parser/jinja_template_utils.py (view file @ 60e37f80)
File moved.
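Downstream projects that import any of the moved modules can apply the rename mechanically. The helper below is a hypothetical sketch (rewrite_imports, RENAMES, and the in-place rewrite strategy are all assumptions, not part of this commit); it only encodes the path changes listed above.

```python
import pathlib
import re

# Old -> new module paths, taken from the renames in this commit.
RENAMES = {
    r"sglang\.srt\.code_completion_parser": "sglang.srt.parser.code_completion_parser",
    r"sglang\.srt\.conversation": "sglang.srt.parser.conversation",
    r"sglang\.srt\.harmony_parser": "sglang.srt.parser.harmony_parser",
    r"sglang\.srt\.jinja_template_utils": "sglang.srt.parser.jinja_template_utils",
    r"sglang\.srt\.reasoning_parser": "sglang.srt.parser.reasoning_parser",
    r"sglang\.srt\.model_parallel": "sglang.srt.layers.model_parallel",
}


def rewrite_imports(root: str) -> None:
    """Rewrite pre-move sglang import paths to the new layout, in place."""
    for path in pathlib.Path(root).rglob("*.py"):
        text = path.read_text()
        updated = text
        for old, new in RENAMES.items():
            updated = re.sub(old, new, updated)
        if updated != text:
            path.write_text(updated)


if __name__ == "__main__":
    rewrite_imports(".")
```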