Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
fdc4e1e5
"git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "2de24fbf7a26e679e8dc7d185addd3dc820f347c"
Unverified
Commit
fdc4e1e5
authored
Oct 03, 2025
by
fzyzcjy
Committed by
GitHub
Oct 03, 2025
Browse files
Tiny move files to utils folder (#11166)
parent
04b86b3c
Changes
66
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
36 additions
and
36 deletions
+36
-36
benchmark/json_schema/bench_sglang.py
benchmark/json_schema/bench_sglang.py
+1
-1
benchmark/multi_turn_chat/long_prompt_multi_turn.py
benchmark/multi_turn_chat/long_prompt_multi_turn.py
+1
-1
examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py
...ntime/token_in_token_out/token_in_token_out_llm_engine.py
+1
-1
examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
...ntime/token_in_token_out/token_in_token_out_llm_server.py
+1
-1
python/sglang/bench_one_batch.py
python/sglang/bench_one_batch.py
+1
-1
python/sglang/bench_serving.py
python/sglang/bench_serving.py
+1
-1
python/sglang/lang/backend/runtime_endpoint.py
python/sglang/lang/backend/runtime_endpoint.py
+1
-1
python/sglang/srt/configs/model_config.py
python/sglang/srt/configs/model_config.py
+4
-4
python/sglang/srt/lora/lora.py
python/sglang/srt/lora/lora.py
+1
-1
python/sglang/srt/lora/lora_manager.py
python/sglang/srt/lora/lora_manager.py
+1
-1
python/sglang/srt/lora/mem_pool.py
python/sglang/srt/lora/mem_pool.py
+1
-1
python/sglang/srt/lora/utils.py
python/sglang/srt/lora/utils.py
+1
-1
python/sglang/srt/managers/detokenizer_manager.py
python/sglang/srt/managers/detokenizer_manager.py
+1
-1
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+5
-5
python/sglang/srt/managers/scheduler_input_blocker.py
python/sglang/srt/managers/scheduler_input_blocker.py
+1
-1
python/sglang/srt/managers/scheduler_profiler_mixin.py
python/sglang/srt/managers/scheduler_profiler_mixin.py
+1
-1
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+5
-5
python/sglang/srt/managers/tp_worker.py
python/sglang/srt/managers/tp_worker.py
+6
-6
python/sglang/srt/model_executor/cpu_graph_runner.py
python/sglang/srt/model_executor/cpu_graph_runner.py
+1
-1
python/sglang/srt/model_executor/cuda_graph_runner.py
python/sglang/srt/model_executor/cuda_graph_runner.py
+1
-1
No files found.
benchmark/json_schema/bench_sglang.py
View file @
fdc4e1e5
...
...
@@ -8,7 +8,7 @@ from datasets import load_dataset
import
sglang
as
sgl
from
sglang.global_config
import
global_config
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.
utils.
hf_transformers_utils
import
get_tokenizer
from
sglang.test.test_utils
import
(
add_common_sglang_args_and_parse
,
select_sglang_backend
,
...
...
benchmark/multi_turn_chat/long_prompt_multi_turn.py
View file @
fdc4e1e5
...
...
@@ -7,7 +7,7 @@ from pathlib import Path
from
tqdm
import
tqdm
import
sglang
as
sgl
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.
utils.
hf_transformers_utils
import
get_tokenizer
from
sglang.test.test_utils
import
(
add_common_sglang_args_and_parse
,
select_sglang_backend
,
...
...
examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py
View file @
fdc4e1e5
...
...
@@ -3,7 +3,7 @@ This example demonstrates how to provide tokenized ids to LLM as input instead o
"""
import
sglang
as
sgl
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.
utils.
hf_transformers_utils
import
get_tokenizer
MODEL_PATH
=
"meta-llama/Llama-3.1-8B-Instruct"
...
...
examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
View file @
fdc4e1e5
...
...
@@ -7,7 +7,7 @@ python token_in_token_out_llm_server.py
import
requests
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.
utils.
hf_transformers_utils
import
get_tokenizer
from
sglang.test.test_utils
import
is_in_ci
from
sglang.utils
import
terminate_process
,
wait_for_server
...
...
python/sglang/bench_one_batch.py
View file @
fdc4e1e5
...
...
@@ -60,7 +60,6 @@ import torch.distributed as dist
from
sglang.srt.configs.model_config
import
ModelConfig
from
sglang.srt.distributed.parallel_state
import
destroy_distributed_environment
from
sglang.srt.entrypoints.engine
import
_set_envs_and_config
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.layers.moe
import
initialize_moe_config
from
sglang.srt.managers.schedule_batch
import
Req
,
ScheduleBatch
from
sglang.srt.managers.scheduler
import
Scheduler
...
...
@@ -78,6 +77,7 @@ from sglang.srt.utils import (
set_gpu_proc_affinity
,
suppress_other_loggers
,
)
from
sglang.srt.utils.hf_transformers_utils
import
get_tokenizer
@
dataclasses
.
dataclass
...
...
python/sglang/bench_serving.py
View file @
fdc4e1e5
...
...
@@ -635,7 +635,7 @@ def get_tokenizer(
if
pretrained_model_name_or_path
.
endswith
(
".json"
)
or
pretrained_model_name_or_path
.
endswith
(
".model"
):
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.
utils.
hf_transformers_utils
import
get_tokenizer
return
get_tokenizer
(
pretrained_model_name_or_path
)
...
...
python/sglang/lang/backend/runtime_endpoint.py
View file @
fdc4e1e5
...
...
@@ -433,7 +433,7 @@ class Runtime:
self
.
endpoint
.
cache_prefix
(
prefix
)
def
get_tokenizer
(
self
):
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.
utils.
hf_transformers_utils
import
get_tokenizer
return
get_tokenizer
(
self
.
server_args
.
tokenizer_path
,
...
...
python/sglang/srt/configs/model_config.py
View file @
fdc4e1e5
...
...
@@ -23,16 +23,16 @@ import torch
from
transformers
import
PretrainedConfig
from
sglang.srt.environ
import
envs
from
sglang.srt.hf_transformers_utils
import
(
from
sglang.srt.layers.quantization
import
QUANTIZATION_METHODS
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.utils
import
is_hip
,
retry
from
sglang.srt.utils.hf_transformers_utils
import
(
get_config
,
get_context_length
,
get_generation_config
,
get_hf_text_config
,
get_sparse_attention_config
,
)
from
sglang.srt.layers.quantization
import
QUANTIZATION_METHODS
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.utils
import
is_hip
,
retry
from
sglang.utils
import
is_in_ci
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/lora/lora.py
View file @
fdc4e1e5
...
...
@@ -26,12 +26,12 @@ import torch
from
torch
import
nn
from
sglang.srt.configs.load_config
import
LoadConfig
from
sglang.srt.hf_transformers_utils
import
AutoConfig
from
sglang.srt.lora.backend.base_backend
import
BaseLoRABackend
from
sglang.srt.lora.backend.chunked_backend
import
ChunkedSgmvLoRABackend
from
sglang.srt.lora.backend.triton_backend
import
TritonLoRABackend
from
sglang.srt.lora.lora_config
import
LoRAConfig
from
sglang.srt.model_loader.loader
import
DefaultModelLoader
from
sglang.srt.utils.hf_transformers_utils
import
AutoConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/lora/lora_manager.py
View file @
fdc4e1e5
...
...
@@ -21,7 +21,6 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple
import
torch
from
sglang.srt.configs.load_config
import
LoadConfig
from
sglang.srt.hf_transformers_utils
import
AutoConfig
from
sglang.srt.lora.backend.base_backend
import
BaseLoRABackend
,
get_backend_from_name
from
sglang.srt.lora.layers
import
BaseLayerWithLoRA
,
get_lora_layer
from
sglang.srt.lora.lora
import
LoRAAdapter
...
...
@@ -39,6 +38,7 @@ from sglang.srt.managers.io_struct import LoRAUpdateOutput
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.utils
import
replace_submodule
from
sglang.srt.utils.hf_transformers_utils
import
AutoConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/lora/mem_pool.py
View file @
fdc4e1e5
...
...
@@ -4,7 +4,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
import
torch
from
sglang.srt.distributed
import
divide
from
sglang.srt.hf_transformers_utils
import
AutoConfig
from
sglang.srt.lora.layers
import
BaseLayerWithLoRA
from
sglang.srt.lora.lora
import
LoRAAdapter
from
sglang.srt.lora.lora_config
import
LoRAConfig
...
...
@@ -17,6 +16,7 @@ from sglang.srt.lora.utils import (
get_stacked_multiply
,
get_target_module_name
,
)
from
sglang.srt.utils.hf_transformers_utils
import
AutoConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/lora/utils.py
View file @
fdc4e1e5
...
...
@@ -5,7 +5,7 @@ from typing import Iterable, Optional, Set, Tuple
import
torch
from
sglang.srt.hf_transformers_utils
import
AutoConfig
from
sglang.srt.
utils.
hf_transformers_utils
import
AutoConfig
@
dataclass
...
...
python/sglang/srt/managers/detokenizer_manager.py
View file @
fdc4e1e5
...
...
@@ -24,7 +24,6 @@ import psutil
import
setproctitle
import
zmq
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.managers.io_struct
import
(
BatchEmbeddingOutput
,
BatchMultimodalDecodeReq
,
...
...
@@ -42,6 +41,7 @@ from sglang.srt.utils import (
get_zmq_socket
,
kill_itself_when_parent_died
,
)
from
sglang.srt.utils.hf_transformers_utils
import
get_tokenizer
from
sglang.utils
import
(
TypeBasedDispatcher
,
find_printable_text
,
...
...
python/sglang/srt/managers/scheduler.py
View file @
fdc4e1e5
...
...
@@ -60,11 +60,6 @@ from sglang.srt.disaggregation.utils import (
)
from
sglang.srt.distributed
import
get_pp_group
,
get_world_group
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.hf_transformers_utils
import
(
get_processor
,
get_tokenizer
,
get_tokenizer_from_processor
,
)
from
sglang.srt.layers.dp_attention
import
compute_dp_attention_world_info
from
sglang.srt.layers.logits_processor
import
LogitsProcessorOutput
from
sglang.srt.layers.moe
import
initialize_moe_config
...
...
@@ -190,6 +185,11 @@ from sglang.srt.utils import (
set_random_seed
,
suppress_other_loggers
,
)
from
sglang.srt.utils.hf_transformers_utils
import
(
get_processor
,
get_tokenizer
,
get_tokenizer_from_processor
,
)
from
sglang.utils
import
TypeBasedDispatcher
,
get_exception_traceback
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/managers/scheduler_input_blocker.py
View file @
fdc4e1e5
...
...
@@ -17,7 +17,7 @@ from enum import Enum, auto
from
typing
import
Any
,
List
,
Optional
from
sglang.srt.managers.io_struct
import
BlockReqInput
,
BlockReqType
from
sglang.srt.poll_based_barrier
import
PollBasedBarrier
from
sglang.srt.
utils.
poll_based_barrier
import
PollBasedBarrier
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/managers/scheduler_profiler_mixin.py
View file @
fdc4e1e5
...
...
@@ -204,7 +204,7 @@ class SchedulerProfilerMixin:
torch
.
distributed
.
barrier
(
self
.
tp_cpu_group
)
if
self
.
tp_rank
==
0
:
from
sglang.srt.rpd_utils
import
rpd_to_chrome_trace
from
sglang.srt.
utils.
rpd_utils
import
rpd_to_chrome_trace
rpd_to_chrome_trace
(
"trace.rpd"
,
self
.
rpd_profile_path
)
self
.
rpd_profiler
=
None
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
fdc4e1e5
...
...
@@ -43,11 +43,6 @@ from fastapi import BackgroundTasks
from
sglang.srt.aio_rwlock
import
RWLock
from
sglang.srt.configs.model_config
import
ModelConfig
from
sglang.srt.disaggregation.utils
import
DisaggregationMode
from
sglang.srt.hf_transformers_utils
import
(
get_processor
,
get_tokenizer
,
get_tokenizer_from_processor
,
)
from
sglang.srt.lora.lora_registry
import
LoRARegistry
from
sglang.srt.managers.async_dynamic_batch_tokenizer
import
AsyncDynamicbatchTokenizer
from
sglang.srt.managers.disagg_service
import
start_disagg_service
...
...
@@ -99,6 +94,11 @@ from sglang.srt.utils import (
get_zmq_socket
,
kill_process_tree
,
)
from
sglang.srt.utils.hf_transformers_utils
import
(
get_processor
,
get_tokenizer
,
get_tokenizer_from_processor
,
)
from
sglang.utils
import
TypeBasedDispatcher
,
get_exception_traceback
asyncio
.
set_event_loop_policy
(
uvloop
.
EventLoopPolicy
())
...
...
python/sglang/srt/managers/tp_worker.py
View file @
fdc4e1e5
...
...
@@ -22,11 +22,6 @@ import torch
from
sglang.srt.configs.model_config
import
ModelConfig
from
sglang.srt.distributed
import
get_pp_group
,
get_world_group
from
sglang.srt.hf_transformers_utils
import
(
get_processor
,
get_tokenizer
,
get_tokenizer_from_processor
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessorOutput
from
sglang.srt.managers.io_struct
import
(
DestroyWeightsUpdateGroupReqInput
,
...
...
@@ -49,9 +44,14 @@ from sglang.srt.model_executor.forward_batch_info import (
PPProxyTensors
,
)
from
sglang.srt.model_executor.model_runner
import
ModelRunner
from
sglang.srt.patch_torch
import
monkey_patch_torch_reductions
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.utils
import
MultiprocessingSerializer
,
broadcast_pyobj
,
set_random_seed
from
sglang.srt.utils.hf_transformers_utils
import
(
get_processor
,
get_tokenizer
,
get_tokenizer_from_processor
,
)
from
sglang.srt.utils.patch_torch
import
monkey_patch_torch_reductions
if
TYPE_CHECKING
:
from
sglang.srt.managers.cache_controller
import
LayerDoneCounter
...
...
python/sglang/srt/model_executor/cpu_graph_runner.py
View file @
fdc4e1e5
...
...
@@ -34,7 +34,6 @@ from sglang.srt.model_executor.forward_batch_info import (
ForwardMode
,
PPProxyTensors
,
)
from
sglang.srt.patch_torch
import
monkey_patch_torch_compile
from
sglang.srt.speculative.spec_info
import
SpeculativeAlgorithm
from
sglang.srt.utils
import
(
log_info_on_rank0
,
...
...
@@ -43,6 +42,7 @@ from sglang.srt.utils import (
require_mlp_sync
,
require_mlp_tp_gather
,
)
from
sglang.srt.utils.patch_torch
import
monkey_patch_torch_compile
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/model_executor/cuda_graph_runner.py
View file @
fdc4e1e5
...
...
@@ -48,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import (
PPProxyTensors
,
enable_num_token_non_padded
,
)
from
sglang.srt.patch_torch
import
monkey_patch_torch_compile
from
sglang.srt.two_batch_overlap
import
TboCudaGraphRunnerPlugin
from
sglang.srt.utils
import
(
empty_context
,
...
...
@@ -62,6 +61,7 @@ from sglang.srt.utils import (
require_mlp_sync
,
require_mlp_tp_gather
,
)
from
sglang.srt.utils.patch_torch
import
monkey_patch_torch_compile
_is_hip
=
is_hip
()
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment