Commit fdc4e1e5 (unverified) in sglang
Tiny move files to utils folder (#11166)

Authored Oct 03, 2025 by fzyzcjy; committed via GitHub on Oct 03, 2025
Parent: 04b86b3c

66 changed files in total; this page shows 20 changed files with 36 additions and 36 deletions (+36, -36).
benchmark/json_schema/bench_sglang.py  +1 -1
benchmark/multi_turn_chat/long_prompt_multi_turn.py  +1 -1
examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py  +1 -1
examples/runtime/token_in_token_out/token_in_token_out_llm_server.py  +1 -1
python/sglang/bench_one_batch.py  +1 -1
python/sglang/bench_serving.py  +1 -1
python/sglang/lang/backend/runtime_endpoint.py  +1 -1
python/sglang/srt/configs/model_config.py  +4 -4
python/sglang/srt/lora/lora.py  +1 -1
python/sglang/srt/lora/lora_manager.py  +1 -1
python/sglang/srt/lora/mem_pool.py  +1 -1
python/sglang/srt/lora/utils.py  +1 -1
python/sglang/srt/managers/detokenizer_manager.py  +1 -1
python/sglang/srt/managers/scheduler.py  +5 -5
python/sglang/srt/managers/scheduler_input_blocker.py  +1 -1
python/sglang/srt/managers/scheduler_profiler_mixin.py  +1 -1
python/sglang/srt/managers/tokenizer_manager.py  +5 -5
python/sglang/srt/managers/tp_worker.py  +6 -6
python/sglang/srt/model_executor/cpu_graph_runner.py  +1 -1
python/sglang/srt/model_executor/cuda_graph_runner.py  +1 -1
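Every hunk below follows the same pattern: a helper module moves from sglang.srt.<name> to sglang.srt.utils.<name> (hf_transformers_utils, patch_torch, poll_based_barrier, rpd_utils), and each import site is updated to the new path. Downstream code that imports these modules directly has to follow the rename. A minimal compatibility sketch, not part of this commit, showing one way out-of-tree callers might bridge sglang versions before and after the move:

# Hypothetical downstream shim: prefer the new sglang.srt.utils.* location,
# fall back to the pre-move sglang.srt.* location on older sglang releases.
try:
    from sglang.srt.utils.hf_transformers_utils import get_tokenizer
except ImportError:
    from sglang.srt.hf_transformers_utils import get_tokenizer

# Load a tokenizer by model path, as in the token_in_token_out examples above.
tokenizer = get_tokenizer("meta-llama/Llama-3.1-8B-Instruct")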
benchmark/json_schema/bench_sglang.py
@@ -8,7 +8,7 @@ from datasets import load_dataset
 
 import sglang as sgl
 from sglang.global_config import global_config
-from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import (
     add_common_sglang_args_and_parse,
     select_sglang_backend,
benchmark/multi_turn_chat/long_prompt_multi_turn.py
@@ -7,7 +7,7 @@ from pathlib import Path
 from tqdm import tqdm
 
 import sglang as sgl
-from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import (
     add_common_sglang_args_and_parse,
     select_sglang_backend,
examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py
@@ -3,7 +3,7 @@ This example demonstrates how to provide tokenized ids to LLM as input instead o
 """
 
 import sglang as sgl
-from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 
 MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
@@ -7,7 +7,7 @@ python token_in_token_out_llm_server.py
 import requests
 
-from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import is_in_ci
 from sglang.utils import terminate_process, wait_for_server
python/sglang/bench_one_batch.py
@@ -60,7 +60,6 @@ import torch.distributed as dist
 from sglang.srt.configs.model_config import ModelConfig
 from sglang.srt.distributed.parallel_state import destroy_distributed_environment
 from sglang.srt.entrypoints.engine import _set_envs_and_config
-from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.srt.layers.moe import initialize_moe_config
 from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
 from sglang.srt.managers.scheduler import Scheduler
@@ -78,6 +77,7 @@ from sglang.srt.utils import (
     set_gpu_proc_affinity,
     suppress_other_loggers,
 )
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 
 
 @dataclasses.dataclass
python/sglang/bench_serving.py
@@ -635,7 +635,7 @@ def get_tokenizer(
     if pretrained_model_name_or_path.endswith(
         ".json"
     ) or pretrained_model_name_or_path.endswith(".model"):
-        from sglang.srt.hf_transformers_utils import get_tokenizer
+        from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 
         return get_tokenizer(pretrained_model_name_or_path)
python/sglang/lang/backend/runtime_endpoint.py
@@ -433,7 +433,7 @@ class Runtime:
         self.endpoint.cache_prefix(prefix)
 
     def get_tokenizer(self):
-        from sglang.srt.hf_transformers_utils import get_tokenizer
+        from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 
         return get_tokenizer(
             self.server_args.tokenizer_path,
python/sglang/srt/configs/model_config.py
@@ -23,16 +23,16 @@ import torch
 from transformers import PretrainedConfig
 
 from sglang.srt.environ import envs
-from sglang.srt.hf_transformers_utils import (
+from sglang.srt.layers.quantization import QUANTIZATION_METHODS
+from sglang.srt.server_args import ServerArgs
+from sglang.srt.utils import is_hip, retry
+from sglang.srt.utils.hf_transformers_utils import (
     get_config,
     get_context_length,
     get_generation_config,
     get_hf_text_config,
     get_sparse_attention_config,
 )
-from sglang.srt.layers.quantization import QUANTIZATION_METHODS
-from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import is_hip, retry
 from sglang.utils import is_in_ci
 
 logger = logging.getLogger(__name__)
python/sglang/srt/lora/lora.py
@@ -26,12 +26,12 @@ import torch
 from torch import nn
 
 from sglang.srt.configs.load_config import LoadConfig
-from sglang.srt.hf_transformers_utils import AutoConfig
 from sglang.srt.lora.backend.base_backend import BaseLoRABackend
 from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
 from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
 from sglang.srt.lora.lora_config import LoRAConfig
 from sglang.srt.model_loader.loader import DefaultModelLoader
+from sglang.srt.utils.hf_transformers_utils import AutoConfig
 
 logger = logging.getLogger(__name__)
python/sglang/srt/lora/lora_manager.py
@@ -21,7 +21,6 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple
 import torch
 
 from sglang.srt.configs.load_config import LoadConfig
-from sglang.srt.hf_transformers_utils import AutoConfig
 from sglang.srt.lora.backend.base_backend import BaseLoRABackend, get_backend_from_name
 from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer
 from sglang.srt.lora.lora import LoRAAdapter
@@ -39,6 +38,7 @@ from sglang.srt.managers.io_struct import LoRAUpdateOutput
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import replace_submodule
+from sglang.srt.utils.hf_transformers_utils import AutoConfig
 
 logger = logging.getLogger(__name__)
python/sglang/srt/lora/mem_pool.py
@@ -4,7 +4,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
 import torch
 
 from sglang.srt.distributed import divide
-from sglang.srt.hf_transformers_utils import AutoConfig
 from sglang.srt.lora.layers import BaseLayerWithLoRA
 from sglang.srt.lora.lora import LoRAAdapter
 from sglang.srt.lora.lora_config import LoRAConfig
@@ -17,6 +16,7 @@ from sglang.srt.lora.utils import (
     get_stacked_multiply,
     get_target_module_name,
 )
+from sglang.srt.utils.hf_transformers_utils import AutoConfig
 
 logger = logging.getLogger(__name__)
python/sglang/srt/lora/utils.py
@@ -5,7 +5,7 @@ from typing import Iterable, Optional, Set, Tuple
 import torch
 
-from sglang.srt.hf_transformers_utils import AutoConfig
+from sglang.srt.utils.hf_transformers_utils import AutoConfig
 
 
 @dataclass
python/sglang/srt/managers/detokenizer_manager.py
@@ -24,7 +24,6 @@ import psutil
 import setproctitle
 import zmq
 
-from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.srt.managers.io_struct import (
     BatchEmbeddingOutput,
     BatchMultimodalDecodeReq,
@@ -42,6 +41,7 @@ from sglang.srt.utils import (
     get_zmq_socket,
     kill_itself_when_parent_died,
 )
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.utils import (
     TypeBasedDispatcher,
     find_printable_text,
python/sglang/srt/managers/scheduler.py
@@ -60,11 +60,6 @@ from sglang.srt.disaggregation.utils import (
 )
 from sglang.srt.distributed import get_pp_group, get_world_group
 from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
-from sglang.srt.hf_transformers_utils import (
-    get_processor,
-    get_tokenizer,
-    get_tokenizer_from_processor,
-)
 from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
 from sglang.srt.layers.moe import initialize_moe_config
@@ -190,6 +185,11 @@ from sglang.srt.utils import (
     set_random_seed,
     suppress_other_loggers,
 )
+from sglang.srt.utils.hf_transformers_utils import (
+    get_processor,
+    get_tokenizer,
+    get_tokenizer_from_processor,
+)
 from sglang.utils import TypeBasedDispatcher, get_exception_traceback
 
 logger = logging.getLogger(__name__)
python/sglang/srt/managers/scheduler_input_blocker.py
@@ -17,7 +17,7 @@ from enum import Enum, auto
 from typing import Any, List, Optional
 
 from sglang.srt.managers.io_struct import BlockReqInput, BlockReqType
-from sglang.srt.poll_based_barrier import PollBasedBarrier
+from sglang.srt.utils.poll_based_barrier import PollBasedBarrier
 
 logger = logging.getLogger(__name__)
python/sglang/srt/managers/scheduler_profiler_mixin.py
@@ -204,7 +204,7 @@ class SchedulerProfilerMixin:
             torch.distributed.barrier(self.tp_cpu_group)
             if self.tp_rank == 0:
-                from sglang.srt.rpd_utils import rpd_to_chrome_trace
+                from sglang.srt.utils.rpd_utils import rpd_to_chrome_trace
 
                 rpd_to_chrome_trace("trace.rpd", self.rpd_profile_path)
             self.rpd_profiler = None
python/sglang/srt/managers/tokenizer_manager.py
@@ -43,11 +43,6 @@ from fastapi import BackgroundTasks
 from sglang.srt.aio_rwlock import RWLock
 from sglang.srt.configs.model_config import ModelConfig
 from sglang.srt.disaggregation.utils import DisaggregationMode
-from sglang.srt.hf_transformers_utils import (
-    get_processor,
-    get_tokenizer,
-    get_tokenizer_from_processor,
-)
 from sglang.srt.lora.lora_registry import LoRARegistry
 from sglang.srt.managers.async_dynamic_batch_tokenizer import AsyncDynamicbatchTokenizer
 from sglang.srt.managers.disagg_service import start_disagg_service
@@ -99,6 +94,11 @@ from sglang.srt.utils import (
     get_zmq_socket,
     kill_process_tree,
 )
+from sglang.srt.utils.hf_transformers_utils import (
+    get_processor,
+    get_tokenizer,
+    get_tokenizer_from_processor,
+)
 from sglang.utils import TypeBasedDispatcher, get_exception_traceback
 
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
python/sglang/srt/managers/tp_worker.py
@@ -22,11 +22,6 @@ import torch
 from sglang.srt.configs.model_config import ModelConfig
 from sglang.srt.distributed import get_pp_group, get_world_group
-from sglang.srt.hf_transformers_utils import (
-    get_processor,
-    get_tokenizer,
-    get_tokenizer_from_processor,
-)
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
 from sglang.srt.managers.io_struct import (
     DestroyWeightsUpdateGroupReqInput,
@@ -49,9 +44,14 @@ from sglang.srt.model_executor.forward_batch_info import (
     PPProxyTensors,
 )
 from sglang.srt.model_executor.model_runner import ModelRunner
-from sglang.srt.patch_torch import monkey_patch_torch_reductions
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import MultiprocessingSerializer, broadcast_pyobj, set_random_seed
+from sglang.srt.utils.hf_transformers_utils import (
+    get_processor,
+    get_tokenizer,
+    get_tokenizer_from_processor,
+)
+from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
 
 if TYPE_CHECKING:
     from sglang.srt.managers.cache_controller import LayerDoneCounter
python/sglang/srt/model_executor/cpu_graph_runner.py
@@ -34,7 +34,6 @@ from sglang.srt.model_executor.forward_batch_info import (
     ForwardMode,
     PPProxyTensors,
 )
-from sglang.srt.patch_torch import monkey_patch_torch_compile
 from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
 from sglang.srt.utils import (
     log_info_on_rank0,
@@ -43,6 +42,7 @@ from sglang.srt.utils import (
     require_mlp_sync,
     require_mlp_tp_gather,
 )
+from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
 
 logger = logging.getLogger(__name__)
python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -48,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import (
     PPProxyTensors,
     enable_num_token_non_padded,
 )
-from sglang.srt.patch_torch import monkey_patch_torch_compile
 from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
 from sglang.srt.utils import (
     empty_context,
@@ -62,6 +61,7 @@ from sglang.srt.utils import (
     require_mlp_sync,
     require_mlp_tp_gather,
 )
+from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
 
 _is_hip = is_hip()