Unverified Commit fdc4e1e5 authored by fzyzcjy, committed by GitHub

Tiny move files to utils folder (#11166)

parent 04b86b3c
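
The diff below is a mechanical import-path rename: four modules that previously lived directly under `sglang.srt` (`hf_transformers_utils`, `patch_torch`, `poll_based_barrier`, `rpd_utils`) now live under the `sglang.srt.utils` package, and every import site is updated accordingly. A minimal before/after sketch of the pattern (all paths taken from the hunks below; the old forms are kept as comments):

```python
# Old locations (removed by this commit):
#   from sglang.srt.hf_transformers_utils import get_tokenizer
#   from sglang.srt.patch_torch import monkey_patch_torch_compile
#   from sglang.srt.poll_based_barrier import PollBasedBarrier
#   from sglang.srt.rpd_utils import rpd_to_chrome_trace

# New locations under the sglang.srt.utils package:
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
from sglang.srt.utils.poll_based_barrier import PollBasedBarrier
from sglang.srt.utils.rpd_utils import rpd_to_chrome_trace
```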
@@ -8,7 +8,7 @@ from datasets import load_dataset
 import sglang as sgl
 from sglang.global_config import global_config
-from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import (
     add_common_sglang_args_and_parse,
     select_sglang_backend,
...
@@ -7,7 +7,7 @@ from pathlib import Path
 from tqdm import tqdm
 import sglang as sgl
-from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import (
     add_common_sglang_args_and_parse,
     select_sglang_backend,
...
@@ -3,7 +3,7 @@ This example demonstrates how to provide tokenized ids to LLM as input instead o
 """
 import sglang as sgl
-from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
...
@@ -7,7 +7,7 @@ python token_in_token_out_llm_server.py
 import requests
-from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import is_in_ci
 from sglang.utils import terminate_process, wait_for_server
...
@@ -60,7 +60,6 @@ import torch.distributed as dist
 from sglang.srt.configs.model_config import ModelConfig
 from sglang.srt.distributed.parallel_state import destroy_distributed_environment
 from sglang.srt.entrypoints.engine import _set_envs_and_config
-from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.srt.layers.moe import initialize_moe_config
 from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
 from sglang.srt.managers.scheduler import Scheduler
@@ -78,6 +77,7 @@ from sglang.srt.utils import (
     set_gpu_proc_affinity,
     suppress_other_loggers,
 )
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 @dataclasses.dataclass
...
@@ -635,7 +635,7 @@ def get_tokenizer(
     if pretrained_model_name_or_path.endswith(
         ".json"
     ) or pretrained_model_name_or_path.endswith(".model"):
-        from sglang.srt.hf_transformers_utils import get_tokenizer
+        from sglang.srt.utils.hf_transformers_utils import get_tokenizer
         return get_tokenizer(pretrained_model_name_or_path)
...
@@ -433,7 +433,7 @@ class Runtime:
         self.endpoint.cache_prefix(prefix)
     def get_tokenizer(self):
-        from sglang.srt.hf_transformers_utils import get_tokenizer
+        from sglang.srt.utils.hf_transformers_utils import get_tokenizer
         return get_tokenizer(
             self.server_args.tokenizer_path,
...
@@ -23,16 +23,16 @@ import torch
 from transformers import PretrainedConfig
 from sglang.srt.environ import envs
-from sglang.srt.hf_transformers_utils import (
+from sglang.srt.layers.quantization import QUANTIZATION_METHODS
+from sglang.srt.server_args import ServerArgs
+from sglang.srt.utils import is_hip, retry
+from sglang.srt.utils.hf_transformers_utils import (
     get_config,
     get_context_length,
     get_generation_config,
     get_hf_text_config,
     get_sparse_attention_config,
 )
-from sglang.srt.layers.quantization import QUANTIZATION_METHODS
-from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import is_hip, retry
 from sglang.utils import is_in_ci
 logger = logging.getLogger(__name__)
...
@@ -26,12 +26,12 @@ import torch
 from torch import nn
 from sglang.srt.configs.load_config import LoadConfig
-from sglang.srt.hf_transformers_utils import AutoConfig
 from sglang.srt.lora.backend.base_backend import BaseLoRABackend
 from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
 from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
 from sglang.srt.lora.lora_config import LoRAConfig
 from sglang.srt.model_loader.loader import DefaultModelLoader
+from sglang.srt.utils.hf_transformers_utils import AutoConfig
 logger = logging.getLogger(__name__)
...
@@ -21,7 +21,6 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple
 import torch
 from sglang.srt.configs.load_config import LoadConfig
-from sglang.srt.hf_transformers_utils import AutoConfig
 from sglang.srt.lora.backend.base_backend import BaseLoRABackend, get_backend_from_name
 from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer
 from sglang.srt.lora.lora import LoRAAdapter
@@ -39,6 +38,7 @@ from sglang.srt.managers.io_struct import LoRAUpdateOutput
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import replace_submodule
+from sglang.srt.utils.hf_transformers_utils import AutoConfig
 logger = logging.getLogger(__name__)
...
@@ -4,7 +4,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
 import torch
 from sglang.srt.distributed import divide
-from sglang.srt.hf_transformers_utils import AutoConfig
 from sglang.srt.lora.layers import BaseLayerWithLoRA
 from sglang.srt.lora.lora import LoRAAdapter
 from sglang.srt.lora.lora_config import LoRAConfig
@@ -17,6 +16,7 @@ from sglang.srt.lora.utils import (
     get_stacked_multiply,
     get_target_module_name,
 )
+from sglang.srt.utils.hf_transformers_utils import AutoConfig
 logger = logging.getLogger(__name__)
...
@@ -5,7 +5,7 @@ from typing import Iterable, Optional, Set, Tuple
 import torch
-from sglang.srt.hf_transformers_utils import AutoConfig
+from sglang.srt.utils.hf_transformers_utils import AutoConfig
 @dataclass
...
@@ -24,7 +24,6 @@ import psutil
 import setproctitle
 import zmq
-from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.srt.managers.io_struct import (
     BatchEmbeddingOutput,
     BatchMultimodalDecodeReq,
@@ -42,6 +41,7 @@ from sglang.srt.utils import (
     get_zmq_socket,
     kill_itself_when_parent_died,
 )
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.utils import (
     TypeBasedDispatcher,
     find_printable_text,
...
@@ -60,11 +60,6 @@ from sglang.srt.disaggregation.utils import (
 )
 from sglang.srt.distributed import get_pp_group, get_world_group
 from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
-from sglang.srt.hf_transformers_utils import (
-    get_processor,
-    get_tokenizer,
-    get_tokenizer_from_processor,
-)
 from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
 from sglang.srt.layers.moe import initialize_moe_config
@@ -190,6 +185,11 @@ from sglang.srt.utils import (
     set_random_seed,
     suppress_other_loggers,
 )
+from sglang.srt.utils.hf_transformers_utils import (
+    get_processor,
+    get_tokenizer,
+    get_tokenizer_from_processor,
+)
 from sglang.utils import TypeBasedDispatcher, get_exception_traceback
 logger = logging.getLogger(__name__)
...
@@ -17,7 +17,7 @@ from enum import Enum, auto
 from typing import Any, List, Optional
 from sglang.srt.managers.io_struct import BlockReqInput, BlockReqType
-from sglang.srt.poll_based_barrier import PollBasedBarrier
+from sglang.srt.utils.poll_based_barrier import PollBasedBarrier
 logger = logging.getLogger(__name__)
...
@@ -204,7 +204,7 @@ class SchedulerProfilerMixin:
         torch.distributed.barrier(self.tp_cpu_group)
         if self.tp_rank == 0:
-            from sglang.srt.rpd_utils import rpd_to_chrome_trace
+            from sglang.srt.utils.rpd_utils import rpd_to_chrome_trace
             rpd_to_chrome_trace("trace.rpd", self.rpd_profile_path)
         self.rpd_profiler = None
...
@@ -43,11 +43,6 @@ from fastapi import BackgroundTasks
 from sglang.srt.aio_rwlock import RWLock
 from sglang.srt.configs.model_config import ModelConfig
 from sglang.srt.disaggregation.utils import DisaggregationMode
-from sglang.srt.hf_transformers_utils import (
-    get_processor,
-    get_tokenizer,
-    get_tokenizer_from_processor,
-)
 from sglang.srt.lora.lora_registry import LoRARegistry
 from sglang.srt.managers.async_dynamic_batch_tokenizer import AsyncDynamicbatchTokenizer
 from sglang.srt.managers.disagg_service import start_disagg_service
@@ -99,6 +94,11 @@ from sglang.srt.utils import (
     get_zmq_socket,
     kill_process_tree,
 )
+from sglang.srt.utils.hf_transformers_utils import (
+    get_processor,
+    get_tokenizer,
+    get_tokenizer_from_processor,
+)
 from sglang.utils import TypeBasedDispatcher, get_exception_traceback
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
...
@@ -22,11 +22,6 @@ import torch
 from sglang.srt.configs.model_config import ModelConfig
 from sglang.srt.distributed import get_pp_group, get_world_group
-from sglang.srt.hf_transformers_utils import (
-    get_processor,
-    get_tokenizer,
-    get_tokenizer_from_processor,
-)
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
 from sglang.srt.managers.io_struct import (
     DestroyWeightsUpdateGroupReqInput,
@@ -49,9 +44,14 @@ from sglang.srt.model_executor.forward_batch_info import (
     PPProxyTensors,
 )
 from sglang.srt.model_executor.model_runner import ModelRunner
-from sglang.srt.patch_torch import monkey_patch_torch_reductions
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import MultiprocessingSerializer, broadcast_pyobj, set_random_seed
+from sglang.srt.utils.hf_transformers_utils import (
+    get_processor,
+    get_tokenizer,
+    get_tokenizer_from_processor,
+)
+from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
 if TYPE_CHECKING:
     from sglang.srt.managers.cache_controller import LayerDoneCounter
...
@@ -34,7 +34,6 @@ from sglang.srt.model_executor.forward_batch_info import (
     ForwardMode,
     PPProxyTensors,
 )
-from sglang.srt.patch_torch import monkey_patch_torch_compile
 from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
 from sglang.srt.utils import (
     log_info_on_rank0,
@@ -43,6 +42,7 @@ from sglang.srt.utils import (
     require_mlp_sync,
     require_mlp_tp_gather,
 )
+from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
 logger = logging.getLogger(__name__)
...
@@ -48,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import (
     PPProxyTensors,
     enable_num_token_non_padded,
 )
-from sglang.srt.patch_torch import monkey_patch_torch_compile
 from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
 from sglang.srt.utils import (
     empty_context,
@@ -62,6 +61,7 @@ from sglang.srt.utils import (
     require_mlp_sync,
     require_mlp_tp_gather,
 )
+from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
 _is_hip = is_hip()
...
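
For downstream code that still imports from the old locations, a hypothetical migration helper along these lines (illustrative only; not part of this commit) rewrites the four moved module paths across a source tree:

```python
"""Hypothetical helper (not part of this commit): rewrite imports of the
four moved modules to their new sglang.srt.utils locations."""
import re
from pathlib import Path

# Modules relocated into sglang.srt.utils by this commit.
MOVED = ("hf_transformers_utils", "patch_torch", "poll_based_barrier", "rpd_utils")
PATTERN = re.compile(r"sglang\.srt\.(" + "|".join(MOVED) + r")\b")


def rewrite_imports(root: str) -> int:
    """Rewrite old import paths in-place; return the number of files changed."""
    changed = 0
    for path in Path(root).rglob("*.py"):
        text = path.read_text()
        new_text = PATTERN.sub(r"sglang.srt.utils.\1", text)
        if new_text != text:
            path.write_text(new_text)
            changed += 1
    return changed


if __name__ == "__main__":
    # Run from the root of the downstream project; already-migrated
    # paths are left untouched, so the rewrite is idempotent.
    print(rewrite_imports("."))
```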