Unverified commit fdc4e1e5, authored by fzyzcjy, committed by GitHub
Browse files

Tiny move files to utils folder (#11166)

parent 04b86b3c
......@@ -8,7 +8,7 @@ from datasets import load_dataset
import sglang as sgl
from sglang.global_config import global_config
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
select_sglang_backend,
......
......@@ -7,7 +7,7 @@ from pathlib import Path
from tqdm import tqdm
import sglang as sgl
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
select_sglang_backend,
......
......@@ -3,7 +3,7 @@ This example demonstrates how to provide tokenized ids to LLM as input instead o
"""
import sglang as sgl
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
......
......@@ -7,7 +7,7 @@ python token_in_token_out_llm_server.py
import requests
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.test.test_utils import is_in_ci
from sglang.utils import terminate_process, wait_for_server
......
......@@ -60,7 +60,6 @@ import torch.distributed as dist
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.distributed.parallel_state import destroy_distributed_environment
from sglang.srt.entrypoints.engine import _set_envs_and_config
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.layers.moe import initialize_moe_config
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
from sglang.srt.managers.scheduler import Scheduler
......@@ -78,6 +77,7 @@ from sglang.srt.utils import (
set_gpu_proc_affinity,
suppress_other_loggers,
)
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
@dataclasses.dataclass
......
......@@ -635,7 +635,7 @@ def get_tokenizer(
if pretrained_model_name_or_path.endswith(
".json"
) or pretrained_model_name_or_path.endswith(".model"):
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
return get_tokenizer(pretrained_model_name_or_path)
......
......@@ -433,7 +433,7 @@ class Runtime:
self.endpoint.cache_prefix(prefix)
def get_tokenizer(self):
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
return get_tokenizer(
self.server_args.tokenizer_path,
......
......@@ -23,16 +23,16 @@ import torch
from transformers import PretrainedConfig
from sglang.srt.environ import envs
from sglang.srt.hf_transformers_utils import (
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import is_hip, retry
from sglang.srt.utils.hf_transformers_utils import (
get_config,
get_context_length,
get_generation_config,
get_hf_text_config,
get_sparse_attention_config,
)
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import is_hip, retry
from sglang.utils import is_in_ci
logger = logging.getLogger(__name__)
......
......@@ -26,12 +26,12 @@ import torch
from torch import nn
from sglang.srt.configs.load_config import LoadConfig
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.lora.backend.base_backend import BaseLoRABackend
from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
from sglang.srt.lora.lora_config import LoRAConfig
from sglang.srt.model_loader.loader import DefaultModelLoader
from sglang.srt.utils.hf_transformers_utils import AutoConfig
logger = logging.getLogger(__name__)
......
......@@ -21,7 +21,6 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple
import torch
from sglang.srt.configs.load_config import LoadConfig
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.lora.backend.base_backend import BaseLoRABackend, get_backend_from_name
from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer
from sglang.srt.lora.lora import LoRAAdapter
......@@ -39,6 +38,7 @@ from sglang.srt.managers.io_struct import LoRAUpdateOutput
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import replace_submodule
from sglang.srt.utils.hf_transformers_utils import AutoConfig
logger = logging.getLogger(__name__)
......
......@@ -4,7 +4,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
import torch
from sglang.srt.distributed import divide
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.lora.layers import BaseLayerWithLoRA
from sglang.srt.lora.lora import LoRAAdapter
from sglang.srt.lora.lora_config import LoRAConfig
......@@ -17,6 +16,7 @@ from sglang.srt.lora.utils import (
get_stacked_multiply,
get_target_module_name,
)
from sglang.srt.utils.hf_transformers_utils import AutoConfig
logger = logging.getLogger(__name__)
......
......@@ -5,7 +5,7 @@ from typing import Iterable, Optional, Set, Tuple
import torch
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.utils.hf_transformers_utils import AutoConfig
@dataclass
......
......@@ -24,7 +24,6 @@ import psutil
import setproctitle
import zmq
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.managers.io_struct import (
BatchEmbeddingOutput,
BatchMultimodalDecodeReq,
......@@ -42,6 +41,7 @@ from sglang.srt.utils import (
get_zmq_socket,
kill_itself_when_parent_died,
)
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.utils import (
TypeBasedDispatcher,
find_printable_text,
......
......@@ -60,11 +60,6 @@ from sglang.srt.disaggregation.utils import (
)
from sglang.srt.distributed import get_pp_group, get_world_group
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
from sglang.srt.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.layers.moe import initialize_moe_config
......@@ -190,6 +185,11 @@ from sglang.srt.utils import (
set_random_seed,
suppress_other_loggers,
)
from sglang.srt.utils.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.utils import TypeBasedDispatcher, get_exception_traceback
logger = logging.getLogger(__name__)
......
......@@ -17,7 +17,7 @@ from enum import Enum, auto
from typing import Any, List, Optional
from sglang.srt.managers.io_struct import BlockReqInput, BlockReqType
from sglang.srt.poll_based_barrier import PollBasedBarrier
from sglang.srt.utils.poll_based_barrier import PollBasedBarrier
logger = logging.getLogger(__name__)
......
......@@ -204,7 +204,7 @@ class SchedulerProfilerMixin:
torch.distributed.barrier(self.tp_cpu_group)
if self.tp_rank == 0:
from sglang.srt.rpd_utils import rpd_to_chrome_trace
from sglang.srt.utils.rpd_utils import rpd_to_chrome_trace
rpd_to_chrome_trace("trace.rpd", self.rpd_profile_path)
self.rpd_profiler = None
......
......@@ -43,11 +43,6 @@ from fastapi import BackgroundTasks
from sglang.srt.aio_rwlock import RWLock
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.disaggregation.utils import DisaggregationMode
from sglang.srt.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.lora.lora_registry import LoRARegistry
from sglang.srt.managers.async_dynamic_batch_tokenizer import AsyncDynamicbatchTokenizer
from sglang.srt.managers.disagg_service import start_disagg_service
......@@ -99,6 +94,11 @@ from sglang.srt.utils import (
get_zmq_socket,
kill_process_tree,
)
from sglang.srt.utils.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.utils import TypeBasedDispatcher, get_exception_traceback
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
......
......@@ -22,11 +22,6 @@ import torch
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.distributed import get_pp_group, get_world_group
from sglang.srt.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.managers.io_struct import (
DestroyWeightsUpdateGroupReqInput,
......@@ -49,9 +44,14 @@ from sglang.srt.model_executor.forward_batch_info import (
PPProxyTensors,
)
from sglang.srt.model_executor.model_runner import ModelRunner
from sglang.srt.patch_torch import monkey_patch_torch_reductions
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import MultiprocessingSerializer, broadcast_pyobj, set_random_seed
from sglang.srt.utils.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
if TYPE_CHECKING:
from sglang.srt.managers.cache_controller import LayerDoneCounter
......
......@@ -34,7 +34,6 @@ from sglang.srt.model_executor.forward_batch_info import (
ForwardMode,
PPProxyTensors,
)
from sglang.srt.patch_torch import monkey_patch_torch_compile
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
from sglang.srt.utils import (
log_info_on_rank0,
......@@ -43,6 +42,7 @@ from sglang.srt.utils import (
require_mlp_sync,
require_mlp_tp_gather,
)
from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
logger = logging.getLogger(__name__)
......
......@@ -48,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import (
PPProxyTensors,
enable_num_token_non_padded,
)
from sglang.srt.patch_torch import monkey_patch_torch_compile
from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
from sglang.srt.utils import (
empty_context,
......@@ -62,6 +61,7 @@ from sglang.srt.utils import (
require_mlp_sync,
require_mlp_tp_gather,
)
from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
_is_hip = is_hip()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment