[Misc] Clean up more utils (#27567)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Misc] Clean up more utils (#27567)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
6ebffafb · Cyrus Leung · GitHub · 3b96f85c · 6ebffafb · 6ebffafb
Unverified commit 6ebffafb, authored Oct 27, 2025 by Cyrus Leung; committed by GitHub on Oct 27, 2025
4 changed files
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -69,10 +69,7 @@ from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
 from vllm.tasks import GenerationTask, PoolingTask, SupportedTask
-from vllm.utils import (
+from vllm.utils import length_from_prompt_token_ids_or_embeds
-    check_use_alibi,
-    length_from_prompt_token_ids_or_embeds,
-)
 from vllm.utils.jsontree import json_map_leaves
 from vllm.utils.math_utils import cdiv, round_up
 from vllm.utils.mem_constants import GiB_bytes
@@ -266,7 +263,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
        self.hidden_size = model_config.get_hidden_size()
        self.attention_chunk_size = model_config.attention_chunk_size
        # Only relevant for models using ALiBi (e.g, MPT)
-        self.use_alibi = check_use_alibi(model_config)
+        self.use_alibi = model_config.uses_alibi
        self.cascade_attn_enabled = not self.model_config.disable_cascade_attn

--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -72,7 +72,7 @@ class Worker(WorkerBase):
        if self.model_config.trust_remote_code:
            # note: lazy import to avoid importing torch before initializing
-            from vllm.utils import init_cached_hf_modules
+            from vllm.utils.import_utils import init_cached_hf_modules
            init_cached_hf_modules()

--- a/vllm/v1/worker/tpu_worker.py
+++ b/vllm/v1/worker/tpu_worker.py
@@ -89,7 +89,7 @@ class TPUWorker:
        if self.model_config.trust_remote_code:
            # note: lazy import to avoid importing torch before initializing
-            from vllm.utils import init_cached_hf_modules
+            from vllm.utils.import_utils import init_cached_hf_modules
            init_cached_hf_modules()

--- a/vllm/v1/worker/worker_base.py
+++ b/vllm/v1/worker/worker_base.py
@@ -13,14 +13,11 @@ from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.cache import worker_receiver_cache_from_config
-from vllm.utils import (
+from vllm.utils import warn_for_unimplemented_methods
-    enable_trace_function_call_for_thread,
-    run_method,
-    warn_for_unimplemented_methods,
-)
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.utils.system_utils import update_environment_variables
 from vllm.v1.kv_cache_interface import KVCacheSpec
+from vllm.v1.serial_utils import run_method
 if TYPE_CHECKING:
    from vllm.v1.core.sched.output import SchedulerOutput
@@ -182,17 +179,18 @@ class WorkerWrapperBase:
        """
        self.rpc_rank = rpc_rank
        self.worker: WorkerBase | None = None
-        self.vllm_config: VllmConfig | None = None
        # do not store this `vllm_config`, `init_worker` will set the final
-        # one. TODO: investigate if we can remove this field in
+        # one.
-        # `WorkerWrapperBase`, `init_cached_hf_modules` should be
+        # TODO: investigate if we can remove this field in `WorkerWrapperBase`,
-        # unnecessary now.
+        # `init_cached_hf_modules` should be unnecessary now.
-        if vllm_config.model_config is not None:
+        self.vllm_config: VllmConfig | None = None
-            # it can be None in tests
-            trust_remote_code = vllm_config.model_config.trust_remote_code
+        # `model_config` can be None in tests
-            if trust_remote_code:
+        model_config = vllm_config.model_config
+        if model_config and model_config.trust_remote_code:
            # note: lazy import to avoid importing torch before initializing
-                from vllm.utils import init_cached_hf_modules
+            from vllm.utils.import_utils import init_cached_hf_modules
            init_cached_hf_modules()
@@ -231,7 +229,7 @@ class WorkerWrapperBase:
        assert self.vllm_config is not None, (
            "vllm_config is required to initialize the worker"
        )
-        enable_trace_function_call_for_thread(self.vllm_config)
+        self.vllm_config.enable_trace_function_call_for_thread()
        from vllm.plugins import load_general_plugins