Unverified commit 6ebffafb, authored by Cyrus Leung, committed by GitHub
Browse files

[Misc] Clean up more utils (#27567)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 3b96f85c
...@@ -69,10 +69,7 @@ from vllm.pooling_params import PoolingParams ...@@ -69,10 +69,7 @@ from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingType from vllm.sampling_params import SamplingType
from vllm.sequence import IntermediateTensors from vllm.sequence import IntermediateTensors
from vllm.tasks import GenerationTask, PoolingTask, SupportedTask from vllm.tasks import GenerationTask, PoolingTask, SupportedTask
from vllm.utils import ( from vllm.utils import length_from_prompt_token_ids_or_embeds
check_use_alibi,
length_from_prompt_token_ids_or_embeds,
)
from vllm.utils.jsontree import json_map_leaves from vllm.utils.jsontree import json_map_leaves
from vllm.utils.math_utils import cdiv, round_up from vllm.utils.math_utils import cdiv, round_up
from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_constants import GiB_bytes
...@@ -266,7 +263,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -266,7 +263,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
self.hidden_size = model_config.get_hidden_size() self.hidden_size = model_config.get_hidden_size()
self.attention_chunk_size = model_config.attention_chunk_size self.attention_chunk_size = model_config.attention_chunk_size
# Only relevant for models using ALiBi (e.g, MPT) # Only relevant for models using ALiBi (e.g, MPT)
self.use_alibi = check_use_alibi(model_config) self.use_alibi = model_config.uses_alibi
self.cascade_attn_enabled = not self.model_config.disable_cascade_attn self.cascade_attn_enabled = not self.model_config.disable_cascade_attn
......
...@@ -72,7 +72,7 @@ class Worker(WorkerBase): ...@@ -72,7 +72,7 @@ class Worker(WorkerBase):
if self.model_config.trust_remote_code: if self.model_config.trust_remote_code:
# note: lazy import to avoid importing torch before initializing # note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules from vllm.utils.import_utils import init_cached_hf_modules
init_cached_hf_modules() init_cached_hf_modules()
......
...@@ -89,7 +89,7 @@ class TPUWorker: ...@@ -89,7 +89,7 @@ class TPUWorker:
if self.model_config.trust_remote_code: if self.model_config.trust_remote_code:
# note: lazy import to avoid importing torch before initializing # note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules from vllm.utils.import_utils import init_cached_hf_modules
init_cached_hf_modules() init_cached_hf_modules()
......
...@@ -13,14 +13,11 @@ from vllm.logger import init_logger ...@@ -13,14 +13,11 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.cache import worker_receiver_cache_from_config from vllm.multimodal.cache import worker_receiver_cache_from_config
from vllm.utils import ( from vllm.utils import warn_for_unimplemented_methods
enable_trace_function_call_for_thread,
run_method,
warn_for_unimplemented_methods,
)
from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.system_utils import update_environment_variables from vllm.utils.system_utils import update_environment_variables
from vllm.v1.kv_cache_interface import KVCacheSpec from vllm.v1.kv_cache_interface import KVCacheSpec
from vllm.v1.serial_utils import run_method
if TYPE_CHECKING: if TYPE_CHECKING:
from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.core.sched.output import SchedulerOutput
...@@ -182,17 +179,18 @@ class WorkerWrapperBase: ...@@ -182,17 +179,18 @@ class WorkerWrapperBase:
""" """
self.rpc_rank = rpc_rank self.rpc_rank = rpc_rank
self.worker: WorkerBase | None = None self.worker: WorkerBase | None = None
self.vllm_config: VllmConfig | None = None
# do not store this `vllm_config`, `init_worker` will set the final # do not store this `vllm_config`, `init_worker` will set the final
# one. TODO: investigate if we can remove this field in # one.
# `WorkerWrapperBase`, `init_cached_hf_modules` should be # TODO: investigate if we can remove this field in `WorkerWrapperBase`,
# unnecessary now. # `init_cached_hf_modules` should be unnecessary now.
if vllm_config.model_config is not None: self.vllm_config: VllmConfig | None = None
# it can be None in tests
trust_remote_code = vllm_config.model_config.trust_remote_code # `model_config` can be None in tests
if trust_remote_code: model_config = vllm_config.model_config
if model_config and model_config.trust_remote_code:
# note: lazy import to avoid importing torch before initializing # note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules from vllm.utils.import_utils import init_cached_hf_modules
init_cached_hf_modules() init_cached_hf_modules()
...@@ -231,7 +229,7 @@ class WorkerWrapperBase: ...@@ -231,7 +229,7 @@ class WorkerWrapperBase:
assert self.vllm_config is not None, ( assert self.vllm_config is not None, (
"vllm_config is required to initialize the worker" "vllm_config is required to initialize the worker"
) )
enable_trace_function_call_for_thread(self.vllm_config) self.vllm_config.enable_trace_function_call_for_thread()
from vllm.plugins import load_general_plugins from vllm.plugins import load_general_plugins
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment