Unverified commit 510bc9e1, authored by wangxiyuan, committed by GitHub
Browse files

[Misc] Cleanup useless `current_platform` import (#35715)


Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
parent cbd361fd
...@@ -18,7 +18,6 @@ from vllm.logger import init_logger ...@@ -18,7 +18,6 @@ from vllm.logger import init_logger
from vllm.model_executor.layers.quantization.utils.quant_utils import ( from vllm.model_executor.layers.quantization.utils.quant_utils import (
kFp8StaticTensorSym, kFp8StaticTensorSym,
) )
from vllm.platforms import current_platform
from ..inductor_pass import enable_fake_mode from ..inductor_pass import enable_fake_mode
from ..utility.noop_elimination import NoOpEliminationPass from ..utility.noop_elimination import NoOpEliminationPass
...@@ -215,9 +214,6 @@ class MiddleAllReduceRMSNormPattern(_SequenceParallelPatternHelper): ...@@ -215,9 +214,6 @@ class MiddleAllReduceRMSNormPattern(_SequenceParallelPatternHelper):
) )
FP8_DTYPE = current_platform.fp8_dtype()
class FirstAllReduceRMSNormStaticFP8Pattern(_SequenceParallelPatternHelper): class FirstAllReduceRMSNormStaticFP8Pattern(_SequenceParallelPatternHelper):
def __init__( def __init__(
self, self,
......
...@@ -461,8 +461,6 @@ class ModelConfig: ...@@ -461,8 +461,6 @@ class ModelConfig:
self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer) self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer)
from vllm.platforms import current_platform
if self.override_attention_dtype is not None and not current_platform.is_rocm(): if self.override_attention_dtype is not None and not current_platform.is_rocm():
warnings.warn( warnings.warn(
"override-attention-dtype is set but not using ROCm platform", "override-attention-dtype is set but not using ROCm platform",
...@@ -940,8 +938,6 @@ class ModelConfig: ...@@ -940,8 +938,6 @@ class ModelConfig:
f"Unknown quantization method: {self.quantization}. Must " f"Unknown quantization method: {self.quantization}. Must "
f"be one of {supported_quantization}." f"be one of {supported_quantization}."
) )
from vllm.platforms import current_platform
current_platform.verify_quantization(self.quantization) current_platform.verify_quantization(self.quantization)
if self.quantization in me_quant.DEPRECATED_QUANTIZATION_METHODS: if self.quantization in me_quant.DEPRECATED_QUANTIZATION_METHODS:
...@@ -1811,8 +1807,6 @@ def _resolve_auto_dtype( ...@@ -1811,8 +1807,6 @@ def _resolve_auto_dtype(
*, *,
is_pooling_model: bool, is_pooling_model: bool,
): ):
from vllm.platforms import current_platform
supported_dtypes = [ supported_dtypes = [
dtype dtype
for dtype in current_platform.supported_dtypes for dtype in current_platform.supported_dtypes
......
...@@ -385,8 +385,6 @@ class GroupCoordinator: ...@@ -385,8 +385,6 @@ class GroupCoordinator:
self.cpu_group, 1 << 22, 6 self.cpu_group, 1 << 22, 6
) )
from vllm.platforms import current_platform
self.use_custom_op_call = ( self.use_custom_op_call = (
current_platform.is_cuda_alike() or current_platform.is_tpu() current_platform.is_cuda_alike() or current_platform.is_tpu()
) )
......
...@@ -55,9 +55,6 @@ elif current_platform.is_rocm(): ...@@ -55,9 +55,6 @@ elif current_platform.is_rocm():
def get_flash_attn_version( def get_flash_attn_version(
requires_alibi: bool = False, head_size: int | None = None requires_alibi: bool = False, head_size: int | None = None
) -> int | None: ) -> int | None:
# import here to avoid circular dependencies
from vllm.platforms import current_platform
if current_platform.is_xpu(): if current_platform.is_xpu():
return 2 return 2
if current_platform.is_rocm(): if current_platform.is_rocm():
......
...@@ -374,8 +374,6 @@ class FlashInferBackend(AttentionBackend): ...@@ -374,8 +374,6 @@ class FlashInferBackend(AttentionBackend):
@classmethod @classmethod
def get_required_kv_cache_layout(cls) -> KVCacheLayoutType | None: def get_required_kv_cache_layout(cls) -> KVCacheLayoutType | None:
from vllm.platforms import current_platform
capability = current_platform.get_device_capability() capability = current_platform.get_device_capability()
if capability is not None and capability.major == 10: if capability is not None and capability.major == 10:
return "HND" return "HND"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment