Unverified commit c16b33cc, authored by Yineng Zhang and committed by GitHub
Browse files

cleanup deps 3/n (#4541)

parent 2d004512
...@@ -152,8 +152,6 @@ class Fp8Config(QuantizationConfig): ...@@ -152,8 +152,6 @@ class Fp8Config(QuantizationConfig):
def get_quant_method( def get_quant_method(
self, layer: torch.nn.Module, prefix: str self, layer: torch.nn.Module, prefix: str
) -> Optional["QuantizeMethodBase"]: ) -> Optional["QuantizeMethodBase"]:
from vllm.attention.layer import Attention # Avoid circular import
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
if isinstance(layer, LinearBase): if isinstance(layer, LinearBase):
...@@ -162,8 +160,6 @@ class Fp8Config(QuantizationConfig): ...@@ -162,8 +160,6 @@ class Fp8Config(QuantizationConfig):
return Fp8LinearMethod(self) return Fp8LinearMethod(self)
elif isinstance(layer, FusedMoE): elif isinstance(layer, FusedMoE):
return Fp8MoEMethod(self) return Fp8MoEMethod(self)
elif isinstance(layer, Attention):
return Fp8KVCacheMethod(self)
return None return None
def get_scaled_act_names(self) -> List[str]: def get_scaled_act_names(self) -> List[str]:
......
...@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple ...@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
import torch import torch
from torch import nn from torch import nn
from transformers import PretrainedConfig from transformers import PretrainedConfig
from vllm import _custom_ops as ops
from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import ReplicatedLinear from sglang.srt.layers.linear import ReplicatedLinear
...@@ -48,6 +47,8 @@ _is_cuda = is_cuda() ...@@ -48,6 +47,8 @@ _is_cuda = is_cuda()
if _is_cuda: if _is_cuda:
from sgl_kernel import awq_dequantize from sgl_kernel import awq_dequantize
else:
from vllm import _custom_ops as ops
class DeepseekModelNextN(nn.Module): class DeepseekModelNextN(nn.Module):
......
...@@ -23,7 +23,6 @@ import torch ...@@ -23,7 +23,6 @@ import torch
import torch.nn.functional as F import torch.nn.functional as F
from torch import nn from torch import nn
from transformers import PretrainedConfig from transformers import PretrainedConfig
from vllm import _custom_ops as ops
from sglang.srt.distributed import ( from sglang.srt.distributed import (
get_tensor_model_parallel_world_size, get_tensor_model_parallel_world_size,
...@@ -75,6 +74,8 @@ _is_cuda = is_cuda() ...@@ -75,6 +74,8 @@ _is_cuda = is_cuda()
if _is_cuda: if _is_cuda:
from sgl_kernel import awq_dequantize, bmm_fp8 from sgl_kernel import awq_dequantize, bmm_fp8
else:
from vllm import _custom_ops as ops
class DeepseekV2MLP(nn.Module): class DeepseekV2MLP(nn.Module):
......
...@@ -531,7 +531,10 @@ def load_image(image_file: Union[str, bytes]): ...@@ -531,7 +531,10 @@ def load_image(image_file: Union[str, bytes]):
def suppress_other_loggers(): def suppress_other_loggers():
from vllm.logger import logger as vllm_default_logger try:
from vllm.logger import logger as vllm_default_logger
except ImportError:
return
vllm_default_logger.setLevel(logging.WARN) vllm_default_logger.setLevel(logging.WARN)
logging.getLogger("vllm.distributed.device_communicators.pynccl").setLevel( logging.getLogger("vllm.distributed.device_communicators.pynccl").setLevel(
...@@ -620,11 +623,14 @@ def monkey_patch_p2p_access_check(): ...@@ -620,11 +623,14 @@ def monkey_patch_p2p_access_check():
def monkey_patch_vllm_gguf_config(): def monkey_patch_vllm_gguf_config():
from vllm.model_executor.layers.quantization.gguf import ( try:
GGUFConfig, from vllm.model_executor.layers.quantization.gguf import (
GGUFEmbeddingMethod, GGUFConfig,
GGUFLinearMethod, GGUFEmbeddingMethod,
) GGUFLinearMethod,
)
except ImportError:
return
from sglang.srt.layers.linear import LinearBase from sglang.srt.layers.linear import LinearBase
from sglang.srt.layers.vocab_parallel_embedding import VocabParallelEmbedding from sglang.srt.layers.vocab_parallel_embedding import VocabParallelEmbedding
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment