Unverified commit 609b533c, authored by Syed Muhammad Bin Asif, committed by GitHub
Browse files

[Bugfix] Add proper comparison for package versions (#22314)


Signed-off-by: Syed Muhammad Bin Asif <syedmba7@connect.hku.hk>
parent 5e9455ae
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
# Copyright (c) Microsoft Corporation. # Copyright (c) Microsoft Corporation.
# Licensed under the MIT License. # Licensed under the MIT License.
from packaging import version
from vllm.model_executor.layers.quantization.utils.bitblas_utils import ( from vllm.model_executor.layers.quantization.utils.bitblas_utils import (
MINIMUM_BITBLAS_VERSION, MINIMUM_BITBLAS_VERSION,
) )
...@@ -10,7 +12,7 @@ from vllm.model_executor.layers.quantization.utils.bitblas_utils import ( ...@@ -10,7 +12,7 @@ from vllm.model_executor.layers.quantization.utils.bitblas_utils import (
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(bitblas.__version__) < version.parse(MINIMUM_BITBLAS_VERSION):
raise ImportError( raise ImportError(
"bitblas version is wrong. Please " "bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}" f"install bitblas>={MINIMUM_BITBLAS_VERSION}"
......
...@@ -200,7 +200,8 @@ vision-language model. ...@@ -200,7 +200,8 @@ vision-language model.
lora_config = vllm_config.lora_config lora_config = vllm_config.lora_config
super().__init__(config, cache_config, quant_config, lora_config, prefix) super().__init__(config, cache_config, quant_config, lora_config, prefix)
if __version__ >= "0.6.4": from packaging import version
if version.parse(__version__) >= version.parse("0.6.4"):
MyModel = MyNewModel MyModel = MyNewModel
else: else:
MyModel = MyOldModel MyModel = MyOldModel
......
...@@ -31,6 +31,8 @@ It supports page size >= 1. ...@@ -31,6 +31,8 @@ It supports page size >= 1.
import logging import logging
from packaging import version
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.triton_utils import tl, triton from vllm.triton_utils import tl, triton
...@@ -40,7 +42,7 @@ logger = logging.getLogger(__name__) ...@@ -40,7 +42,7 @@ logger = logging.getLogger(__name__)
# Only print the following warnings when triton version < 3.2.0. # Only print the following warnings when triton version < 3.2.0.
# The issue won't affect performance or accuracy. # The issue won't affect performance or accuracy.
if triton.__version__ < '3.2.0': if version.parse(triton.__version__) < version.parse('3.2.0'):
logger.warning( logger.warning(
"The following error message 'operation scheduled before its operands' " "The following error message 'operation scheduled before its operands' "
"can be ignored.") "can be ignored.")
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
from typing import Any, Optional from typing import Any, Optional
import torch import torch
from packaging import version
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase
...@@ -45,7 +46,8 @@ class BitBLASConfig(QuantizationConfig): ...@@ -45,7 +46,8 @@ class BitBLASConfig(QuantizationConfig):
) -> None: ) -> None:
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(bitblas.__version__) < version.parse(
MINIMUM_BITBLAS_VERSION):
raise ImportError( raise ImportError(
"bitblas version is wrong. Please " "bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}") f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
from typing import Any, Callable, Optional, Union from typing import Any, Callable, Optional, Union
import torch import torch
from packaging import version
from vllm.model_executor.layers.fused_moe.layer import (FusedMoE, from vllm.model_executor.layers.fused_moe.layer import (FusedMoE,
FusedMoEMethodBase) FusedMoEMethodBase)
...@@ -169,7 +170,8 @@ class BitsAndBytesLinearMethod(LinearMethodBase): ...@@ -169,7 +170,8 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
def __init__(self, quant_config: BitsAndBytesConfig): def __init__(self, quant_config: BitsAndBytesConfig):
try: try:
import bitsandbytes import bitsandbytes
if bitsandbytes.__version__ < "0.46.1": if version.parse(
bitsandbytes.__version__) < version.parse("0.46.1"):
raise ImportError("bitsandbytes version is wrong. Please " raise ImportError("bitsandbytes version is wrong. Please "
"install bitsandbytes>=0.46.1.") "install bitsandbytes>=0.46.1.")
except ImportError as err: except ImportError as err:
...@@ -412,7 +414,8 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase): ...@@ -412,7 +414,8 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
def __init__(self, quant_config: BitsAndBytesConfig): def __init__(self, quant_config: BitsAndBytesConfig):
try: try:
import bitsandbytes import bitsandbytes
if bitsandbytes.__version__ < "0.46.1": if version.parse(
bitsandbytes.__version__) < version.parse("0.46.1"):
raise ImportError("bitsandbytes version is wrong. Please " raise ImportError("bitsandbytes version is wrong. Please "
"install bitsandbytes>=0.46.1.") "install bitsandbytes>=0.46.1.")
except ImportError as err: except ImportError as err:
......
...@@ -6,6 +6,7 @@ from typing import Any, Optional ...@@ -6,6 +6,7 @@ from typing import Any, Optional
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from packaging import version
from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase
from vllm.model_executor.layers.quantization import QuantizationMethods from vllm.model_executor.layers.quantization import QuantizationMethods
...@@ -145,7 +146,7 @@ class DeepSpeedFPParameter(nn.Parameter): ...@@ -145,7 +146,7 @@ class DeepSpeedFPParameter(nn.Parameter):
quant_config: DeepSpeedFPConfig): quant_config: DeepSpeedFPConfig):
try: try:
import deepspeed import deepspeed
if deepspeed.__version__ < "0.14.2": if version.parse(deepspeed.__version__) < version.parse("0.14.2"):
raise ImportError("deepspeed version is wrong. Please " raise ImportError("deepspeed version is wrong. Please "
"install deepspeed>=0.14.2.") "install deepspeed>=0.14.2.")
from deepspeed.ops.fp_quantizer import FP_Quantize from deepspeed.ops.fp_quantizer import FP_Quantize
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
from typing import Any, Optional from typing import Any, Optional
import torch import torch
from packaging import version
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from vllm.logger import init_logger from vllm.logger import init_logger
...@@ -63,7 +64,8 @@ class GPTQBitBLASConfig(QuantizationConfig): ...@@ -63,7 +64,8 @@ class GPTQBitBLASConfig(QuantizationConfig):
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(bitblas.__version__) < version.parse(
MINIMUM_BITBLAS_VERSION):
raise ImportError( raise ImportError(
"bitblas version is wrong. Please " "bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}") f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
from typing import Any, Optional from typing import Any, Optional
import torch import torch
from packaging import version
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
UnquantizedLinearMethod) UnquantizedLinearMethod)
...@@ -135,7 +136,8 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod): ...@@ -135,7 +136,8 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod):
try: try:
import intel_extension_for_pytorch as ipex import intel_extension_for_pytorch as ipex
if ipex.__version__ < MIN_IPEX_VERSION: if version.parse(
ipex.__version__) < version.parse(MIN_IPEX_VERSION):
raise ImportError( raise ImportError(
"intel_extension_for_pytorch version is " "intel_extension_for_pytorch version is "
"wrong. Please install " "wrong. Please install "
...@@ -199,7 +201,8 @@ class IPEXAWQLinearMethod(AWQLinearMethod): ...@@ -199,7 +201,8 @@ class IPEXAWQLinearMethod(AWQLinearMethod):
try: try:
import intel_extension_for_pytorch as ipex import intel_extension_for_pytorch as ipex
if ipex.__version__ < MIN_IPEX_VERSION: if version.parse(
ipex.__version__) < version.parse(MIN_IPEX_VERSION):
raise ImportError( raise ImportError(
"intel_extension_for_pytorch version is " "intel_extension_for_pytorch version is "
"wrong. Please install " "wrong. Please install "
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
from typing import Optional from typing import Optional
import torch import torch
from packaging import version
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.layers.quantization.base_config import ( from vllm.model_executor.layers.quantization.base_config import (
...@@ -110,7 +111,8 @@ class BitBLASLinearKernel(MPLinearKernel): ...@@ -110,7 +111,8 @@ class BitBLASLinearKernel(MPLinearKernel):
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(bitblas.__version__) < version.parse(
MINIMUM_BITBLAS_VERSION):
raise ImportError( raise ImportError(
"bitblas version is wrong. Please " "bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}") f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
from typing import Optional from typing import Optional
import torch import torch
from packaging import version
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.scalar_type import ScalarType, scalar_types from vllm.scalar_type import ScalarType, scalar_types
...@@ -75,7 +76,8 @@ def _check_bitblas_supported( ...@@ -75,7 +76,8 @@ def _check_bitblas_supported(
# Finally, check if bitblas is installed # Finally, check if bitblas is installed
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(
bitblas.__version__) < version.parse(MINIMUM_BITBLAS_VERSION):
raise ImportError("bitblas version is wrong. Please " raise ImportError("bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}") f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
except ImportError: except ImportError:
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
from typing import Callable, Optional, Union from typing import Callable, Optional, Union
import torch import torch
from packaging import version
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm import envs from vllm import envs
...@@ -21,8 +22,8 @@ TORCH_DEVICE_IDENTITY = None ...@@ -21,8 +22,8 @@ TORCH_DEVICE_IDENTITY = None
# torch._scaled_mm rowwise feature. # torch._scaled_mm rowwise feature.
# The condition is determined once as the operations # The condition is determined once as the operations
# are time consuming. # are time consuming.
USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() and version.parse(
and torch.__version__[0:3] >= "2.7" torch.__version__) >= version.parse("2.7")
and current_platform.has_device_capability(94)) and current_platform.has_device_capability(94))
......
...@@ -12,6 +12,7 @@ from typing import Any, Callable, Optional ...@@ -12,6 +12,7 @@ from typing import Any, Callable, Optional
import numpy as np import numpy as np
import torch import torch
from huggingface_hub import HfApi from huggingface_hub import HfApi
from packaging import version
from torch import nn from torch import nn
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME from transformers.utils import SAFE_WEIGHTS_INDEX_NAME
...@@ -193,7 +194,8 @@ class BitsAndBytesModelLoader(BaseModelLoader): ...@@ -193,7 +194,8 @@ class BitsAndBytesModelLoader(BaseModelLoader):
try: try:
import bitsandbytes import bitsandbytes
if bitsandbytes.__version__ < "0.46.1": if version.parse(
bitsandbytes.__version__) < version.parse("0.46.1"):
raise ImportError("bitsandbytes version is wrong. Please " raise ImportError("bitsandbytes version is wrong. Please "
"install bitsandbytes>=0.46.1.") "install bitsandbytes>=0.46.1.")
except ImportError as err: except ImportError as err:
......
...@@ -5,6 +5,7 @@ from typing import Optional ...@@ -5,6 +5,7 @@ from typing import Optional
import torch import torch
import torch.nn as nn import torch.nn as nn
from packaging import version
from vllm import envs from vllm import envs
from vllm.logger import init_logger from vllm.logger import init_logger
...@@ -32,7 +33,7 @@ class TopKTopPSampler(nn.Module): ...@@ -32,7 +33,7 @@ class TopKTopPSampler(nn.Module):
if current_platform.is_cuda(): if current_platform.is_cuda():
if is_flashinfer_available: if is_flashinfer_available:
flashinfer_version = flashinfer.__version__ flashinfer_version = flashinfer.__version__
if flashinfer_version < "0.2.3": if version.parse(flashinfer_version) < version.parse("0.2.3"):
logger.warning_once( logger.warning_once(
"FlashInfer version >= 0.2.3 required. " "FlashInfer version >= 0.2.3 required. "
"Falling back to default sampling implementation.") "Falling back to default sampling implementation.")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment