Unverified commit 1819fbda, authored by Jee Jee Li and committed by GitHub
Browse files

[Quantization] Bump to use latest bitsandbytes (#20424)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 7f036710
...@@ -498,7 +498,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -498,7 +498,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \ uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
else \ else \
uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \ uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.46.1' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
fi fi
ENV VLLM_USAGE_SOURCE production-docker-image ENV VLLM_USAGE_SOURCE production-docker-image
......
...@@ -10,7 +10,7 @@ Compared to other quantization methods, BitsAndBytes eliminates the need for cal ...@@ -10,7 +10,7 @@ Compared to other quantization methods, BitsAndBytes eliminates the need for cal
Below are the steps to utilize BitsAndBytes with vLLM. Below are the steps to utilize BitsAndBytes with vLLM.
```bash ```bash
pip install "bitsandbytes>=0.45.3" pip install "bitsandbytes>=0.46.1"
``` ```
vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint. vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint.
......
...@@ -34,7 +34,7 @@ tokenizers==0.21.1 ...@@ -34,7 +34,7 @@ tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads. huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
schemathesis>=3.39.15 # Required for openai schema test. schemathesis>=3.39.15 # Required for openai schema test.
# quantization # quantization
bitsandbytes>=0.45.3 bitsandbytes>=0.46.1
buildkite-test-collector==0.1.9 buildkite-test-collector==0.1.9
......
...@@ -39,7 +39,7 @@ tokenizers==0.21.1 ...@@ -39,7 +39,7 @@ tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads. huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads.
schemathesis>=3.39.15 # Required for openai schema test. schemathesis>=3.39.15 # Required for openai schema test.
# quantization # quantization
bitsandbytes>=0.45.3 bitsandbytes==0.46.1
buildkite-test-collector==0.1.9 buildkite-test-collector==0.1.9
......
...@@ -45,7 +45,7 @@ backoff==2.2.1 ...@@ -45,7 +45,7 @@ backoff==2.2.1
# via # via
# -r requirements/test.in # -r requirements/test.in
# schemathesis # schemathesis
bitsandbytes==0.45.3 bitsandbytes==0.46.1
# via -r requirements/test.in # via -r requirements/test.in
black==24.10.0 black==24.10.0
# via datamodel-code-generator # via datamodel-code-generator
......
...@@ -969,7 +969,7 @@ class ModelConfig: ...@@ -969,7 +969,7 @@ class ModelConfig:
def _verify_bnb_config(self) -> None: def _verify_bnb_config(self) -> None:
""" """
The current version of bitsandbytes (0.45.3) with 8-bit models does not The current version of bitsandbytes (0.46.1) with 8-bit models does not
yet support CUDA graph. yet support CUDA graph.
# TODO Remove this when bitsandbytes supports. # TODO Remove this when bitsandbytes supports.
""" """
......
...@@ -156,12 +156,12 @@ class BitsAndBytesLinearMethod(LinearMethodBase): ...@@ -156,12 +156,12 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
def __init__(self, quant_config: BitsAndBytesConfig): def __init__(self, quant_config: BitsAndBytesConfig):
try: try:
import bitsandbytes import bitsandbytes
if bitsandbytes.__version__ < "0.45.3": if bitsandbytes.__version__ < "0.46.1":
raise ImportError("bitsandbytes version is wrong. Please " raise ImportError("bitsandbytes version is wrong. Please "
"install bitsandbytes>=0.45.3.") "install bitsandbytes>=0.46.1.")
except ImportError as err: except ImportError as err:
raise ImportError("Please install bitsandbytes>=0.45.3 via " raise ImportError("Please install bitsandbytes>=0.46.1 via "
"`pip install bitsandbytes>=0.45.3` to use " "`pip install bitsandbytes>=0.46.1` to use "
"bitsandbytes quantizer.") from err "bitsandbytes quantizer.") from err
self.quant_config = quant_config self.quant_config = quant_config
......
...@@ -183,12 +183,12 @@ class BitsAndBytesModelLoader(BaseModelLoader): ...@@ -183,12 +183,12 @@ class BitsAndBytesModelLoader(BaseModelLoader):
try: try:
import bitsandbytes import bitsandbytes
if bitsandbytes.__version__ < "0.45.3": if bitsandbytes.__version__ < "0.46.1":
raise ImportError("bitsandbytes version is wrong. Please " raise ImportError("bitsandbytes version is wrong. Please "
"install bitsandbytes>=0.45.3.") "install bitsandbytes>=0.46.1.")
except ImportError as err: except ImportError as err:
raise ImportError("Please install bitsandbytes>=0.45.3 via " raise ImportError("Please install bitsandbytes>=0.46.1 via "
"`pip install bitsandbytes>=0.45.3` to use " "`pip install bitsandbytes>=0.46.1` to use "
"bitsandbytes quantizer.") from err "bitsandbytes quantizer.") from err
hf_weights_files, use_safetensors = self._prepare_weights( hf_weights_files, use_safetensors = self._prepare_weights(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment