Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1819fbda
Unverified
Commit
1819fbda
authored
Jul 03, 2025
by
Jee Jee Li
Committed by
GitHub
Jul 03, 2025
Browse files
[Quantization] Bump to use latest bitsandbytes (#20424)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent
7f036710
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
14 additions
and
14 deletions
+14
-14
docker/Dockerfile
docker/Dockerfile
+1
-1
docs/features/quantization/bnb.md
docs/features/quantization/bnb.md
+1
-1
requirements/nightly_torch_test.txt
requirements/nightly_torch_test.txt
+1
-1
requirements/test.in
requirements/test.in
+1
-1
requirements/test.txt
requirements/test.txt
+1
-1
vllm/config.py
vllm/config.py
+1
-1
vllm/model_executor/layers/quantization/bitsandbytes.py
vllm/model_executor/layers/quantization/bitsandbytes.py
+4
-4
vllm/model_executor/model_loader/bitsandbytes_loader.py
vllm/model_executor/model_loader/bitsandbytes_loader.py
+4
-4
No files found.
docker/Dockerfile
View file @
1819fbda
...
...
@@ -498,7 +498,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
         uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
     else \
-        uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
+        uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.46.1' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
     fi
ENV
VLLM_USAGE_SOURCE production-docker-image
...
...
docs/features/quantization/bnb.md
View file @
1819fbda
...
...
@@ -10,7 +10,7 @@ Compared to other quantization methods, BitsAndBytes eliminates the need for cal
Below are the steps to utilize BitsAndBytes with vLLM.
 ```bash
-pip install bitsandbytes>=0.45.3
+pip install bitsandbytes>=0.46.1
 ```
vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint.
...
...
requirements/nightly_torch_test.txt
View file @
1819fbda
...
...
@@ -34,7 +34,7 @@ tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
schemathesis>=3.39.15 # Required for openai schema test.
# quantization
-bitsandbytes>=0.45.3
+bitsandbytes>=0.46.1
buildkite-test-collector==0.1.9
...
...
requirements/test.in
View file @
1819fbda
...
...
@@ -39,7 +39,7 @@ tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads.
schemathesis>=3.39.15 # Required for openai schema test.
# quantization
-bitsandbytes>=0.45.3
+bitsandbytes==0.46.1
buildkite-test-collector==0.1.9
...
...
requirements/test.txt
View file @
1819fbda
...
...
@@ -45,7 +45,7 @@ backoff==2.2.1
# via
# -r requirements/test.in
# schemathesis
-bitsandbytes==0.45.3
+bitsandbytes==0.46.1
# via -r requirements/test.in
black==24.10.0
# via datamodel-code-generator
...
...
vllm/config.py
View file @
1819fbda
...
...
@@ -969,7 +969,7 @@ class ModelConfig:
     def _verify_bnb_config(self) -> None:
         """
-        The current version of bitsandbytes (0.45.3) with 8-bit models does not
+        The current version of bitsandbytes (0.46.1) with 8-bit models does not
         yet support CUDA graph.
         # TODO Remove this when bitsandbytes supports.
         """
...
...
vllm/model_executor/layers/quantization/bitsandbytes.py
View file @
1819fbda
...
...
@@ -156,12 +156,12 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
     def __init__(self, quant_config: BitsAndBytesConfig):
         try:
             import bitsandbytes
-            if bitsandbytes.__version__ < "0.45.3":
+            if bitsandbytes.__version__ < "0.46.1":
                 raise ImportError("bitsandbytes version is wrong. Please "
-                                  "install bitsandbytes>=0.45.3.")
+                                  "install bitsandbytes>=0.46.1.")
         except ImportError as err:
-            raise ImportError("Please install bitsandbytes>=0.45.3 via "
-                              "`pip install bitsandbytes>=0.45.3` to use "
+            raise ImportError("Please install bitsandbytes>=0.46.1 via "
+                              "`pip install bitsandbytes>=0.46.1` to use "
                               "bitsandbytes quantizer.") from err
         self.quant_config = quant_config
...
...
vllm/model_executor/model_loader/bitsandbytes_loader.py
View file @
1819fbda
...
...
@@ -183,12 +183,12 @@ class BitsAndBytesModelLoader(BaseModelLoader):
         try:
             import bitsandbytes
-            if bitsandbytes.__version__ < "0.45.3":
+            if bitsandbytes.__version__ < "0.46.1":
                 raise ImportError("bitsandbytes version is wrong. Please "
-                                  "install bitsandbytes>=0.45.3.")
+                                  "install bitsandbytes>=0.46.1.")
         except ImportError as err:
-            raise ImportError("Please install bitsandbytes>=0.45.3 via "
-                              "`pip install bitsandbytes>=0.45.3` to use "
+            raise ImportError("Please install bitsandbytes>=0.46.1 via "
+                              "`pip install bitsandbytes>=0.46.1` to use "
                               "bitsandbytes quantizer.") from err
         hf_weights_files, use_safetensors = self._prepare_weights(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment