Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
bitsandbytes
Commits
76885a41
Unverified
Commit
76885a41
authored
Apr 02, 2024
by
Titus
Committed by
GitHub
Apr 02, 2024
Browse files
Merge pull request #1160 from matthewdouglas/quant4bit-blocksize4096
Fix 4bit quantization with blocksize = 4096
parents
2965c765
a4714569
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
30 additions
and
15 deletions
+30
-15
bitsandbytes/functional.py
bitsandbytes/functional.py
+4
-3
csrc/ops.cu
csrc/ops.cu
+1
-1
install_cuda.py
install_cuda.py
+2
-6
tests/test_functional.py
tests/test_functional.py
+23
-5
No files found.
bitsandbytes/functional.py
View file @
76885a41
...
@@ -1087,11 +1087,12 @@ def get_4bit_type(typename, device=None, blocksize=64):
...
@@ -1087,11 +1087,12 @@ def get_4bit_type(typename, device=None, blocksize=64):
if
data
is
None
:
if
data
is
None
:
raise
NotImplementedError
(
f
"Typename
{
typename
}
not supported"
)
raise
NotImplementedError
(
f
"Typename
{
typename
}
not supported"
)
data
=
Tensor
(
data
)
data
=
torch
.
tensor
(
data
,
device
=
device
)
data
/=
data
.
abs
().
max
()
data
.
div_
(
data
.
abs
().
max
())
assert
data
.
numel
()
==
16
assert
data
.
numel
()
==
16
return
data
.
to
(
device
)
return
data
def
quantize_fp4
(
def
quantize_fp4
(
...
...
csrc/ops.cu
View file @
76885a41
...
@@ -58,7 +58,7 @@ template <typename T, int STOCHASTIC, int DATA_TYPE> void quantizeBlockwise(floa
...
@@ -58,7 +58,7 @@ template <typename T, int STOCHASTIC, int DATA_TYPE> void quantizeBlockwise(floa
num_blocks
=
n
%
blocksize
==
0
?
num_blocks
:
num_blocks
+
1
;
num_blocks
=
n
%
blocksize
==
0
?
num_blocks
:
num_blocks
+
1
;
if
(
blocksize
==
4096
)
if
(
blocksize
==
4096
)
kQuantizeBlockwise
<
T
,
4096
,
4
,
STOCHASTIC
,
0
><<<
num_blocks
,
1024
>>>
(
code
,
A
,
absmax
,
out
,
rand
,
rand_offset
,
n
);
kQuantizeBlockwise
<
T
,
4096
,
4
,
STOCHASTIC
,
DATA_TYPE
><<<
num_blocks
,
1024
>>>
(
code
,
A
,
absmax
,
out
,
rand
,
rand_offset
,
n
);
else
if
(
blocksize
==
2048
)
else
if
(
blocksize
==
2048
)
kQuantizeBlockwise
<
T
,
2048
,
4
,
0
,
DATA_TYPE
><<<
num_blocks
,
512
>>>
(
code
,
A
,
absmax
,
out
,
rand
,
rand_offset
,
n
);
kQuantizeBlockwise
<
T
,
2048
,
4
,
0
,
DATA_TYPE
><<<
num_blocks
,
512
>>>
(
code
,
A
,
absmax
,
out
,
rand
,
rand_offset
,
n
);
else
if
(
blocksize
==
1024
)
else
if
(
blocksize
==
1024
)
...
...
install_cuda.py
View file @
76885a41
...
@@ -77,9 +77,7 @@ def main():
...
@@ -77,9 +77,7 @@ def main():
download_path
=
"/tmp"
# default download path
download_path
=
"/tmp"
# default download path
if
len
(
sys
.
argv
)
<
2
:
if
len
(
sys
.
argv
)
<
2
:
print
(
print
(
"Usage: python install_cuda.py <version/all> [user/system] [download_path]"
)
"Usage: python install_cuda.py <version/all> [user/system] [download_path]"
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
version
=
sys
.
argv
[
1
]
version
=
sys
.
argv
[
1
]
...
@@ -100,9 +98,7 @@ def main():
...
@@ -100,9 +98,7 @@ def main():
elif
version
in
cuda_versions
:
elif
version
in
cuda_versions
:
install_cuda
(
version
,
base_path
,
download_path
)
install_cuda
(
version
,
base_path
,
download_path
)
else
:
else
:
print
(
print
(
f
"Invalid CUDA version:
{
version
}
. Available versions are:
{
', '
.
join
(
cuda_versions
.
keys
())
}
"
)
f
"Invalid CUDA version:
{
version
}
. Available versions are:
{
', '
.
join
(
cuda_versions
.
keys
())
}
"
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
...
...
tests/test_functional.py
View file @
76885a41
...
@@ -1928,7 +1928,9 @@ def test_bench_dequantization():
...
@@ -1928,7 +1928,9 @@ def test_bench_dequantization():
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
float32
,
torch
.
float16
,
torch
.
bfloat16
],
ids
=
describe_dtype
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
torch
.
float32
,
torch
.
float16
,
torch
.
bfloat16
],
ids
=
describe_dtype
)
def
test_fp4_quant
(
dtype
):
@
pytest
.
mark
.
parametrize
(
"quant_type"
,
[
"fp4"
,
"nf4"
])
@
pytest
.
mark
.
parametrize
(
"blocksize"
,
[
64
,
128
,
256
,
512
,
1024
,
2048
,
4096
])
def
test_4bit_quant
(
dtype
,
quant_type
,
blocksize
):
vals
=
list
(
product
([
0
,
1
],
repeat
=
4
))
vals
=
list
(
product
([
0
,
1
],
repeat
=
4
))
code
=
{}
code
=
{}
...
@@ -1953,8 +1955,8 @@ def test_fp4_quant(dtype):
...
@@ -1953,8 +1955,8 @@ def test_fp4_quant(dtype):
code
[
idx
]
=
result
code
[
idx
]
=
result
A1
=
torch
.
randn
(
1024
,
1024
,
device
=
"cuda"
,
dtype
=
dtype
)
A1
=
torch
.
randn
(
1024
,
1024
,
device
=
"cuda"
,
dtype
=
dtype
)
qa
,
SA
=
F
.
quantize_
fp4
(
A1
,
blocksize
=
64
)
qa
,
SA
=
F
.
quantize_
4bit
(
A1
,
blocksize
=
blocksize
,
quant_type
=
quant_type
)
A2
=
F
.
dequantize_
fp4
(
qa
,
SA
)
A2
=
F
.
dequantize_
4bit
(
qa
,
SA
,
blocksize
=
blocksize
,
quant_type
=
quant_type
)
err
=
(
A1
-
A2
).
abs
().
float
()
err
=
(
A1
-
A2
).
abs
().
float
()
relerr
=
(
err
/
(
A1
.
abs
().
float
()
+
1e-8
)).
mean
()
relerr
=
(
err
/
(
A1
.
abs
().
float
()
+
1e-8
)).
mean
()
...
@@ -1962,8 +1964,24 @@ def test_fp4_quant(dtype):
...
@@ -1962,8 +1964,24 @@ def test_fp4_quant(dtype):
err
=
err
.
mean
()
err
=
err
.
mean
()
assert
A2
.
dtype
==
dtype
assert
A2
.
dtype
==
dtype
assert
err
.
item
()
<
0.1
assert
relerr
.
item
()
<
0.28
# With larger block sizes, we can expect this to blow up.
# At blocksize>=1024, don't even bother looking at relerr.
if
blocksize
<=
64
:
assert
err
.
item
()
<
0.1
assert
relerr
.
item
()
<
0.28
elif
blocksize
<=
256
:
assert
err
.
item
()
<
0.11
assert
relerr
.
item
()
<
0.30
elif
blocksize
<=
512
:
assert
err
.
item
()
<
0.12
assert
relerr
.
item
()
<
0.31
elif
quant_type
==
"fp4"
:
# 1024 => 0.48, 2048 => 0.52, 4096 => 0.56
assert
err
.
item
()
<
0.08
+
math
.
log2
(
blocksize
)
*
4e-2
else
:
# 1024 => 0.8, 2048 => 0.88, 4096 => 0.96
assert
err
.
item
()
<
math
.
log2
(
blocksize
)
*
8e-2
@
pytest
.
mark
.
parametrize
(
"quant_type"
,
[
"fp4"
,
"nf4"
])
@
pytest
.
mark
.
parametrize
(
"quant_type"
,
[
"fp4"
,
"nf4"
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment