Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
bitsandbytes
Commits
1c774ece
Commit
1c774ece
authored
Jul 10, 2023
by
Tim Dettmers
Browse files
Added ARCH guard for bfloat16 computations.
parent
0a1cced3
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
108 additions
and
130 deletions
+108
-130
CHANGELOG.md
CHANGELOG.md
+3
-0
csrc/kernels.cu
csrc/kernels.cu
+19
-4
deploy.sh
deploy.sh
+86
-126
No files found.
CHANGELOG.md
View file @
1c774ece
...
...
@@ -255,3 +255,6 @@ Features:
Bug fixes:
-
Added
`device`
variable for bitsandbytes layers to be compatible with PyTorch layers.
Deprecated:
-
Binaries for CUDA 11.2, 11.3, 11.6 no longer ship with
`pip install bitsandbytes`
and need to be compiled from source.
csrc/kernels.cu
View file @
1c774ece
...
...
@@ -3312,6 +3312,7 @@ __device__ static float nf4_data[16] = {-1.0, -0.6961928009986877, -0.5250730514
template
<
typename
T
,
int
THREADS
>
__global__
void
kgemm_4bit_inference
(
int
M
,
int
N
,
int
K
,
T
*
__restrict__
const
A
,
unsigned
char
*
B
,
float
*
absmax
,
T
*
out
,
int
lda
,
int
ldb
,
int
ldc
,
int
blocksize
)
{
#if __CUDA_ARCH__ >= 750
using
namespace
nvcuda
;
int
col_offset
=
blockIdx
.
x
*
32
;
const
int
warp_id
=
threadIdx
.
x
/
32
;
...
...
@@ -3517,6 +3518,7 @@ template <typename T, int THREADS> __global__ void kgemm_4bit_inference(int M, i
if
(
col_offset
+
warp_lane
<
M
)
out
[
col_offset
+
warp_lane
]
=
smem_C
[
warp_lane
];
#endif
}
#define num_values_4bit 32
...
...
@@ -3544,7 +3546,7 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
T
local_absmax
=
T
(
0.0
f
);
for
(
int
i
=
threadIdx
.
x
;
i
<
16
;
i
++
)
quant_map
[
i
]
=
datatype
[
i
];
quant_map
[
i
]
=
T
(
datatype
[
i
]
)
;
__syncthreads
();
...
...
@@ -3577,8 +3579,14 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
#pragma unroll
for
(
int
k
=
0
;
k
<
num_values_4bit
;
k
++
)
{
local_B
[
k
*
2
]
=
quant_map
[
local_B_4bit
[
k
]
>>
4
]
*
local_absmax
;
local_B
[
k
*
2
+
1
]
=
quant_map
[
local_B_4bit
[
k
]
&
0x0F
]
*
local_absmax
;
#if __CUDA_ARCH__ >= 800
local_B
[
k
*
2
]
=
quant_map
[
local_B_4bit
[
k
]
>>
4
]
*
local_absmax
;
local_B
[
k
*
2
+
1
]
=
quant_map
[
local_B_4bit
[
k
]
&
0x0F
]
*
local_absmax
;
#else
// bf16 multipliation not supported
local_B
[
k
*
2
]
=
T
((
float
)
quant_map
[
local_B_4bit
[
k
]
>>
4
]
*
(
float
)
local_absmax
);
local_B
[
k
*
2
+
1
]
=
T
((
float
)
quant_map
[
local_B_4bit
[
k
]
&
0x0F
]
*
(
float
)
local_absmax
);
#endif
}
if
(
inner_idx
+
num_values_4bit
)
...
...
@@ -3609,7 +3617,14 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
#pragma unroll
for
(
int
k
=
0
;
k
<
num_values_4bit
;
k
++
)
local_C
+=
(
float
)(
local_A
[
k
]
*
local_B
[
k
]);
{
#if __CUDA_ARCH__ >= 800
local_C
+=
(
float
)(
local_A
[
k
]
*
local_B
[
k
]);
#else
// bf16 multipliation not supported
local_C
+=
((
float
)
local_A
[
k
]
*
(
float
)
local_B
[
k
]);
#endif
}
}
...
...
deploy.sh
View file @
1c774ece
...
...
@@ -10,103 +10,73 @@ if [[ ! -z "${LD_LIBRARY_PATH}" ]]; then
fi
module unload cuda
&&
echo
"no module function available. Probably not on a slurm cluster."
module unload gcc
&&
echo
"no module function available. Probably not on a slurm cluster."
rm
-rf
dist build
make cleaneggs
make cleanlibs
make clean
export
CUDA_HOME
=
export
CUDA_VERSION
=
make cpuonly
CUDA_VERSION
=
"CPU"
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cpu.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.0
make cuda110
CUDA_VERSION
=
110
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda110.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.1
make cuda11x
CUDA_VERSION
=
111
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda111.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.2
make cuda11x
CUDA_VERSION
=
112
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda112.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.3
make cuda11x
CUDA_VERSION
=
113
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda113.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.4
make cuda11x
CUDA_VERSION
=
114
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda114.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.5
make cuda11x
CUDA_VERSION
=
115
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda115.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.6
make cuda11x
CUDA_VERSION
=
116
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda116.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.7
make cuda11x
CUDA_VERSION
=
117
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda117.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
#module unload cuda && echo "no module function available. Probably not on a slurm cluster."
#module unload gcc && echo "no module function available. Probably not on a slurm cluster."
#
#rm -rf dist build
#make cleaneggs
#make cleanlibs
#
#make clean
#export CUDA_HOME=
#export CUDA_VERSION=
#make cpuonly CUDA_VERSION="CPU"
#
#if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then
# # Control will enter here if $DIRECTORY doesn't exist.
# echo "Compilation unsuccessul!" 1>&2
# exit 64
#fi
#
#make clean
#export CUDA_HOME=$BASE_PATH/cuda-11.0
#make cuda110 CUDA_VERSION=110
#
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then
# # Control will enter here if $DIRECTORY doesn't exist.
# echo "Compilation unsuccessul!" 1>&2
# exit 64
#fi
#
#make clean
#export CUDA_HOME=$BASE_PATH/cuda-11.1
#make cuda11x CUDA_VERSION=111
#
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then
# # Control will enter here if $DIRECTORY doesn't exist.
# echo "Compilation unsuccessul!" 1>&2
# exit 64
#fi
#
#make clean
#export CUDA_HOME=$BASE_PATH/cuda-11.4
#make cuda11x CUDA_VERSION=114
#
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then
# # Control will enter here if $DIRECTORY doesn't exist.
# echo "Compilation unsuccessul!" 1>&2
# exit 64
#fi
#
#make clean
#export CUDA_HOME=$BASE_PATH/cuda-11.5
#make cuda11x CUDA_VERSION=115
#
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then
# # Control will enter here if $DIRECTORY doesn't exist.
# echo "Compilation unsuccessul!" 1>&2
# exit 64
#fi
#
#make clean
#export CUDA_HOME=$BASE_PATH/cuda-11.7
#make cuda11x CUDA_VERSION=117
#
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then
# # Control will enter here if $DIRECTORY doesn't exist.
# echo "Compilation unsuccessul!" 1>&2
# exit 64
#fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.8
...
...
@@ -138,12 +108,11 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-1
1.0
make cuda1
10_nomatmul
CUDA_VERSION
=
1
10
export
CUDA_HOME
=
$BASE_PATH
/cuda-1
2.2
make cuda1
2x
CUDA_VERSION
=
1
22
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda1
10_nocublaslt
.so"
]
;
then
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda1
22
.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
...
...
@@ -151,30 +120,21 @@ fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.
1
make cuda11
x
_nomatmul
CUDA_VERSION
=
11
1
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.
0
make cuda11
0
_nomatmul
CUDA_VERSION
=
11
0
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda11
1
_nocublaslt.so"
]
;
then
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda11
0
_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.2
make cuda11x_nomatmul
CUDA_VERSION
=
112
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda112_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.
3
make cuda11x_nomatmul
CUDA_VERSION
=
11
3
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.
1
make cuda11x_nomatmul
CUDA_VERSION
=
11
1
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda11
3
_nocublaslt.so"
]
;
then
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda11
1
_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
...
...
@@ -200,16 +160,6 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.6
make cuda11x_nomatmul
CUDA_VERSION
=
116
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda116_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.7
make cuda11x_nomatmul
CUDA_VERSION
=
117
...
...
@@ -250,5 +200,15 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-12.2
make cuda12x_nomatmul
CUDA_VERSION
=
122
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda122_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
python
-m
build
python
-m
twine upload dist/
*
--verbose
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment