Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
bitsandbytes
Commits
1c774ece
Commit
1c774ece
authored
Jul 10, 2023
by
Tim Dettmers
Browse files
Added ARCH guard for bfloat16 computations.
parent
0a1cced3
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
108 additions
and
130 deletions
+108
-130
CHANGELOG.md
CHANGELOG.md
+3
-0
csrc/kernels.cu
csrc/kernels.cu
+19
-4
deploy.sh
deploy.sh
+86
-126
No files found.
CHANGELOG.md
View file @
1c774ece
...
@@ -255,3 +255,6 @@ Features:
...
@@ -255,3 +255,6 @@ Features:
Bug fixes:
Bug fixes:
-
Added
`device`
variable for bitsandbytes layers to be compatible with PyTorch layers.
-
Added
`device`
variable for bitsandbytes layers to be compatible with PyTorch layers.
Deprecated:
-
Binaries for CUDA 11.2, 11.3, 11.6 no longer ship with
`pip install bitsandbytes`
and need to be compiled from source.
csrc/kernels.cu
View file @
1c774ece
...
@@ -3312,6 +3312,7 @@ __device__ static float nf4_data[16] = {-1.0, -0.6961928009986877, -0.5250730514
...
@@ -3312,6 +3312,7 @@ __device__ static float nf4_data[16] = {-1.0, -0.6961928009986877, -0.5250730514
template
<
typename
T
,
int
THREADS
>
__global__
void
kgemm_4bit_inference
(
int
M
,
int
N
,
int
K
,
T
*
__restrict__
const
A
,
unsigned
char
*
B
,
float
*
absmax
,
T
*
out
,
int
lda
,
int
ldb
,
int
ldc
,
int
blocksize
)
template
<
typename
T
,
int
THREADS
>
__global__
void
kgemm_4bit_inference
(
int
M
,
int
N
,
int
K
,
T
*
__restrict__
const
A
,
unsigned
char
*
B
,
float
*
absmax
,
T
*
out
,
int
lda
,
int
ldb
,
int
ldc
,
int
blocksize
)
{
{
#if __CUDA_ARCH__ >= 750
using
namespace
nvcuda
;
using
namespace
nvcuda
;
int
col_offset
=
blockIdx
.
x
*
32
;
int
col_offset
=
blockIdx
.
x
*
32
;
const
int
warp_id
=
threadIdx
.
x
/
32
;
const
int
warp_id
=
threadIdx
.
x
/
32
;
...
@@ -3517,6 +3518,7 @@ template <typename T, int THREADS> __global__ void kgemm_4bit_inference(int M, i
...
@@ -3517,6 +3518,7 @@ template <typename T, int THREADS> __global__ void kgemm_4bit_inference(int M, i
if
(
col_offset
+
warp_lane
<
M
)
if
(
col_offset
+
warp_lane
<
M
)
out
[
col_offset
+
warp_lane
]
=
smem_C
[
warp_lane
];
out
[
col_offset
+
warp_lane
]
=
smem_C
[
warp_lane
];
#endif
}
}
#define num_values_4bit 32
#define num_values_4bit 32
...
@@ -3544,7 +3546,7 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
...
@@ -3544,7 +3546,7 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
T
local_absmax
=
T
(
0.0
f
);
T
local_absmax
=
T
(
0.0
f
);
for
(
int
i
=
threadIdx
.
x
;
i
<
16
;
i
++
)
for
(
int
i
=
threadIdx
.
x
;
i
<
16
;
i
++
)
quant_map
[
i
]
=
datatype
[
i
];
quant_map
[
i
]
=
T
(
datatype
[
i
]
)
;
__syncthreads
();
__syncthreads
();
...
@@ -3577,8 +3579,14 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
...
@@ -3577,8 +3579,14 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
#pragma unroll
#pragma unroll
for
(
int
k
=
0
;
k
<
num_values_4bit
;
k
++
)
for
(
int
k
=
0
;
k
<
num_values_4bit
;
k
++
)
{
{
#if __CUDA_ARCH__ >= 800
local_B
[
k
*
2
]
=
quant_map
[
local_B_4bit
[
k
]
>>
4
]
*
local_absmax
;
local_B
[
k
*
2
]
=
quant_map
[
local_B_4bit
[
k
]
>>
4
]
*
local_absmax
;
local_B
[
k
*
2
+
1
]
=
quant_map
[
local_B_4bit
[
k
]
&
0x0F
]
*
local_absmax
;
local_B
[
k
*
2
+
1
]
=
quant_map
[
local_B_4bit
[
k
]
&
0x0F
]
*
local_absmax
;
#else
// bf16 multipliation not supported
local_B
[
k
*
2
]
=
T
((
float
)
quant_map
[
local_B_4bit
[
k
]
>>
4
]
*
(
float
)
local_absmax
);
local_B
[
k
*
2
+
1
]
=
T
((
float
)
quant_map
[
local_B_4bit
[
k
]
&
0x0F
]
*
(
float
)
local_absmax
);
#endif
}
}
if
(
inner_idx
+
num_values_4bit
)
if
(
inner_idx
+
num_values_4bit
)
...
@@ -3609,7 +3617,14 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
...
@@ -3609,7 +3617,14 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
#pragma unroll
#pragma unroll
for
(
int
k
=
0
;
k
<
num_values_4bit
;
k
++
)
for
(
int
k
=
0
;
k
<
num_values_4bit
;
k
++
)
{
#if __CUDA_ARCH__ >= 800
local_C
+=
(
float
)(
local_A
[
k
]
*
local_B
[
k
]);
local_C
+=
(
float
)(
local_A
[
k
]
*
local_B
[
k
]);
#else
// bf16 multipliation not supported
local_C
+=
((
float
)
local_A
[
k
]
*
(
float
)
local_B
[
k
]);
#endif
}
}
}
...
...
deploy.sh
View file @
1c774ece
...
@@ -10,103 +10,73 @@ if [[ ! -z "${LD_LIBRARY_PATH}" ]]; then
...
@@ -10,103 +10,73 @@ if [[ ! -z "${LD_LIBRARY_PATH}" ]]; then
fi
fi
module unload cuda
&&
echo
"no module function available. Probably not on a slurm cluster."
#module unload cuda && echo "no module function available. Probably not on a slurm cluster."
module unload gcc
&&
echo
"no module function available. Probably not on a slurm cluster."
#module unload gcc && echo "no module function available. Probably not on a slurm cluster."
#
rm
-rf
dist build
#rm -rf dist build
make cleaneggs
#make cleaneggs
make cleanlibs
#make cleanlibs
#
make clean
#make clean
export
CUDA_HOME
=
#export CUDA_HOME=
export
CUDA_VERSION
=
#export CUDA_VERSION=
make cpuonly
CUDA_VERSION
=
"CPU"
#make cpuonly CUDA_VERSION="CPU"
#
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cpu.so"
]
;
then
#if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist.
# # Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
# echo "Compilation unsuccessul!" 1>&2
exit
64
# exit 64
fi
#fi
#
make clean
#make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.0
#export CUDA_HOME=$BASE_PATH/cuda-11.0
make cuda110
CUDA_VERSION
=
110
#make cuda110 CUDA_VERSION=110
#
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda110.so"
]
;
then
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist.
# # Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
# echo "Compilation unsuccessul!" 1>&2
exit
64
# exit 64
fi
#fi
#
make clean
#make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.1
#export CUDA_HOME=$BASE_PATH/cuda-11.1
make cuda11x
CUDA_VERSION
=
111
#make cuda11x CUDA_VERSION=111
#
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda111.so"
]
;
then
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist.
# # Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
# echo "Compilation unsuccessul!" 1>&2
exit
64
# exit 64
fi
#fi
#
make clean
#make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.2
#export CUDA_HOME=$BASE_PATH/cuda-11.4
make cuda11x
CUDA_VERSION
=
112
#make cuda11x CUDA_VERSION=114
#
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda112.so"
]
;
then
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist.
# # Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
# echo "Compilation unsuccessul!" 1>&2
exit
64
# exit 64
fi
#fi
#
make clean
#make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.3
#export CUDA_HOME=$BASE_PATH/cuda-11.5
make cuda11x
CUDA_VERSION
=
113
#make cuda11x CUDA_VERSION=115
#
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda113.so"
]
;
then
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist.
# # Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
# echo "Compilation unsuccessul!" 1>&2
exit
64
# exit 64
fi
#fi
#
make clean
#make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.4
#export CUDA_HOME=$BASE_PATH/cuda-11.7
make cuda11x
CUDA_VERSION
=
114
#make cuda11x CUDA_VERSION=117
#
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda114.so"
]
;
then
#if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist.
# # Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
# echo "Compilation unsuccessul!" 1>&2
exit
64
# exit 64
fi
#fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.5
make cuda11x
CUDA_VERSION
=
115
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda115.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.6
make cuda11x
CUDA_VERSION
=
116
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda116.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.7
make cuda11x
CUDA_VERSION
=
117
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda117.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.8
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.8
...
@@ -138,12 +108,11 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then
...
@@ -138,12 +108,11 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then
exit
64
exit
64
fi
fi
make clean
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-1
1.0
export
CUDA_HOME
=
$BASE_PATH
/cuda-1
2.2
make cuda1
10_nomatmul
CUDA_VERSION
=
1
10
make cuda1
2x
CUDA_VERSION
=
1
22
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda1
10_nocublaslt
.so"
]
;
then
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda1
22
.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
echo
"Compilation unsuccessul!"
1>&2
exit
64
exit
64
...
@@ -151,30 +120,21 @@ fi
...
@@ -151,30 +120,21 @@ fi
make clean
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.
1
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.
0
make cuda11
x
_nomatmul
CUDA_VERSION
=
11
1
make cuda11
0
_nomatmul
CUDA_VERSION
=
11
0
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda11
1
_nocublaslt.so"
]
;
then
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda11
0
_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
echo
"Compilation unsuccessul!"
1>&2
exit
64
exit
64
fi
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.2
make cuda11x_nomatmul
CUDA_VERSION
=
112
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda112_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.
3
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.
1
make cuda11x_nomatmul
CUDA_VERSION
=
11
3
make cuda11x_nomatmul
CUDA_VERSION
=
11
1
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda11
3
_nocublaslt.so"
]
;
then
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda11
1
_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
echo
"Compilation unsuccessul!"
1>&2
exit
64
exit
64
...
@@ -200,16 +160,6 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then
...
@@ -200,16 +160,6 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then
exit
64
exit
64
fi
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.6
make cuda11x_nomatmul
CUDA_VERSION
=
116
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda116_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
make clean
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.7
export
CUDA_HOME
=
$BASE_PATH
/cuda-11.7
make cuda11x_nomatmul
CUDA_VERSION
=
117
make cuda11x_nomatmul
CUDA_VERSION
=
117
...
@@ -250,5 +200,15 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then
...
@@ -250,5 +200,15 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then
exit
64
exit
64
fi
fi
make clean
export
CUDA_HOME
=
$BASE_PATH
/cuda-12.2
make cuda12x_nomatmul
CUDA_VERSION
=
122
if
[
!
-f
"./bitsandbytes/libbitsandbytes_cuda122_nocublaslt.so"
]
;
then
# Control will enter here if $DIRECTORY doesn't exist.
echo
"Compilation unsuccessul!"
1>&2
exit
64
fi
python
-m
build
python
-m
build
python
-m
twine upload dist/
*
--verbose
python
-m
twine upload dist/
*
--verbose
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment