Unverified Commit 8c507d92 authored by Aarni Koskela's avatar Aarni Koskela Committed by GitHub
Browse files

Enable crate-ci/typos lint; fix typos (#1005)


Co-authored-by: Titus von Koeller <titus@vonkoeller.com>

fix erroneous correction
parent 8a14c638
...@@ -17,3 +17,7 @@ repos: ...@@ -17,3 +17,7 @@ repos:
- id: mixed-line-ending - id: mixed-line-ending
args: args:
- --fix=lf - --fix=lf
- repo: https://github.com/crate-ci/typos
rev: v1.17.2
hooks:
- id: typos
[files]
[default.extend-identifiers]
[type.py.extend-words]
"BA" = "BA" # used as a commented-out variable in tests
[type.cuda.extend-words]
"subtile" = "subtile"
"subtiles" = "subtiles"
"transation" = "transation" # TODO: is this transition, transaction, translation..?
...@@ -36,8 +36,8 @@ if __name__ == '__main__': ...@@ -36,8 +36,8 @@ if __name__ == '__main__':
('x_quantize_rowwise', 'P', '--', 'C4', 'Quantize rowwise X (switchback)'), ('x_quantize_rowwise', 'P', '--', 'C4', 'Quantize rowwise X (switchback)'),
('g_quantize_rowwise', 'P', '-.', 'C4', 'Quantize rowwise G (switchback)'), ('g_quantize_rowwise', 'P', '-.', 'C4', 'Quantize rowwise G (switchback)'),
('w_quantize_global', '.', '--', 'C4', 'Quatnize global W (switchback)'), ('w_quantize_global', '.', '--', 'C4', 'Quantize global W (switchback)'),
('w_quantize_global_transpose', '.', '-.', 'C4', 'Quantize gloabl and\ntranspose W (switchback)'), ('w_quantize_global_transpose', '.', '-.', 'C4', 'Quantize global and\ntranspose W (switchback)'),
]: ]:
xs = [] xs = []
ys = [] ys = []
......
...@@ -4,7 +4,7 @@ extract factors the build is dependent on: ...@@ -4,7 +4,7 @@ extract factors the build is dependent on:
[ ] TODO: Q - What if we have multiple GPUs of different makes? [ ] TODO: Q - What if we have multiple GPUs of different makes?
- CUDA version - CUDA version
- Software: - Software:
- CPU-only: only CPU quantization functions (no optimizer, no matrix multipl) - CPU-only: only CPU quantization functions (no optimizer, no matrix multiply)
- CuBLAS-LT: full-build 8-bit optimizer - CuBLAS-LT: full-build 8-bit optimizer
- no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`) - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)
...@@ -263,7 +263,7 @@ def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None: ...@@ -263,7 +263,7 @@ def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None:
warning_msg = ( warning_msg = (
f"Found duplicate {CUDA_RUNTIME_LIBS} files: {results_paths}.. " f"Found duplicate {CUDA_RUNTIME_LIBS} files: {results_paths}.. "
"We select the PyTorch default libcudart.so, which is {torch.version.cuda}," "We select the PyTorch default libcudart.so, which is {torch.version.cuda},"
"but this might missmatch with the CUDA version that is needed for bitsandbytes." "but this might mismatch with the CUDA version that is needed for bitsandbytes."
"To override this behavior set the BNB_CUDA_VERSION=<version string, e.g. 122> environmental variable" "To override this behavior set the BNB_CUDA_VERSION=<version string, e.g. 122> environmental variable"
"For example, if you want to use the CUDA version 122" "For example, if you want to use the CUDA version 122"
"BNB_CUDA_VERSION=122 python ..." "BNB_CUDA_VERSION=122 python ..."
......
...@@ -120,7 +120,7 @@ class GlobalPageManager: ...@@ -120,7 +120,7 @@ class GlobalPageManager:
return cls._instance return cls._instance
def prefetch_all(self, to_cpu=False): def prefetch_all(self, to_cpu=False):
# assume the first added, will be hte # assume the first added, will be the
# ones that are used first, so swap them in last # ones that are used first, so swap them in last
# in the case they are evicted again # in the case they are evicted again
for t in self.paged_tensors[::-1]: for t in self.paged_tensors[::-1]:
...@@ -219,7 +219,7 @@ def elementwise_func(func_name, A, B, value, prefetch=True): ...@@ -219,7 +219,7 @@ def elementwise_func(func_name, A, B, value, prefetch=True):
# paged function are fully asynchronous # paged function are fully asynchronous
# if we return from this function, we want the tensor # if we return from this function, we want the tensor
# to be in the correct state, that is the final state after the # to be in the correct state, that is the final state after the
# operation occured. So we synchronize. # operation occurred. So we synchronize.
torch.cuda.synchronize() torch.cuda.synchronize()
def fill(A, value, device=None, prefetch=True): elementwise_func('fill', A, None, value) def fill(A, value, device=None, prefetch=True): elementwise_func('fill', A, None, value)
...@@ -589,7 +589,7 @@ def estimate_quantiles(A: Tensor, out: Optional[torch.Tensor] = None, offset: fl ...@@ -589,7 +589,7 @@ def estimate_quantiles(A: Tensor, out: Optional[torch.Tensor] = None, offset: fl
class QuantState: class QuantState:
"""container for quantization state components to work with Params4bit and similar clases""" """container for quantization state components to work with Params4bit and similar classes"""
valid_quant_types = ('fp4', 'nf4') valid_quant_types = ('fp4', 'nf4')
valid_qs_type_keys = [f"bitsandbytes__{x}" for x in valid_quant_types] valid_qs_type_keys = [f"bitsandbytes__{x}" for x in valid_quant_types]
valid_qs_keys = ['absmax', 'quant_map', 'nested_absmax', 'nested_quant_map', 'quant_state', 'quant_type', valid_qs_keys = ['absmax', 'quant_map', 'nested_absmax', 'nested_quant_map', 'quant_state', 'quant_type',
......
...@@ -134,10 +134,10 @@ __device__ unsigned char dQuantizeFP4(float x) ...@@ -134,10 +134,10 @@ __device__ unsigned char dQuantizeFP4(float x)
// we do a binary search // we do a binary search
// the pivots are divided by 12 (the FP4 absmax) // the pivots are divided by 12 (the FP4 absmax)
// since we assum input data is in [-1.0, 1.0] // since we assume input data is in [-1.0, 1.0]
// !be careful here, its easy to make a mistake // !be careful here, its easy to make a mistake
// that is difficult to noice if you add an extra // that is difficult to notice if you add an extra
// zero somewhere! // zero somewhere!
int sign = x < 0 ? 0b1000 : 0b0000; int sign = x < 0 ? 0b1000 : 0b0000;
...@@ -2259,8 +2259,8 @@ template<typename T, int THREADS, int ITEMS_PER_THREAD, int TILE_ROWS, int TILE_ ...@@ -2259,8 +2259,8 @@ template<typename T, int THREADS, int ITEMS_PER_THREAD, int TILE_ROWS, int TILE_
} }
// 4. store data via atomicMax // 4. store data via atomicMax
// to store col data efficienctly we need to rewrite the smem blocked data [0, 1, 2, 3...] for t0 // to store col data efficiently we need to rewrite the smem blocked data [0, 1, 2, 3...] for t0
// into a striped arangement: [0, 8, 16, 24, ..] for t0 // into a striped arrangement: [0, 8, 16, 24, ..] for t0
__syncthreads(); __syncthreads();
BlockExchange(temp_storage.exchange).BlockedToStriped(local_col_absmax_values); BlockExchange(temp_storage.exchange).BlockedToStriped(local_col_absmax_values);
...@@ -2310,7 +2310,7 @@ template <int ITEMS_PER_THREAD, int SUBTILE_ROWS, int THREADS>__global__ void kd ...@@ -2310,7 +2310,7 @@ template <int ITEMS_PER_THREAD, int SUBTILE_ROWS, int THREADS>__global__ void kd
// data is in 32 column-tile major with tile width 32 columns and numRows rows // data is in 32 column-tile major with tile width 32 columns and numRows rows
// L1. Load sub-tile row/col statistics. Each thread only holds 1 col, load rows into shared memory. // L1. Load sub-tile row/col statistics. Each thread only holds 1 col, load rows into shared memory.
// L2. Load data in warp-striped arangement (t0 holds colidx [0, 0, 0, 0], rowidx [0, 1, 2, 3]) // L2. Load data in warp-striped arrangement (t0 holds colidx [0, 0, 0, 0], rowidx [0, 1, 2, 3])
// C1. Compute val(row_stat*col_stat)/(127*127) (load 1/(127*127 into register)) // C1. Compute val(row_stat*col_stat)/(127*127) (load 1/(127*127 into register))
// C2. Compute normalization values and store col values in register // C2. Compute normalization values and store col values in register
// S1. Store C1 into 16-bit output // S1. Store C1 into 16-bit output
...@@ -2383,7 +2383,7 @@ template <int ITEMS_PER_THREAD, int SUBTILE_ROWS, int THREADS>__global__ void kd ...@@ -2383,7 +2383,7 @@ template <int ITEMS_PER_THREAD, int SUBTILE_ROWS, int THREADS>__global__ void kd
if(valid_items <= 0) // the sub-tile might have more elements than the tile itself if(valid_items <= 0) // the sub-tile might have more elements than the tile itself
break; break;
// L2. Load data in warp-striped arangement (t0 holds colidx [0, 0, 0, 0], rowidx [0, 1, 2, 3]) // L2. Load data in warp-striped arrangement (t0 holds colidx [0, 0, 0, 0], rowidx [0, 1, 2, 3])
LoadInt32(loadint32).Load(&(A[subtile_idx]), local_values, valid_items, 0); LoadInt32(loadint32).Load(&(A[subtile_idx]), local_values, valid_items, 0);
ExchangeInt32(exchangeint32).BlockedToWarpStriped(local_values, local_values); ExchangeInt32(exchangeint32).BlockedToWarpStriped(local_values, local_values);
...@@ -2650,7 +2650,7 @@ template <int THREADS, int ITEMS_PER_THREAD, int TILE_ROWS, int TILE_COLS, int T ...@@ -2650,7 +2650,7 @@ template <int THREADS, int ITEMS_PER_THREAD, int TILE_ROWS, int TILE_COLS, int T
// row1 [col0 col1 ... col31] // row1 [col0 col1 ... col31]
// ... // ...
// //
// As such we read consequtive entries with 256 threads (8rows x 32 columns) // As such we read consecutive entries with 256 threads (8rows x 32 columns)
// as j increase, the row increase by a factor of 8 // as j increase, the row increase by a factor of 8
// We load 8 rows per subrow loop, and subrow increase by 8 per loop // We load 8 rows per subrow loop, and subrow increase by 8 per loop
// so we have an offset of 8 rows every loop or (subrow/warps)*8 = (subrow/8)*8 // so we have an offset of 8 rows every loop or (subrow/warps)*8 = (subrow/8)*8
...@@ -2747,7 +2747,7 @@ template <int THREADS, int ITEMS_PER_THREAD, int TILE_ROWS, int TILE_COLS, int T ...@@ -2747,7 +2747,7 @@ template <int THREADS, int ITEMS_PER_THREAD, int TILE_ROWS, int TILE_COLS, int T
// each of these has 32 values in total for 32*4 = 128 as offset if odd // each of these has 32 values in total for 32*4 = 128 as offset if odd
// every set of 4 columns increases the total offset by 16 // every set of 4 columns increases the total offset by 16
// each even row increase the offset by 4, for example row 2 is offset by 4, 4 by 6 etc so: subrow/2*4 = subrow*2 // each even row increase the offset by 4, for example row 2 is offset by 4, 4 by 6 etc so: subrow/2*4 = subrow*2
// this happends every 8 rows anew (subrow % 8) // this happens every 8 rows anew (subrow % 8)
// one writes 4 columns at once that is (col % 4) for the particular index in the subtile // one writes 4 columns at once that is (col % 4) for the particular index in the subtile
int subcol = warp_lane; int subcol = warp_lane;
...@@ -3073,7 +3073,7 @@ template <int FORMAT> __global__ void kExtractOutliers(char *A, int *idx, char * ...@@ -3073,7 +3073,7 @@ template <int FORMAT> __global__ void kExtractOutliers(char *A, int *idx, char *
//// 4. do dequantization from register of B into second pair of registers //// 4. do dequantization from register of B into second pair of registers
//// 5. store (4) into fragment //// 5. store (4) into fragment
//// 6. matmul aggregate into fragment C //// 6. matmul aggregate into fragment C
//// 7. aggregate files of C into shared memroy block C //// 7. aggregate files of C into shared memory block C
//// 8. sum (7) //// 8. sum (7)
//// 9. write outputs to matmul output matrix //// 9. write outputs to matmul output matrix
//} //}
......
...@@ -5,7 +5,7 @@ echo "MAKE SURE LD_LIBRARY_PATH IS EMPTY!" ...@@ -5,7 +5,7 @@ echo "MAKE SURE LD_LIBRARY_PATH IS EMPTY!"
echo $LD_LIBRARY_PATH echo $LD_LIBRARY_PATH
if [[ ! -z "${LD_LIBRARY_PATH}" ]]; then if [[ ! -z "${LD_LIBRARY_PATH}" ]]; then
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -24,7 +24,7 @@ make cpuonly CUDA_VERSION="CPU" ...@@ -24,7 +24,7 @@ make cpuonly CUDA_VERSION="CPU"
if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -34,7 +34,7 @@ make cuda110 CUDA_VERSION=110 ...@@ -34,7 +34,7 @@ make cuda110 CUDA_VERSION=110
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -44,7 +44,7 @@ make cuda11x CUDA_VERSION=111 ...@@ -44,7 +44,7 @@ make cuda11x CUDA_VERSION=111
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -54,7 +54,7 @@ make cuda11x CUDA_VERSION=114 ...@@ -54,7 +54,7 @@ make cuda11x CUDA_VERSION=114
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -64,7 +64,7 @@ make cuda11x CUDA_VERSION=115 ...@@ -64,7 +64,7 @@ make cuda11x CUDA_VERSION=115
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -74,7 +74,7 @@ make cuda11x CUDA_VERSION=117 ...@@ -74,7 +74,7 @@ make cuda11x CUDA_VERSION=117
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -84,7 +84,7 @@ make cuda118 CUDA_VERSION=118 ...@@ -84,7 +84,7 @@ make cuda118 CUDA_VERSION=118
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -94,7 +94,7 @@ make cuda12x CUDA_VERSION=120 ...@@ -94,7 +94,7 @@ make cuda12x CUDA_VERSION=120
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -104,7 +104,7 @@ make cuda12x CUDA_VERSION=121 ...@@ -104,7 +104,7 @@ make cuda12x CUDA_VERSION=121
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -114,7 +114,7 @@ make cuda12x CUDA_VERSION=122 ...@@ -114,7 +114,7 @@ make cuda12x CUDA_VERSION=122
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -124,7 +124,7 @@ make cuda12x CUDA_VERSION=123 ...@@ -124,7 +124,7 @@ make cuda12x CUDA_VERSION=123
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -138,7 +138,7 @@ make cuda110_nomatmul CUDA_VERSION=110 ...@@ -138,7 +138,7 @@ make cuda110_nomatmul CUDA_VERSION=110
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -149,7 +149,7 @@ make cuda11x_nomatmul CUDA_VERSION=111 ...@@ -149,7 +149,7 @@ make cuda11x_nomatmul CUDA_VERSION=111
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -159,7 +159,7 @@ make cuda11x_nomatmul CUDA_VERSION=114 ...@@ -159,7 +159,7 @@ make cuda11x_nomatmul CUDA_VERSION=114
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -169,7 +169,7 @@ make cuda11x_nomatmul CUDA_VERSION=115 ...@@ -169,7 +169,7 @@ make cuda11x_nomatmul CUDA_VERSION=115
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -179,7 +179,7 @@ make cuda11x_nomatmul CUDA_VERSION=117 ...@@ -179,7 +179,7 @@ make cuda11x_nomatmul CUDA_VERSION=117
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -189,7 +189,7 @@ make cuda118_nomatmul CUDA_VERSION=118 ...@@ -189,7 +189,7 @@ make cuda118_nomatmul CUDA_VERSION=118
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -199,7 +199,7 @@ make cuda12x_nomatmul CUDA_VERSION=120 ...@@ -199,7 +199,7 @@ make cuda12x_nomatmul CUDA_VERSION=120
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -209,7 +209,7 @@ make cuda12x_nomatmul CUDA_VERSION=121 ...@@ -209,7 +209,7 @@ make cuda12x_nomatmul CUDA_VERSION=121
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -219,7 +219,7 @@ make cuda12x_nomatmul CUDA_VERSION=122 ...@@ -219,7 +219,7 @@ make cuda12x_nomatmul CUDA_VERSION=122
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
...@@ -229,7 +229,7 @@ make cuda12x_nomatmul CUDA_VERSION=123 ...@@ -229,7 +229,7 @@ make cuda12x_nomatmul CUDA_VERSION=123
if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123_nocublaslt.so" ]; then if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123_nocublaslt.so" ]; then
# Control will enter here if $DIRECTORY doesn't exist. # Control will enter here if $DIRECTORY doesn't exist.
echo "Compilation unsuccessul!" 1>&2 echo "Compilation unsuccessful!" 1>&2
exit 64 exit 64
fi fi
......
# Contributors guidelines # Contributors guidelines
... stil under construction ... (feel free to propose materials, `bitsandbytes` is a community project) ... still under construction ... (feel free to propose materials, `bitsandbytes` is a community project)
## Setup pre-commit hooks ## Setup pre-commit hooks
- Install pre-commit hooks with `pip install pre-commit`. - Install pre-commit hooks with `pip install pre-commit`.
......
...@@ -29,7 +29,7 @@ Please review the [bitsandbytes section in the Accelerate docs](https://huggingf ...@@ -29,7 +29,7 @@ Please review the [bitsandbytes section in the Accelerate docs](https://huggingf
# Trainer for the optimizers # Trainer for the optimizers
You can use any of the 8-bit and/or paged optimizers by simply passing them to the `transformers.Trainer` class on intialization. All bnb optimizers are supported by passing the correct string in `TrainingArguments`'s `optim` attribute - e.g. (`paged_adamw_32bit`). You can use any of the 8-bit and/or paged optimizers by simply passing them to the `transformers.Trainer` class on initialization. All bnb optimizers are supported by passing the correct string in `TrainingArguments`'s `optim` attribute - e.g. (`paged_adamw_32bit`).
See the [official API docs for reference](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.Trainer). See the [official API docs for reference](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.Trainer).
......
...@@ -168,9 +168,9 @@ Possible options for the config override are: `betas, eps, weight_decay, lr, opt ...@@ -168,9 +168,9 @@ Possible options for the config override are: `betas, eps, weight_decay, lr, opt
For overrides for particular layers, we recommend overriding locally in each module. You can do this by passing the module, the parameter, and its attribute name to the GlobalOptimManager: For overrides for particular layers, we recommend overriding locally in each module. You can do this by passing the module, the parameter, and its attribute name to the GlobalOptimManager:
```py ```py
class MyModule(torch.nn.Module): class MyModule(torch.nn.Module):
def __init__(din, dout): def __init__(d_in, d_out):
super(MyModule, self).__init__() super(MyModule, self).__init__()
self.linear = torch.nn.Linear(din, dout) self.linear = torch.nn.Linear(d_in, d_out)
# optimization will happen in 32-bit and # optimization will happen in 32-bit and
# learning rate will be set to 0.0001 independent of the main learning rate # learning rate will be set to 0.0001 independent of the main learning rate
config = {'optim_bits': 32, 'lr' : 0.0001} config = {'optim_bits': 32, 'lr' : 0.0001}
......
...@@ -157,7 +157,7 @@ private: ...@@ -157,7 +157,7 @@ private:
FVec<AVX, float> vxp = _mm256_i32gather_ps(xi, idxp, sizeof(float)); FVec<AVX, float> vxp = _mm256_i32gather_ps(xi, idxp, sizeof(float));
IVec<AVX, float> ip = idxm; IVec<AVX, float> ip = idxm;
#else // do not use gather instrucions #else // do not use gather instructions
union U { union U {
__m256i vec; __m256i vec;
......
...@@ -147,5 +147,5 @@ inline T prev(T x) ...@@ -147,5 +147,5 @@ inline T prev(T x)
return x; return x;
} }
} // namepsace Details } // namespace Details
} // namespace BinSearch } // namespace BinSearch
...@@ -568,5 +568,5 @@ FORCE_INLINE FVec<AVX, double> mulSub(const FVec<AVX, double>& a, const FVec<AVX ...@@ -568,5 +568,5 @@ FORCE_INLINE FVec<AVX, double> mulSub(const FVec<AVX, double>& a, const FVec<AVX
#endif #endif
} // namepsace Details } // namespace Details
} // namespace BinSearch } // namespace BinSearch
...@@ -483,7 +483,14 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward): ...@@ -483,7 +483,14 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
assert (idx == 0).sum().item() <= b1.numel() * 0.005 assert (idx == 0).sum().item() <= b1.numel() * 0.005
@pytest.mark.parametrize("module", [lambda nin, nout, bias=True: bnb.nn.Linear8bitLt(nin, nout, bias=bias, has_fp16_weights=False), bnb.nn.LinearFP4], ids=['Int8Lt', 'FP4']) @pytest.mark.parametrize(
"module",
[
lambda n_in, n_out, bias=True: bnb.nn.Linear8bitLt(n_in, n_out, bias=bias, has_fp16_weights=False),
bnb.nn.LinearFP4,
],
ids=['Int8Lt', 'FP4'],
)
def test_linear_kbit_fp32_bias(module): def test_linear_kbit_fp32_bias(module):
# casts model to fp16 -> int8 automatically # casts model to fp16 -> int8 automatically
l1 = module(32, 64).cuda() l1 = module(32, 64).cuda()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment