Commit 0d1b3a32 authored by Matthew Douglas

Last minute pre-release changes

parent 1d4ea6ac
@@ -445,20 +445,22 @@ def _gemv_4bit_impl(
out: torch.Tensor,
) -> None:
torch._check_is_size(blocksize)
torch._check(
A.numel() == A.size(-1),
lambda: f"A must be a vector with leading dimensions of 1, got {A.shape}",
)
torch._check(
A.dtype in [torch.float16, torch.bfloat16, torch.float32],
lambda: f"A must be float16, bfloat16, or float32, got {A.dtype}",
)
torch._check(
B.dtype in [torch.uint8, torch.bfloat16, torch.float16, torch.float32],
lambda: f"B must be backed by storage of type uint8, bfloat16, float16, or float32, got {B.dtype}",
)
torch._check(absmax.dtype == torch.float32, lambda: f"absmax must be float32, got {absmax.dtype}")
torch._check(code.dtype == torch.float32, lambda: f"code must be float32, got {code.dtype}")
# Note: these checks are not strictly necessary, and cost more than they are worth, so they are commented out for now.
# torch._check(
# A.numel() == A.size(-1),
# lambda: f"A must be a vector with leading dimensions of 1, got {A.shape}",
# )
# torch._check(
# A.dtype in [torch.float16, torch.bfloat16, torch.float32],
# lambda: f"A must be float16, bfloat16, or float32, got {A.dtype}",
# )
# torch._check(
# B.dtype in [torch.uint8, torch.bfloat16, torch.float16, torch.float32],
# lambda: f"B must be backed by storage of type uint8, bfloat16, float16, or float32, got {B.dtype}",
# )
# torch._check(absmax.dtype == torch.float32, lambda: f"absmax must be float32, got {absmax.dtype}")
# torch._check(code.dtype == torch.float32, lambda: f"code must be float32, got {code.dtype}")
m = ct.c_int32(shapeB[0])
n = ct.c_int32(1)
......
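The note in the hunk above drops the `torch._check` input validation from the hot GEMV path because its per-call cost outweighs its benefit. One way to keep such checks available without paying for them by default is to gate them behind an opt-in environment flag. This is only a sketch of that idea, not part of the commit; `BNB_RUNTIME_CHECKS` is a hypothetical variable name.

```python
import os

import torch

# Hypothetical opt-in flag (not part of bitsandbytes): checks run only when explicitly enabled.
_RUNTIME_CHECKS = os.environ.get("BNB_RUNTIME_CHECKS", "0") == "1"


def _maybe_check_gemv_inputs(A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor) -> None:
    """Optional input validation for the 4-bit GEMV path (sketch only)."""
    if not _RUNTIME_CHECKS:
        return  # skip entirely in the common case to avoid per-call overhead
    torch._check(
        A.numel() == A.size(-1),
        lambda: f"A must be a vector with leading dimensions of 1, got {A.shape}",
    )
    torch._check(
        A.dtype in [torch.float16, torch.bfloat16, torch.float32],
        lambda: f"A must be float16, bfloat16, or float32, got {A.dtype}",
    )
    torch._check(absmax.dtype == torch.float32, lambda: f"absmax must be float32, got {absmax.dtype}")
    torch._check(code.dtype == torch.float32, lambda: f"code must be float32, got {code.dtype}")
```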
import ctypes as ct
import functools
import logging
import os
from pathlib import Path
@@ -29,10 +30,8 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path:
library_name = re.sub(r"cuda\d+", f"cuda{override_value}", library_name, count=1)
logger.warning(
f"WARNING: BNB_CUDA_VERSION={override_value} environment variable detected; loading {library_name}.\n"
"This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n"
"This can be used to load a bitsandbytes version built with a CUDA version that is different from the PyTorch CUDA version.\n"
"If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=\n"
"If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n"
"For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64\n",
)
return PACKAGE_DIR / library_name
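For reference, the `BNB_CUDA_VERSION` override shown above works by substituting the CUDA tag in the library filename with a single regex replacement. A standalone sketch of that substitution (the filename here is illustrative; the real name is derived from the detected CUDA runtime):

```python
import os
import re

library_name = "libbitsandbytes_cuda124.so"  # illustrative starting point

override_value = os.environ.get("BNB_CUDA_VERSION")  # e.g. "126"
if override_value:
    # Replace the first "cuda<NNN>" tag in the filename with the override.
    library_name = re.sub(r"cuda\d+", f"cuda{override_value}", library_name, count=1)

print(library_name)  # with BNB_CUDA_VERSION=126 this prints libbitsandbytes_cuda126.so
```

As the warning text notes, the matching `libcudart.so` must also be discoverable (for example via `LD_LIBRARY_PATH`) for the overridden binary to load.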
@@ -45,10 +44,14 @@ class BNBNativeLibrary:
def __init__(self, lib: ct.CDLL):
self._lib = lib
@functools.cache # noqa: B019
def __getattr__(self, name):
fn = getattr(self._lib, name, None)
if fn is not None:
return fn
def throw_on_call(*args, **kwargs):
if hasattr(self._lib, name):
return getattr(self._lib, name)(*args, **kwargs)
raise RuntimeError(
f"Method '{name}' not available in CPU-only version of bitsandbytes.\n"
"Reinstall with GPU support or use CUDA-enabled hardware."
......
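The `__getattr__` change in this hunk makes symbol resolution lazy: instead of succeeding or failing at attribute-lookup time, it returns a callable that resolves the native symbol on first use and raises a descriptive `RuntimeError` only if the symbol is genuinely missing, which is what happens in a CPU-only build. A self-contained sketch of the same pattern, with a plain object standing in for the ctypes `CDLL` handle:

```python
import functools


class _FakeNativeLib:
    """Stand-in for a ctypes.CDLL handle (illustration only)."""

    def some_native_symbol(self, *args):
        return "native symbol called"


class LazyLibraryProxy:
    def __init__(self, lib):
        self._lib = lib

    @functools.cache  # noqa: B019 -- caches the resolved callable per attribute name
    def __getattr__(self, name):
        def throw_on_call(*args, **kwargs):
            # Resolve at call time; only raise if the symbol truly is not present.
            if hasattr(self._lib, name):
                return getattr(self._lib, name)(*args, **kwargs)
            raise RuntimeError(
                f"Method '{name}' not available in CPU-only version of bitsandbytes.\n"
                "Reinstall with GPU support or use CUDA-enabled hardware."
            )

        return throw_on_call


lib = LazyLibraryProxy(_FakeNativeLib())
print(lib.some_native_symbol())  # resolves and calls the underlying symbol
try:
    lib.missing_symbol()         # missing symbol -> RuntimeError at call time
except RuntimeError as e:
    print(e)
```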
@@ -6,7 +6,6 @@ from pathlib import Path
import torch
from bitsandbytes.cextension import get_cuda_bnb_library_path
from bitsandbytes.consts import NONPYTORCH_DOC_URL
from bitsandbytes.cuda_specs import CUDASpecs
from bitsandbytes.diagnostics.utils import print_dedented
@@ -115,25 +114,9 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
print_dedented(
f"""
Library not found: {binary_path}. Maybe you need to compile it from source?
If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`,
for example, `make CUDA_VERSION=113`.
The CUDA version for the compile might depend on your conda install, if using conda.
Inspect CUDA version via `conda list | grep cuda`.
""",
)
cuda_major, cuda_minor = cuda_specs.cuda_version_tuple
if cuda_major < 11:
print_dedented(
"""
WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8().
You will only be able to use 8-bit optimizers and quantization routines!
""",
)
print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}")
# 7.5 is the minimum CC for int8 tensor cores
if not cuda_specs.has_imma:
print_dedented(
@@ -144,10 +127,6 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
""",
)
# TODO:
# (1) CUDA missing cases (no CUDA installed but CUDA driver present, i.e. nvidia-smi accessible)
# (2) Multiple CUDA versions installed
def print_cuda_runtime_diagnostics() -> None:
cudart_paths = list(find_cudart_libraries())
......
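The check retained above relies on `cuda_specs.has_imma`, with 7.5 noted as the minimum compute capability for int8 tensor cores. The same probe can be approximated directly with PyTorch; this sketch mirrors the idea rather than the exact `CUDASpecs` implementation:

```python
import torch


def device_supports_int8_tensor_cores(device_index: int = 0) -> bool:
    """Heuristic: int8 tensor cores (IMMA) require compute capability >= 7.5."""
    if not torch.cuda.is_available():
        return False
    major, minor = torch.cuda.get_device_capability(device_index)
    return (major, minor) >= (7, 5)


print("int8 tensor cores available:", device_supports_int8_tensor_cores())
```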
import importlib
import platform
import sys
import traceback
import torch
from bitsandbytes import __version__ as bnb_version
from bitsandbytes.consts import PACKAGE_GITHUB_URL
from bitsandbytes.cuda_specs import get_cuda_specs
from bitsandbytes.diagnostics.cuda import (
print_cuda_diagnostics,
print_cuda_runtime_diagnostics,
)
from bitsandbytes.diagnostics.utils import print_dedented, print_header
_RELATED_PACKAGES = [
"accelerate",
"diffusers",
"numpy",
"pip",
"peft",
"safetensors",
"transformers",
"triton",
"trl",
]
def sanity_check():
from bitsandbytes.optim import Adam
@@ -27,30 +41,59 @@ def sanity_check():
assert p1 != p2
def get_package_version(name: str) -> str:
try:
version = importlib.metadata.version(name)
except importlib.metadata.PackageNotFoundError:
version = "not found"
return version
def show_environment():
"""Simple utility to print out environment information."""
print(f"Platform: {platform.platform()}")
if platform.system() == "Linux":
print(f" libc: {'-'.join(platform.libc_ver())}")
print(f"Python: {platform.python_version()}")
print(f"PyTorch: {torch.__version__}")
print(f" CUDA: {torch.version.cuda or 'N/A'}")
print(f" HIP: {torch.version.hip or 'N/A'}")
print(f" XPU: {getattr(torch.version, 'xpu', 'N/A') or 'N/A'}")
print("Related packages:")
for pkg in _RELATED_PACKAGES:
version = get_package_version(pkg)
print(f" {pkg}: {version}")
def main():
print_header("")
print_header("BUG REPORT INFORMATION")
print_header(f"bitsandbytes v{bnb_version}")
show_environment()
print_header("")
print_header("OTHER")
cuda_specs = get_cuda_specs()
print("CUDA specs:", cuda_specs)
if not torch.cuda.is_available():
print("Torch says CUDA is not available. Possible reasons:")
print("1. CUDA driver not installed")
print("2. CUDA not installed")
print("3. You have multiple conflicting CUDA libraries")
if cuda_specs:
print_cuda_diagnostics(cuda_specs)
print_cuda_runtime_diagnostics()
print_header("")
print_header("DEBUG INFO END")
print_header("")
# TODO: There's a lot of noise in this; needs improvement.
# print_cuda_runtime_diagnostics()
if not torch.cuda.is_available():
print("PyTorch says CUDA is not available. Possible reasons:")
print("1. CUDA driver not installed")
print("2. Using a CPU-only PyTorch build")
print("3. No GPU detected")
else:
print("Checking that the library is importable and CUDA is callable...")
try:
sanity_check()
print("SUCCESS!")
print("Installation was successful!")
return
except RuntimeError as e:
if "not available in CPU-only" in str(e):
@@ -63,6 +106,7 @@ def main():
raise e
except Exception:
traceback.print_exc()
print_dedented(
f"""
Above we output some debug information.
......
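The truncated `except RuntimeError` branch above pairs with the CPU-only stub introduced in the `cextension` hunk: that stub raises a `RuntimeError` whose message contains "not available in CPU-only", and `main()` keys off that substring to print a friendlier hint instead of a raw traceback. A minimal illustration of that interaction (the symbol name and messages are illustrative):

```python
def _stub_native_call():
    # Mirrors the error raised by the CPU-only library proxy (message abbreviated).
    raise RuntimeError(
        "Method 'cadam32bit_grad_fp32' not available in CPU-only version of bitsandbytes.\n"
        "Reinstall with GPU support or use CUDA-enabled hardware."
    )


try:
    _stub_native_call()
except RuntimeError as e:
    if "not available in CPU-only" in str(e):
        print("Detected a CPU-only install; native GPU kernels are missing.")
    else:
        raise
```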
@@ -3,7 +3,7 @@ import textwrap
HEADER_WIDTH = 60
def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "+") -> None:
def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "=") -> None:
txt = f" {txt} " if txt else ""
print(txt.center(width, filler))
......
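The only change above is the banner filler character (`+` to `=`); `str.center` does the rest. A quick illustration of the resulting output:

```python
HEADER_WIDTH = 60


def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "=") -> None:
    txt = f" {txt} " if txt else ""
    print(txt.center(width, filler))


print_header("BUG REPORT INFORMATION")  # title centered in a 60-character bar of "="
print_header("")                        # a solid 60-character "=" divider
```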
@@ -851,8 +851,8 @@ def dequantize_blockwise(
torch.ops.bitsandbytes.dequantize_blockwise.out(
A,
absmax,
code.to(A.device),
blocksize,
quant_state.code.to(A.device),
quant_state.blocksize,
quant_state.dtype,
out=out,
)
......
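The fix above makes the out-variant call take `code`, `blocksize`, and `dtype` from the caller-supplied `quant_state` rather than from the function's local arguments, so a provided quantization state is actually honored. A rough self-contained illustration of the parameter-selection logic being corrected, using a hypothetical stand-in for `QuantState`:

```python
from dataclasses import dataclass

import torch


@dataclass
class FakeQuantState:
    # Hypothetical stand-in for bitsandbytes' QuantState; fields mirror those used above.
    code: torch.Tensor
    blocksize: int
    dtype: torch.dtype


def pick_dequant_params(quant_state=None, code=None, blocksize=4096):
    """Prefer the state's stored parameters over local defaults (the gist of the fix)."""
    if quant_state is not None:
        return quant_state.code, quant_state.blocksize, quant_state.dtype
    return code, blocksize, torch.float32


state = FakeQuantState(code=torch.linspace(-1, 1, 256), blocksize=64, dtype=torch.float16)
print(pick_dequant_params(state)[1:])  # (64, torch.float16): the state's values win
print(pick_dequant_params()[1:])       # (4096, torch.float32): local defaults otherwise
```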