Commit 1c1b2576 authored by Titus von Koeller

further tweaks to reporting

parent c2480e35
@@ -3,6 +3,7 @@ import logging
 import os
 from pathlib import Path
 import re
+from typing import Optional

 import torch
@@ -75,6 +76,91 @@ def get_available_cuda_binaries() -> list[str]:
     return sorted(versions)
+
+
+def parse_cuda_version(version_str: str) -> str:
+    """Convert raw version string (e.g. '118' from env var) to formatted version (e.g. '11.8')"""
+    if version_str.isdigit() and len(version_str) == 3:
+        return f"{version_str[:2]}.{version_str[2]}"
+    return version_str  # fallback as safety net
+
+
+def _format_cuda_error_message(
+    available_versions: list[str],
+    user_cuda_version: str,
+    override_info: str,
+    original_error: str = "",
+    include_diagnostics: bool = False,
+    include_override_notes: bool = False,
+    required_version: Optional[str] = None,
+    version_missing: bool = False,
+) -> str:
+    version_list = ", ".join(available_versions) if available_versions else "none"
+    base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
+
+    # Explicit version availability check
+    version_alert = ""
+    if version_missing and required_version:
+        version_list_str = "\n- " + "\n- ".join(available_versions) if available_versions else "NONE"
+        version_alert = (
+            f"🚨 CUDA VERSION MISMATCH 🚨\n"
+            f"Requested CUDA version: {required_version}\n"
+            f"Available pre-compiled versions: {version_list_str}\n\n"
+            "This means:\n"
+            "1. The version you're trying to use is NOT distributed with this package\n"
+            "2. You MUST compile from source for this specific CUDA version\n"
+            "3. The installation will NOT work until you compile or choose a supported CUDA version\n\n"
+        )
+
+    troubleshooting = (
+        "This typically happens when:\n"
+        "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+        "2. The library wasn't compiled properly during installation from source\n"
+        "3. Missing CUDA dependencies\n\n"
+    )
+
+    note = (
+        "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
+        "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+    )
+
+    cuda_info = (
+        f"Detected PyTorch CUDA version: {user_cuda_version}\n"
+        f"Available pre-compiled bitsandbytes binaries for these CUDA versions: {version_list}\n"
+        f"{override_info}\n\n"
+    )
+
+    compile_instructions = (
+        (
+            "You have three options:\n"
+            "1. COMPILE FROM SOURCE (required if no binary exists):\n"
+            "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
+            "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one\n"
+            "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
+        )
+        if include_override_notes
+        else ""
+    )
+
+    diagnostics = (
+        (
+            "🔍 Run this command for detailed diagnostics:\n"
+            "python -m bitsandbytes\n\n"
+            "If you've tried everything and still have issues:\n"
+            "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+            "2. Describe what you've tried in detail\n"
+            "3. Open an issue with this information:\n"
+            "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+        )
+        if include_diagnostics
+        else ""
+    )
+
+    return (
+        f"{version_alert}{base_msg}{troubleshooting}{cuda_info}"
+        f"{note}{compile_instructions}"
+        f"{original_error}\n{diagnostics}"
+    )


 class MockBNBNativeLibrary(BNBNativeLibrary):
     """
     Mock BNBNativeLibrary that raises an error when trying to use native library
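For context on the hunk above: a minimal standalone sketch of how the new `parse_cuda_version` helper behaves. The function body is copied verbatim from this diff so the snippet runs on its own; the example inputs are illustrative and not part of the commit.

```python
def parse_cuda_version(version_str: str) -> str:
    """Convert raw version string (e.g. '118' from env var) to formatted version (e.g. '11.8')"""
    if version_str.isdigit() and len(version_str) == 3:
        return f"{version_str[:2]}.{version_str[2]}"
    return version_str  # fallback as safety net


# Three-digit strings (the BNB_CUDA_VERSION convention per the docstring) get a dot inserted.
assert parse_cuda_version("118") == "11.8"
assert parse_cuda_version("126") == "12.6"

# Anything else is passed through unchanged by the fallback branch.
assert parse_cuda_version("11.8") == "11.8"
assert parse_cuda_version("92") == "92"
```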
@@ -89,43 +175,27 @@ class MockBNBNativeLibrary(BNBNativeLibrary):
     def __getattr__(self, name):
         available_versions = get_available_cuda_binaries()
-        version_list = ", ".join(available_versions) if available_versions else "none"
-        user_ver = "Not detected"
-        if self.user_cuda_version:
-            user_ver = f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
-        override_value = os.environ.get("BNB_CUDA_VERSION", None)
-        override_info = (
-            f"\nCUDA version overridden with BNB_CUDA_VERSION={override_value} environment variable"
-            if override_value
-            else ""
-        )
-        note = "To make bitsandbytes work, the compiled version of the library must match the corresponding linked CUDA version. If you are using a CUDA version that doesn't come with a pre-compiled binary, the only solution is to compile the library from source."
-        cuda_info = (
-            f"Detected PyTorch CUDA version: {user_ver}\n"
-            f"Available pre-compiled bitsandbytes binaries for CUDA versions: {version_list}"
-            + override_info
-            + "\n\n"
-            + note
-            + "\n\n"
-        )
-        base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
-        original_error = f"Original error: {self.error_msg}\n\n" if self.error_msg else ""
-        troubleshooting = (
-            "This typically happens when:\n"
-            "1. BNB doesn't ship with a pre-compiled binary for your CUDA version\n"
-            "2. The library wasn't compiled properly during installation from source\n"
-            "3. Missing CUDA dependencies\n\n"
-        )
-        err_msg = (
-            base_msg + troubleshooting + cuda_info + original_error + ("Run 'python -m bitsandbytes' for diagnostics.")
-        )
-        raise RuntimeError(err_msg)
+        override_value = os.environ.get("BNB_CUDA_VERSION")
+        override_info = f"\nCUDA override: BNB_CUDA_VERSION={override_value}" if override_value else ""
+        formatted_version = (
+            parse_cuda_version(override_value)
+            if override_value
+            else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+        )
+        required_version = formatted_version
+        version_missing = required_version not in available_versions
+        msg = _format_cuda_error_message(
+            available_versions=available_versions,
+            user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}",
+            override_info=override_info,
+            original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "",
+            include_diagnostics=True,
+            required_version=formatted_version,
+            version_missing=version_missing,
+        )
+        raise RuntimeError(msg)

     def __getitem__(self, name):
         return self.__getattr__(name)
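The rewritten `__getattr__` now delegates message building to `_format_cuda_error_message` and only decides which CUDA version is "required" and whether that version is missing from the pre-compiled binaries. A rough, self-contained sketch of that selection logic follows; the available-version list and the detected version tuple are hypothetical placeholders, not values produced by this code.

```python
import os

# Hypothetical stand-ins for get_available_cuda_binaries() and the
# (major, minor) tuple the mock stores; not real detection results.
available_versions = ["11.8", "12.1"]
user_cuda_version = (12, 4)

override_value = os.environ.get("BNB_CUDA_VERSION")  # e.g. "126"

# Same selection as the new __getattr__: an override wins, otherwise the
# detected PyTorch CUDA version is used. The dot-formatting is inlined here
# (mirroring parse_cuda_version) so the snippet stays standalone.
if override_value and override_value.isdigit() and len(override_value) == 3:
    required_version = f"{override_value[:2]}.{override_value[2]}"
elif override_value:
    required_version = override_value
else:
    required_version = f"{user_cuda_version[0]}.{user_cuda_version[1]}"

version_missing = required_version not in available_versions
print(required_version, version_missing)  # e.g. "12.4 True" when no override is set
```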
@@ -142,7 +212,24 @@ def get_native_library() -> BNBNativeLibrary:
         if cuda_binary_path.exists():
             binary_path = cuda_binary_path
         else:
-            logger.warning("Could not find the bitsandbytes CUDA binary at %r", cuda_binary_path)
+            available_versions = get_available_cuda_binaries()
+            env_version = os.environ.get("BNB_CUDA_VERSION")
+            override_info = "\nCUDA override active" if env_version else ""
+            formatted_version = parse_cuda_version(env_version) if env_version else cuda_specs.cuda_version_string
+            required_version = formatted_version
+            version_missing = required_version not in available_versions
+            msg = _format_cuda_error_message(
+                available_versions=available_versions,
+                user_cuda_version=cuda_specs.cuda_version_string,
+                override_info=override_info,
+                include_override_notes=True,
+                required_version=formatted_version,
+                version_missing=version_missing,
+            )
+            logger.warning(msg)

     logger.debug(f"Loading bitsandbytes native library from: {binary_path}")
     dll = ct.cdll.LoadLibrary(str(binary_path))
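Since `_format_cuda_error_message` is now shared by the mock library and the loader warning path above, it can be exercised directly. Below is a hedged pytest-style sketch; the `bitsandbytes.cextension` import path is an assumption (the file name is not shown in this view) and the version strings are made up.

```python
from bitsandbytes.cextension import _format_cuda_error_message  # assumed module path


def test_version_mismatch_alert_and_override_notes():
    msg = _format_cuda_error_message(
        available_versions=["11.8", "12.1"],
        user_cuda_version="12.4",
        override_info="",
        include_override_notes=True,
        required_version="12.4",
        version_missing=True,
    )
    # The mismatch banner only appears when version_missing is set together
    # with a required_version, per the branch at the top of the function.
    assert "CUDA VERSION MISMATCH" in msg
    assert "Requested CUDA version: 12.4" in msg
    # include_override_notes=True pulls in the three-option compile guidance.
    assert "You have three options" in msg


def test_no_alert_when_version_is_available():
    msg = _format_cuda_error_message(
        available_versions=["11.8", "12.1"],
        user_cuda_version="11.8",
        override_info="",
        required_version="11.8",
        version_missing=False,
    )
    assert "CUDA VERSION MISMATCH" not in msg
```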
@@ -165,7 +252,7 @@ except Exception as e:
     diagnostic_help = ""
     if torch.cuda.is_available():
         diagnostic_help = (
-            "CUDA Setup failed despite CUDA being available. "
+            "CUDA Setup failed despite CUDA being available.\n\n"
             "Please run the following command to get more information:\n\n"
             "python -m bitsandbytes\n\n"
             "Inspect the output of the command and see if you can locate CUDA libraries. "