"""
Extract the factors the build depends on:
[X] compute capability
    [ ] TODO: Q - What if we have multiple GPUs of different makes?
- CUDA version
- Software:
    - CPU-only: only CPU quantization functions (no optimizer, no matrix multiplication)
    - CuBLAS-LT: full-build 8-bit optimizer
    - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)

evaluation:
    - if paths faulty, return meaningful error
    - else:
        - determine CUDA version
        - determine capabilities
        - based on that set the default path
"""

import ctypes as ct
import os
import errno
import torch
from warnings import warn

from pathlib import Path
from typing import Set, Union
from .env_vars import get_potentially_lib_path_containing_env_vars

CUDA_RUNTIME_LIB: str = "libcudart.so"

class CUDASetup:
    _instance = None

    def __init__(self):
        raise RuntimeError("Call get_instance() instead")

    def generate_instructions(self):
        if self.cuda is None:
            self.add_log_entry('CUDA SETUP: Problem: The main issue seems to be that the main CUDA library was not detected.')
            self.add_log_entry('CUDA SETUP: Solution 1): Your paths are probably not up-to-date. You can update them via: sudo ldconfig.')
            self.add_log_entry('CUDA SETUP: Solution 2): If you do not have sudo rights, you can do the following:')
            self.add_log_entry('CUDA SETUP: Solution 2a): Find the cuda library via: find / -name libcuda.so 2>/dev/null')
            self.add_log_entry('CUDA SETUP: Solution 2b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_2a')
            self.add_log_entry('CUDA SETUP: Solution 2c): For a permanent solution add the export from 2b into your .bashrc file, located at ~/.bashrc')
            return

        if self.cudart_path is None:
            self.add_log_entry('CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.')
            self.add_log_entry('CUDA SETUP: Solution 1: To solve the issue the libcudart.so location needs to be added to the LD_LIBRARY_PATH variable')
            self.add_log_entry('CUDA SETUP: Solution 1a): Find the cuda runtime library via: find / -name libcudart.so 2>/dev/null')
            self.add_log_entry('CUDA SETUP: Solution 1b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_1a')
            self.add_log_entry('CUDA SETUP: Solution 1c): For a permanent solution add the export from 1b into your .bashrc file, located at ~/.bashrc')
            self.add_log_entry('CUDA SETUP: Solution 2: If no library was found in step 1a) you need to install CUDA.')
            self.add_log_entry('CUDA SETUP: Solution 2a): Download CUDA install script: wget https://github.com/TimDettmers/bitsandbytes/blob/main/cuda_install.sh')
            self.add_log_entry('CUDA SETUP: Solution 2b): Install desired CUDA version to desired location. The syntax is bash cuda_install.sh CUDA_VERSION PATH_TO_INSTALL_INTO.')
            self.add_log_entry('CUDA SETUP: Solution 2b): For example, "bash cuda_install.sh 113 ~/local/" will download CUDA 11.3 and install into the folder ~/local')
            return

        make_cmd = f'CUDA_VERSION={self.cuda_version_string}'
        if len(self.cuda_version_string) < 3:
            make_cmd += ' make cuda92'
        elif self.cuda_version_string == '110':
            make_cmd += ' make cuda110'
        elif self.cuda_version_string[:2] == '11' and int(self.cuda_version_string[2]) > 0:
            make_cmd += ' make cuda11x'
        elif self.cuda_version_string == '100':
            self.add_log_entry('CUDA SETUP: CUDA 10.0 not supported. Please use a different CUDA version.')
            self.add_log_entry('CUDA SETUP: Before you try again running bitsandbytes, make sure old CUDA 10.0 versions are uninstalled and removed from $LD_LIBRARY_PATH variables.')
            return

        has_cublaslt = is_cublasLt_compatible(self.cc)
        if not has_cublaslt:
            make_cmd += '_nomatmul'

        self.add_log_entry('CUDA SETUP: Something unexpected happened. Please compile from source:')
        self.add_log_entry('git clone git@github.com:TimDettmers/bitsandbytes.git')
        self.add_log_entry('cd bitsandbytes')
        self.add_log_entry(make_cmd)
        self.add_log_entry('python setup.py install')

    def initialize(self):
        if not getattr(self, 'initialized', False):
            self.has_printed = False
            self.lib = None
            self.initialized = False

    def run_cuda_setup(self):
        self.initialized = True
        self.cuda_setup_log = []

        binary_name, cudart_path, cuda, cc, cuda_version_string = evaluate_cuda_setup()
        self.cudart_path = cudart_path
        self.cuda = cuda
        self.cc = cc
        self.cuda_version_string = cuda_version_string

        package_dir = Path(__file__).parent.parent
        binary_path = package_dir / binary_name

        try:
            if not binary_path.exists():
                self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?")
                legacy_binary_name = "libbitsandbytes_cpu.so"
                self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
                binary_path = package_dir / legacy_binary_name
                if not binary_path.exists() or torch.cuda.is_available():
                    self.add_log_entry('')
                    self.add_log_entry('='*48 + 'ERROR' + '='*37)
                    self.add_log_entry('CUDA SETUP: CUDA detection failed! Possible reasons:')
                    self.add_log_entry('1. CUDA driver not installed')
                    self.add_log_entry('2. CUDA not installed')
                    self.add_log_entry('3. You have multiple conflicting CUDA libraries')
                    self.add_log_entry('4. Required library not pre-compiled for this bitsandbytes release!')
                    self.add_log_entry('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION` for example, `make CUDA_VERSION=113`.')
                    self.add_log_entry('CUDA SETUP: The CUDA version for the compile might depend on your conda install. Inspect CUDA version via `conda list | grep cuda`.')
                    self.add_log_entry('='*80)
                    self.add_log_entry('')
                    self.generate_instructions()
                    self.print_log_stack()
                    raise Exception('CUDA SETUP: Setup Failed!')
                self.lib = ct.cdll.LoadLibrary(str(binary_path))
            else:
                self.add_log_entry(f"CUDA SETUP: Loading binary {binary_path}...")
                self.lib = ct.cdll.LoadLibrary(str(binary_path))
        except Exception as ex:
            self.add_log_entry(str(ex))
            self.print_log_stack()

    def add_log_entry(self, msg, is_warning=False):
        self.cuda_setup_log.append((msg, is_warning))

    def print_log_stack(self):
        for msg, is_warning in self.cuda_setup_log:
            if is_warning:
                warn(msg)
            else:
                print(msg)

    @classmethod
    def get_instance(cls):
        if cls._instance is None:
            cls._instance = cls.__new__(cls)
            cls._instance.initialize()
        return cls._instance


def is_cublasLt_compatible(cc):
    has_cublaslt = False
    if cc is not None:
        cc_major, cc_minor = cc.split('.')
        if int(cc_major) < 7 or (int(cc_major) == 7 and int(cc_minor) < 5):
            CUDASetup.get_instance().add_log_entry("WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU!", is_warning=True)
        else:
            has_cublaslt = True
    return has_cublaslt
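
# For example (illustrative values): a V100 reports CC "7.0", so
# is_cublasLt_compatible("7.0") is False (only slow 8-bit matmul), while an
# A100 reports "8.0" and is compatible.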

def extract_candidate_paths(paths_list_candidate: str) -> Set[Path]:
    return {Path(ld_path) for ld_path in paths_list_candidate.split(":") if ld_path}
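
# For example, extract_candidate_paths("/usr/lib:/opt/cuda/lib64") yields
# {Path('/usr/lib'), Path('/opt/cuda/lib64')}; empty segments are dropped.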


def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
    existent_directories: Set[Path] = set()
    for path in candidate_paths:
        try:
            if path.exists():
                existent_directories.add(path)
        except OSError as exc:
            if exc.errno != errno.ENAMETOOLONG:
                raise exc

    non_existent_directories: Set[Path] = candidate_paths - existent_directories
    if non_existent_directories:
        CUDASetup.get_instance().add_log_entry("WARNING: The following directories listed in your path were found to "
            f"be non-existent: {non_existent_directories}", is_warning=True)

    return existent_directories


def get_cuda_runtime_lib_paths(candidate_paths: Set[Path]) -> Set[Path]:
    return {
        path / CUDA_RUNTIME_LIB
        for path in candidate_paths
        if (path / CUDA_RUNTIME_LIB).is_file()
    }


def resolve_paths_list(paths_list_candidate: str) -> Set[Path]:
    """
    Searches a given environment variable's value for the CUDA runtime library,
    i.e. `libcudart.so`.
    """
    return remove_non_existent_dirs(extract_candidate_paths(paths_list_candidate))


def find_cuda_lib_in(paths_list_candidate: str) -> Set[Path]:
    return get_cuda_runtime_lib_paths(
        resolve_paths_list(paths_list_candidate)
    )
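
# For example (illustrative, assuming the first directory exists and holds the
# runtime): find_cuda_lib_in("/usr/local/cuda/lib64:/does/not/exist") returns
# {Path('/usr/local/cuda/lib64/libcudart.so')} and logs a warning about the
# non-existent directory.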


def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None:
    if len(results_paths) > 1:
        warning_msg = (
            f"Found duplicate {CUDA_RUNTIME_LIB} files: {results_paths}. "
            "We'll flip a coin and try one of these, in order to fail forward.\n"
            "Either way, this might cause trouble in the future:\n"
            "If you get `CUDA error: invalid device function` errors, the above "
            "might be the cause and the solution is to make sure there is only one "
            f"{CUDA_RUNTIME_LIB} in the paths that we search based on your env.")
        CUDASetup.get_instance().add_log_entry(warning_msg, is_warning=True)


def determine_cuda_runtime_lib_path() -> Union[Path, None]:
    """
        Searches for a CUDA installation, in the following order of priority:
            1. active conda env
            2. LD_LIBRARY_PATH
            3. any other env vars, while ignoring those that
                - are known to be unrelated (see `bnb.cuda_setup.env_vars.to_be_ignored`)
                - don't contain the path separator `/`

        If multiple libraries are found in part 3, we optimistically try one,
        while giving a warning message.
    """
    candidate_env_vars = get_potentially_lib_path_containing_env_vars()

    if "CONDA_PREFIX" in candidate_env_vars:
        conda_libs_path = Path(candidate_env_vars["CONDA_PREFIX"]) / "lib"

        conda_cuda_libs = find_cuda_lib_in(str(conda_libs_path))
        warn_in_case_of_duplicates(conda_cuda_libs)

        if conda_cuda_libs:
            return next(iter(conda_cuda_libs))

        CUDASetup.get_instance().add_log_entry(f'{candidate_env_vars["CONDA_PREFIX"]} did not contain '
            f'{CUDA_RUNTIME_LIB} as expected! Searching further paths...', is_warning=True)

    if "LD_LIBRARY_PATH" in candidate_env_vars:
        lib_ld_cuda_libs = find_cuda_lib_in(candidate_env_vars["LD_LIBRARY_PATH"])

        warn_in_case_of_duplicates(lib_ld_cuda_libs)

        if lib_ld_cuda_libs:
            return next(iter(lib_ld_cuda_libs))

        CUDASetup.get_instance().add_log_entry(f'{candidate_env_vars["LD_LIBRARY_PATH"]} did not contain '
            f'{CUDA_RUNTIME_LIB} as expected! Searching further paths...', is_warning=True)

    remaining_candidate_env_vars = {
        env_var: value for env_var, value in candidate_env_vars.items()
        if env_var not in {"CONDA_PREFIX", "LD_LIBRARY_PATH"}
    }

    cuda_runtime_libs = set()
    for env_var, value in remaining_candidate_env_vars.items():
        cuda_runtime_libs.update(find_cuda_lib_in(value))

    if len(cuda_runtime_libs) == 0:
        CUDASetup.get_instance().add_log_entry('CUDA SETUP: WARNING! libcudart.so not found in any environment path. Searching /usr/local/cuda/lib64...')
        cuda_runtime_libs.update(find_cuda_lib_in('/usr/local/cuda/lib64'))

    warn_in_case_of_duplicates(cuda_runtime_libs)

    return next(iter(cuda_runtime_libs)) if cuda_runtime_libs else None


def check_cuda_result(cuda, result_val):
    # 3. Check for CUDA errors
    if result_val != 0:
        error_str = ct.c_char_p()
        cuda.cuGetErrorString(result_val, ct.byref(error_str))
        if error_str.value is not None:
            CUDASetup.get_instance().add_log_entry(f"CUDA exception! Error code: {error_str.value.decode()}")
        else:
            CUDASetup.get_instance().add_log_entry("Unknown CUDA exception! Please check your CUDA install. It might also be that your GPU is too old.")


# https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION
def get_cuda_version(cuda, cudart_path):
    if cuda is None: return None

    try:
        cudart = ct.CDLL(cudart_path)
    except OSError:
        CUDASetup.get_instance().add_log_entry(f'ERROR: libcudart.so could not be read from path: {cudart_path}!')
        return None

    version = ct.c_int()
    try:
        check_cuda_result(cuda, cudart.cudaRuntimeGetVersion(ct.byref(version)))
    except AttributeError as e:
        CUDASetup.get_instance().add_log_entry(f'ERROR: {str(e)}')
        CUDASetup.get_instance().add_log_entry(f'CUDA SETUP: libcudart.so path is {cudart_path}')
        CUDASetup.get_instance().add_log_entry('CUDA SETUP: It seems that your CUDA installation is not in your path. See https://github.com/TimDettmers/bitsandbytes/issues/85 for more information.')
        return None
    version = int(version.value)
    major = version//1000
    minor = (version-(major*1000))//10
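    # cudaRuntimeGetVersion() encodes the version as 1000*major + 10*minor,
    # e.g. 11030 -> major 11, minor 3 -> returned as the string "113".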

    if major < 11:
        CUDASetup.get_instance().add_log_entry('CUDA SETUP: CUDA versions lower than 11 are currently not supported for LLM.int8(). You will only be able to use 8-bit optimizers and quantization routines!')

    return f'{major}{minor}'


def get_cuda_lib_handle():
    # 1. find libcuda.so library (GPU driver) (/usr/lib)
    try:
        cuda = ct.CDLL("libcuda.so")
    except OSError:
        CUDASetup.get_instance().add_log_entry('CUDA SETUP: WARNING! libcuda.so not found! Do you have a CUDA driver installed? If you are on a cluster, make sure you are on a CUDA machine!')
        return None
    check_cuda_result(cuda, cuda.cuInit(0))

    return cuda


def get_compute_capabilities(cuda):
    """
    1. find libcuda.so library (GPU driver) (/usr/lib)
       init_device -> init variables -> call function by reference
    2. call extern C function to determine CC
       (https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html)
    3. Check for CUDA errors
       https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
    # bits taken from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
    """

    nGpus = ct.c_int()
    cc_major = ct.c_int()
    cc_minor = ct.c_int()

    device = ct.c_int()

    check_cuda_result(cuda, cuda.cuDeviceGetCount(ct.byref(nGpus)))
    ccs = []
    for i in range(nGpus.value):
        check_cuda_result(cuda, cuda.cuDeviceGet(ct.byref(device), i))
        ref_major = ct.byref(cc_major)
        ref_minor = ct.byref(cc_minor)
        # 2. call extern C function to determine CC
        check_cuda_result(cuda, cuda.cuDeviceComputeCapability(ref_major, ref_minor, device))
        ccs.append(f"{cc_major.value}.{cc_minor.value}")

    return ccs


# def get_compute_capability()-> Union[List[str, ...], None]: # FIXME: error
def get_compute_capability(cuda):
    """
    Extracts the highest compute capbility from all available GPUs, as compute
    capabilities are downwards compatible. If no GPUs are detected, it returns
    None.
    """
    if cuda is None: return None

    # TODO: handle different compute capabilities; for now, take the max
    ccs = get_compute_capabilities(cuda)
    if ccs: return max(ccs, key=lambda v: tuple(map(int, v.split('.'))))


def evaluate_cuda_setup():
    if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
        print('')
        print('='*35 + 'BUG REPORT' + '='*35)
        print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
        print('='*80)
    if not torch.cuda.is_available(): return 'libbitsandbytes_cpu.so', None, None, None, None

    cuda_setup = CUDASetup.get_instance()
    cudart_path = determine_cuda_runtime_lib_path()
    cuda = get_cuda_lib_handle()
    cc = get_compute_capability(cuda)
    cuda_version_string = get_cuda_version(cuda, cudart_path)

    failure = False
    if cudart_path is None:
        failure = True
        cuda_setup.add_log_entry("WARNING: No libcudart.so found! Install CUDA or the cudatoolkit package (anaconda)!", is_warning=True)
    else:
        cuda_setup.add_log_entry(f"CUDA SETUP: CUDA runtime path found: {cudart_path}")

    if cc == '' or cc is None:
        failure = True
        cuda_setup.add_log_entry("WARNING: No GPU detected! Check your CUDA paths. Proceeding to load CPU-only library...", is_warning=True)
    else:
        cuda_setup.add_log_entry(f"CUDA SETUP: Highest compute capability among GPUs detected: {cc}")

    if cuda is None:
        failure = True
    else:
        cuda_setup.add_log_entry(f'CUDA SETUP: Detected CUDA version {cuda_version_string}')

    # 7.5 is the minimum CC for cuBLASLt
    has_cublaslt = is_cublasLt_compatible(cc)

    # TODO:
    # (1) CUDA missing cases (no CUDA installed, but CUDA driver (nvidia-smi) accessible)
    # (2) Multiple CUDA versions installed

    # we use ls -l instead of nvcc to determine the cuda version
    # since most installations will have the libcudart.so installed, but not the compiler

    if failure:
        binary_name = "libbitsandbytes_cpu.so"
    elif has_cublaslt:
        binary_name = f"libbitsandbytes_cuda{cuda_version_string}.so"
    else:
        # if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so
        binary_name = f"libbitsandbytes_cuda{cuda_version_string}_nocublaslt.so"

    return binary_name, cudart_path, cuda, cc, cuda_version_string
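

# A minimal usage sketch (hypothetical driver code; in the real package this
# module is exercised during import):
#
#   setup = CUDASetup.get_instance()
#   setup.run_cuda_setup()     # detect CUDA, pick and load the matching binary
#   setup.print_log_stack()    # replay the collected log entries and warnings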