Unverified Commit 3a630c58 authored by Younes Belkada's avatar Younes Belkada Committed by GitHub
Browse files

Merge pull request #908 from wkpark/cmake

Cmake + workflows
parents 89876bb0 0f3d0293
# CI workflow: configures and builds bitsandbytes with CMake on Linux and
# Windows for every Python/CUDA combination in the matrix, then builds a wheel
# and uploads it as an artifact.
name: CMake on multiple platforms

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  build:
    runs-on: ${{ matrix.os }}

    strategy:
      # Set fail-fast to false to ensure that feedback is delivered for all
      # matrix combinations. Consider changing this to true when your workflow
      # is stable.
      fail-fast: false

      matrix:
        os: [ubuntu-latest, windows-latest]
        python-version: ['3.10', '3.11']
        cuda-version: ['11.8', '12.1']
        build_type: [Release]
        c_compiler: [gcc, cl]
        # Attach the matching C++ compiler to each os/c_compiler pair.
        include:
          - os: windows-latest
            c_compiler: cl
            cpp_compiler: cl
          - os: ubuntu-latest
            c_compiler: gcc
            cpp_compiler: g++
        # Drop the os/compiler pairs that do not exist.
        exclude:
          - os: ubuntu-latest
            c_compiler: cl
          - os: windows-latest
            c_compiler: gcc

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set up MSVC
        if: matrix.os == 'windows-latest'
        uses: ilammy/msvc-dev-cmd@v1.12.1
        with:
          arch: amd64

      - name: Setup Mambaforge
        uses: conda-incubator/setup-miniconda@v3.0.1
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          activate-environment: bnb-env
          use-mamba: true

      # Second pass of the same action: populates bnb-env from
      # environment-bnb.yml with the matrix Python version.
      - uses: conda-incubator/setup-miniconda@v3.0.1
        with:
          auto-update-conda: true
          activate-environment: bnb-env
          environment-file: environment-bnb.yml
          use-only-tar-bz2: false
          auto-activate-base: true
          python-version: ${{ matrix.python-version }}
          mamba-version: "*"

      - name: Set reusable strings
        # Turn repeated input strings (such as the build output directory) into
        # step outputs. These step outputs can be used throughout the workflow
        # file.
        id: strings
        shell: bash
        run: |
          echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"

      - name: CUDA Toolkit
        # Login shell (-l) so the conda environment is active inside the script.
        shell: bash -el {0}
        run: |
          if [ "${{ matrix.os }}" = "ubuntu-latest" ]; then
            # to prepare space
            sudo rm -rf /usr/share/dotnet
            sudo rm -rf /opt/ghc
            sudo rm -rf /usr/local/share/boost
          fi
          addon=""
          cuda_version=${{ matrix.cuda-version }}
          [ "$cuda_version" = "12.1" ] && [ "${{ matrix.os }}" = "ubuntu-latest" ] && addon="cuda-cudart-static cuda-nvrtc"
          [ "$cuda_version" = "12.1" ] && [ "${{ matrix.os }}" = "windows-latest" ] && addon="cuda-nvrtc"
          [ "$cuda_version" = "11.8" ] && cuda_version="11.8.0"
          [ "$cuda_version" = "12.1" ] && cuda_version="12.1.1"
          conda install pytorch-cuda=${{ matrix.cuda-version }} -c pytorch # its dependencies are sometimes not resolved correctly
          conda install cuda-python=${{ matrix.cuda-version }} cuda-libraries-dev cuda-nvcc cuda-nvtx cuda-cupti cuda-cudart cuda-cudart-dev cuda-runtime cuda-libraries $addon -c "nvidia/label/cuda-$cuda_version"
          [ "${{ matrix.os }}" = "windows-latest" ] && conda install "clang>=17.0.6" "clangxx>=17.0.6" -c conda-forge
          CUDA_HOME="${{ env.CONDA }}/envs/bnb-env"
          echo CUDA_HOME=$CUDA_HOME >> "$GITHUB_ENV"
          echo CUDA_PATH=$CUDA_HOME >> "$GITHUB_ENV"
          if [ "${{ matrix.os }}" = "windows-latest" ]; then
            # without -DCMAKE_CUDA_COMPILER=nvcc, cmake config always fail for cuda-11.8
            echo DCMAKE_CUDA_COMPILER=-DCMAKE_CUDA_COMPILER=nvcc >> "$GITHUB_ENV"
          fi
          nvcc --version

      - name: Update environment
        run: mamba env update -n bnb-env -f environment-bnb.yml

      - name: Prep build
        run: python -m pip install cmake==3.27.9 ninja setuptools wheel

      # The three configure/build pairs below share one build directory, so
      # cache entries set by an earlier configure persist into the later ones.
      - name: Configure CMake
        run: >
          cmake -B ${{ steps.strings.outputs.build-output-dir }}
          -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
          -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
          -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
          -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
          -S ${{ github.workspace }}

      - name: Build
        # Build your program with the given configuration. --config only
        # matters for multi-config generators; it is harmless with Ninja.
        run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}

      - name: Configure NOBLASLT
        run: >
          cmake -B ${{ steps.strings.outputs.build-output-dir }}
          -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
          -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
          -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
          -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
          -DNO_CUBLASLT=ON
          -S ${{ github.workspace }}

      - name: Build NOBLASLT
        run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}

      - name: Configure CPU
        # NO_CUBLASLT is reset to OFF explicitly: the previous configure cached
        # it as ON, and CMakeLists.txt warns that the option has no effect when
        # BUILD_CUDA is OFF.
        run: >
          cmake -B ${{ steps.strings.outputs.build-output-dir }}
          -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
          -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
          -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
          -DNO_CUBLASLT=OFF
          -DBUILD_CUDA=OFF
          -S ${{ github.workspace }}

      - name: Build CPU
        run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}

      - name: Test
        working-directory: ${{ steps.strings.outputs.build-output-dir }}
        # Execute tests defined by the CMake configuration. --build-config only
        # matters for multi-config generators.
        # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
        run: ctest --build-config ${{ matrix.build_type }}

      - name: Build dist
        shell: bash -el {0}
        run: |
          python -m pip install build
          python -m build --wheel
          mkdir dist/cu${{ matrix.cuda-version }}
          mv dist/bitsandbytes*.* dist/cu${{ matrix.cuda-version }}/

      - name: Upload Build Artifacts
        uses: actions/upload-artifact@v4.3.0
        with:
          name: bitsandbytes-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cuda-version }}
          path: |
            ${{ github.workspace }}/dist/
# This CMake config hopefully makes it easier to compile.
# Ensure the CUDA Toolkit is available on your path. Then run:
#   For GCC: `cmake -B build . && cmake --build build`
#   For MSVC: `cmake -B build . && cmake --build build --config Release`
# You can also use the following options
#  - BUILD_CUDA: Default ON, will build with CUDA
#  - NO_CUBLASLT: Default OFF, will skip building/linking CUBLASLT support
#                 (cuBLAS itself is still linked)
#  - CUDA_VERSION: The expected CUDA version, for sanity checking. The actual version
#                  is whatever CMake finds on your path.
#  - COMPUTE_CAPABILITY: Which GPU Arch/Compute codes to provide to NVCC.
#                        Separate by semicolons and quote the value so the shell
#                        does not split it, i.e. `-DCOMPUTE_CAPABILITY="89;90"`
#                        Check your compute capability here: https://developer.nvidia.com/cuda-gpus
#  - PTXAS_VERBOSE: Pass the `-v` option to the PTX Assembler
cmake_minimum_required(VERSION 3.18)

# CUDA is enabled later, and only when BUILD_CUDA is ON.
project(bitsandbytes LANGUAGES C CXX)

option(BUILD_CUDA "Build bitsandbytes with CUDA support" ON)
option(NO_CUBLASLT "Disable cuBLASLt support (cuBLAS itself is still linked)" OFF)
option(PTXAS_VERBOSE "Pass through -v flag to PTX Assembler" OFF)

# Host sources are always built; CUDA sources are appended to SRC_FILES only
# when BUILD_CUDA is ON.
set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.c)
list(APPEND CUDA_FILES csrc/ops.cu csrc/kernels.cu)
list(APPEND SRC_FILES ${CPP_FILES})

message(STATUS "BUILD_CUDA := ${BUILD_CUDA}")
message(STATUS "NO_CUBLASLT := ${NO_CUBLASLT}")

# Base library name; a backend suffix (_cudaXY[_nocublaslt] or _cpu) is appended below.
set(BNB_OUTPUT_NAME "bitsandbytes")
# Backend selection: configure the CUDA toolchain, flags and output-name suffix,
# or fall back to a CPU-only build.
if(BUILD_CUDA)
    enable_language(CUDA) # This will fail if CUDA is not found

    # Convert the CUDA version from X.Y.z to XY (e.g. 12.1.105 -> 121).
    # The dot is escaped so that it only matches a literal '.'.
    string(REGEX MATCH "^[0-9]+\\.[0-9]+" _CUDA_VERSION_FIRST_TWO "${CMAKE_CUDA_COMPILER_VERSION}")
    string(REPLACE "." "" CUDA_VERSION_SHORT "${_CUDA_VERSION_FIRST_TWO}")

    # Expose a cache variable that the user can set to ensure the correct version of CUDA is found
    set(CUDA_VERSION "${CUDA_VERSION_SHORT}" CACHE STRING "Expected CUDA Version Shortcode")

    message(STATUS "CUDA Version: ${CUDA_VERSION_SHORT} (${CMAKE_CUDA_COMPILER_VERSION})")
    message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")

    # It should match the discovered version
    if(NOT CUDA_VERSION STREQUAL "${CUDA_VERSION_SHORT}")
        message(FATAL_ERROR "You've specified CUDA version ${CUDA_VERSION} however the CUDA compiler found is ${CUDA_VERSION_SHORT}."
            " Ensure the desired CUDA compiler is the first one available on your PATH."
        )
    endif()

    if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.0")
        message(FATAL_ERROR "CUDA Version < 11 is not supported")
    elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
        message(FATAL_ERROR "CUDA Version > 12 is not supported")
    endif()

    string(APPEND CMAKE_CUDA_FLAGS " --use_fast_math")
    if(PTXAS_VERBOSE)
        # Verbose? Outputs register usage information, and other things...
        string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v")
    endif()

    # CMAKE_CUDA_ARCHITECTURES_ALL is only defined by CMake >= 3.23. On older
    # CMake it is empty, which would silently leave the default capability list
    # empty, so derive a list from the toolkit version instead.
    set(_BNB_CUDA_ARCHS ${CMAKE_CUDA_ARCHITECTURES_ALL})
    if(NOT _BNB_CUDA_ARCHS)
        message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL is not available; deriving CUDA architectures from the toolkit version")
        # Supported by every CUDA 11.x and 12.x toolkit
        set(_BNB_CUDA_ARCHS 50 52 53 60 61 62 70 72 75 80)
        if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.1")
            list(APPEND _BNB_CUDA_ARCHS 86)
        endif()
        if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.4")
            list(APPEND _BNB_CUDA_ARCHS 87)
        endif()
        if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.8")
            list(APPEND _BNB_CUDA_ARCHS 89 90)
        endif()
    endif()

    foreach(capability ${_BNB_CUDA_ARCHS})
        # Most of the items here are like: `xx-real`, so we just extract the `xx` portion
        string(REGEX MATCH "[0-9]+" capability_id "${capability}")
        if(capability_id GREATER 0)
            list(APPEND POSSIBLE_CAPABILITIES ${capability_id})
        endif()
    endforeach()

    # This can be changed via -D argument to CMake
    # By default all possible capabilities are compiled
    set(COMPUTE_CAPABILITY "${POSSIBLE_CAPABILITIES}" CACHE STRING "Compute Capabilities Targeted")

    message(STATUS "CUDA Capabilities Available: ${POSSIBLE_CAPABILITIES}")
    message(STATUS "CUDA Capabilities Selected: ${COMPUTE_CAPABILITY}")

    # One -gencode entry per selected capability
    foreach(capability ${COMPUTE_CAPABILITY})
        string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_${capability},code=sm_${capability}")
    endforeach()

    message(STATUS "CUDA NVCC Flags: ${CMAKE_CUDA_FLAGS}")

    list(APPEND SRC_FILES ${CUDA_FILES})

    # Library name becomes bitsandbytes_cudaXY[_nocublaslt]
    string(APPEND BNB_OUTPUT_NAME "_cuda${CUDA_VERSION_SHORT}")
    if(NO_CUBLASLT)
        string(APPEND BNB_OUTPUT_NAME "_nocublaslt")
    endif()
else()
    message(STATUS "Building CPU Only")
    string(APPEND BNB_OUTPUT_NAME "_cpu")
    if(NO_CUBLASLT)
        message(WARNING "We're building in CPU only mode but NO_CUBLASLT is enabled. It will have no effect.")
    endif()
endif()
# Compile every host source as C++, including the .c file in CPP_FILES.
set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)

# The shared library that the Python package loads.
add_library(bitsandbytes SHARED ${SRC_FILES})

include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_include_directories(bitsandbytes PUBLIC csrc include)
target_compile_features(bitsandbytes PUBLIC cxx_std_14)

if(BUILD_CUDA)
    target_compile_definitions(bitsandbytes PUBLIC BUILD_CUDA)
    if(NO_CUBLASLT)
        # cuBLASLt is left out; the sources see the NO_CUBLASLT define.
        target_link_libraries(bitsandbytes PUBLIC cudart cublas cusparse)
        target_compile_definitions(bitsandbytes PUBLIC NO_CUBLASLT)
    else()
        target_link_libraries(bitsandbytes PUBLIC cudart cublas cusparse cublasLt)
    endif()
    set_property(TARGET bitsandbytes PROPERTY CUDA_SEPARABLE_COMPILATION ON)
endif()

if(WIN32)
    # Use the "lib" prefix on Windows as well.
    set_property(TARGET bitsandbytes PROPERTY PREFIX "lib")
endif()

set_target_properties(bitsandbytes PROPERTIES
    OUTPUT_NAME ${BNB_OUTPUT_NAME}
    # The always-true generator expression prevents MSVC Debug/Release subdirs being made
    RUNTIME_OUTPUT_DIRECTORY "$<1:${CMAKE_SOURCE_DIR}/bitsandbytes>"
    LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_SOURCE_DIR}/bitsandbytes>"
    # `-fPIC` for non-Windows compilers
    POSITION_INDEPENDENT_CODE ON
    # On Windows, export all C functions as DLL exports
    WINDOWS_EXPORT_ALL_SYMBOLS ON
)
# Installation
... work in progress ... Note: `bitsandbytes` is currently only supported on CUDA GPU hardware; support for AMD GPUs and M1 chips (macOS) is coming soon.
\ No newline at end of file
<hfoptions id="OS system">
<hfoption id="Linux">
## Linux
### From PyPI
```bash
pip install bitsandbytes
```
### From source
```bash
git clone https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/
CUDA_VERSION=XXX make cuda12x
python setup.py install
```
with `XXX` being your CUDA version; for CUDA versions below 12.0, call `make cuda11x` instead.
</hfoption>
<hfoption id="Windows">
## Windows
Currently, Windows users need to build bitsandbytes from source:
```bash
git clone https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/
cmake -B build -DBUILD_CUDA=ON -S .
cmake --build build --config Release
python -m build --wheel
```
Big thanks to [wkpark](https://github.com/wkpark), [Jamezo97](https://github.com/Jamezo97), [rickardp](https://github.com/rickardp), [akx](https://github.com/akx) for their amazing contributions to make bitsandbytes compatible with Windows.
</hfoption>
</hfoptions>
# Conda environment for the CMake build.
name: bnb

# Channels are listed in the order they are searched.
channels:
  - pytorch
  - nvidia
  - conda-forge

dependencies:
  - python
  - accelerate
  - einops
  - scipy
  - transformers
  - pytest
  - pytest-cases
  - ipython
  - debugpy
  - yapf
  - monkeytype
  - rich
  - pytest-sugar
...@@ -64,6 +64,16 @@ template <> struct InstrFloatTraits<SSE, double> ...@@ -64,6 +64,16 @@ template <> struct InstrFloatTraits<SSE, double>
typedef __m128d vec_t; typedef __m128d vec_t;
}; };
// Scalar instruction set, single precision: the "vector" type is a plain float.
template <>
struct InstrFloatTraits<Scalar, float>
{
    using vec_t = float;
};
// Scalar instruction set, double precision: the "vector" type is a plain double.
template <>
struct InstrFloatTraits<Scalar, double>
{
    using vec_t = double;
};
template <InstrSet I, typename T> template <InstrSet I, typename T>
struct FTOITraits struct FTOITraits
{ {
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
import glob import glob
import os import os
from setuptools import find_packages, setup from setuptools import Extension, find_packages, setup
libs = list(glob.glob("./bitsandbytes/libbitsandbytes*.so")) libs = list(glob.glob("./bitsandbytes/libbitsandbytes*.so"))
libs += list(glob.glob("./bitsandbytes/libbitsandbytes*.dll")) libs += list(glob.glob("./bitsandbytes/libbitsandbytes*.dll"))
...@@ -35,6 +35,9 @@ setup( ...@@ -35,6 +35,9 @@ setup(
}, },
long_description=read("README.md"), long_description=read("README.md"),
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
# HACK: pretend we have a native extension module so the wheel is tagged
# correctly with a platform tag (e.g. `-linux_x86_64.whl`).
ext_modules=[Extension("bitsandbytes", sources=[], language="c")],
classifiers=[ classifiers=[
"Development Status :: 4 - Beta", "Development Status :: 4 - Beta",
"Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Artificial Intelligence",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment