# Builds the native shared libraries (CPU and CUDA variants) with CMake on
# Linux and Windows, then packages them into Python wheels.
name: CMake on multiple platforms

# Runs for pushes to main and for pull requests that target main.
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

# Least privilege: the jobs in this workflow only check out the repository
# and exchange build artifacts, so the GITHUB_TOKEN needs read access to the
# repository contents and nothing else.
permissions:
  contents: read

# Only one run per ref is kept alive; a newer run for the same ref cancels
# the one still in progress.
concurrency:
  group: cmake-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-shared-libs:
    # CPU-only build of the shared library, one run per (os, arch) pair.
    strategy:
      # Keep the remaining matrix legs running when one of them fails so that
      # every combination reports a result. Consider switching this to true
      # once the workflow is stable.
      fail-fast: false

      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        arch:
          - x86_64
          - aarch64
        build_type:
          - Release
        exclude:
          # The Windows/aarch64 combination is not built.
          - arch: aarch64
            os: windows-latest

    runs-on: ${{ matrix.os }}

    steps:
    # Fetch the repository sources.
    - uses: actions/checkout@v4

    # Windows only: runs ilammy/msvc-dev-cmd for the amd64 architecture; the
    # compiler step further down selects `cl` on Windows.
    - name: Set up MSVC
      uses: ilammy/msvc-dev-cmd@v1.13.0
      if: ${{ matrix.os == 'windows-latest' }}
      with:
        arch: amd64

    - name: Set reusable strings
      # Publishes the build directory as the step output `build-output-dir`,
      # so later steps can read it from steps.strings.outputs instead of
      # repeating the path.
      id: strings
      shell: bash
      run: |
        printf '%s\n' "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"

    # Pin the interpreter instead of relying on whatever Python the runner
    # image happens to ship (a system Python may refuse `pip install` as an
    # externally managed environment). 3.10 matches the version used by the
    # CUDA and wheel jobs of this workflow.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    # Install the pinned CMake plus Ninja and the Python packaging helpers
    # into the interpreter selected above.
    - name: Prep build
      run: python -m pip install cmake==3.27.9 ninja setuptools wheel

    - name: Prep Compilers
      # Exports CXX_COMPILER / C_COMPILER for the CMake configure step:
      #   * Windows          -> cl
      #   * Linux, aarch64   -> GNU aarch64 cross toolchain (installed here)
      #   * Linux, otherwise -> host g++ / gcc
      shell: bash -el {0}
      run: |
        if [ "${{ matrix.os }}" = "windows-latest" ]; then
            echo CXX_COMPILER=cl >> "$GITHUB_ENV"
            echo C_COMPILER=cl >> "$GITHUB_ENV"
        elif [ "${{ matrix.arch }}" = "aarch64" ]; then
            # Without a cross toolchain this leg would compile with the host
            # gcc and publish that output under the aarch64 name.
            # NOTE(review): assumes the ubuntu-latest runner is x86_64 - confirm.
            sudo apt-get update
            sudo apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu
            echo CXX_COMPILER=aarch64-linux-gnu-g++ >> "$GITHUB_ENV"
            echo C_COMPILER=aarch64-linux-gnu-gcc >> "$GITHUB_ENV"
        else
            echo CXX_COMPILER=g++ >> "$GITHUB_ENV"
            echo C_COMPILER=gcc >> "$GITHUB_ENV"
        fi


    - name: Configure CPU
      # Configures a Ninja build with CUDA switched off. The compiler names
      # are read from the CXX_COMPILER / C_COMPILER environment variables.
      run: >-
        cmake
        -S ${{ github.workspace }}
        -B ${{ steps.strings.outputs.build-output-dir }}
        -G Ninja
        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
        -DCMAKE_C_COMPILER=${{ env.C_COMPILER }}
        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
        -DBUILD_CUDA=OFF

    - name: Build CPU
      # Builds the tree configured by the previous step.
      run: >-
        cmake
        --build ${{ steps.strings.outputs.build-output-dir }}
        --config ${{ matrix.build_type }}

    - name: Copy libraries
      # Collects the shared libraries found in bitsandbytes/ under
      # output/<os>/<arch>.
      shell: bash
      run: |
        dest="output/${{ matrix.os }}/${{ matrix.arch }}"
        mkdir -p "$dest"
        # nullglob drops the extensions that have no match instead of handing
        # the literal pattern to cp.
        shopt -s nullglob
        cp -a bitsandbytes/*.{so,dylib,dll} "$dest"

    - name: Upload Build Artifacts
      uses: actions/upload-artifact@v4
      with:
        path: "output/*"
        name: shared_library-${{ matrix.os }}-${{ matrix.arch }}


  build-shared-libs-cuda:
    # CUDA build of the shared library, one run per (os, cuda-version, arch).
    # NOTE(review): in this job matrix.arch is only used for the output
    # directory and the artifact name, while the compiler is chosen from
    # matrix.os alone - confirm that the ubuntu/aarch64 legs really produce
    # aarch64 binaries.
    strategy:
      # Keep the remaining matrix legs running when one of them fails so that
      # every combination reports a result. Consider switching this to true
      # once the workflow is stable.
      fail-fast: false

      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        cuda-version:
          - '11.8'
          - '12.1'
        arch:
          - x86_64
          - aarch64
        build_type:
          - Release
        exclude:
          # The Windows/aarch64 combination is not built.
          - arch: aarch64
            os: windows-latest

    runs-on: ${{ matrix.os }}

    steps:
    # Fetch the repository sources.
    - uses: actions/checkout@v4

    # Select the Python 3.10 interpreter.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    # Windows only: runs ilammy/msvc-dev-cmd for the amd64 architecture; the
    # CUDA toolkit step further down selects `cl` on Windows.
    - name: Set up MSVC
      uses: ilammy/msvc-dev-cmd@v1.13.0
      if: ${{ matrix.os == 'windows-latest' }}
      with:
        arch: amd64

    # First setup-miniconda invocation: requests the latest Mambaforge
    # variant, enables mamba and activates the `bnb-env` environment.
    - name: Setup Mambaforge
      uses: conda-incubator/setup-miniconda@v3.0.1
      with:
        miniforge-variant: Mambaforge
        miniforge-version: latest
        activate-environment: bnb-env
        use-mamba: true

    # Second setup-miniconda invocation for the same `bnb-env` environment,
    # this time driven by environment-bnb.yml with Python 3.10.
    # NOTE(review): the action runs twice in a row with different inputs -
    # confirm that both invocations are required rather than one merged step.
    - uses: conda-incubator/setup-miniconda@v3.0.1
      with:
        auto-update-conda: true
        activate-environment: bnb-env
        environment-file: environment-bnb.yml
        use-only-tar-bz2: false
        auto-activate-base: true
        python-version: "3.10"
        mamba-version: "*"

    - name: Set reusable strings
      # Publishes the build directory as the step output `build-output-dir`,
      # so later steps can read it from steps.strings.outputs instead of
      # repeating the path.
      id: strings
      shell: bash
      run: |
        printf '%s\n' "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"

    - name: CUDA Toolkit
      # Installs the CUDA packages for matrix.cuda-version with conda, then
      # exports CUDA_HOME / CUDA_PATH and the host compiler names (plus, on
      # Windows, the -DCMAKE_CUDA_COMPILER flag) for the CMake steps.
      # Every `conda install` passes --yes explicitly so the step never
      # depends on the always-yes setting of the conda configuration.
      shell: bash -el {0}
      run: |
        os="${{ matrix.os }}"
        cuda_version="${{ matrix.cuda-version }}"

        if [ "$os" = "ubuntu-latest" ]; then
            # Delete these directories to free disk space.
            sudo rm -rf /usr/share/dotnet
            sudo rm -rf /opt/ghc
            sudo rm -rf /usr/local/share/boost
        fi

        # Extra packages that are added only for CUDA 12.1, per OS.
        addon=""
        if [ "$cuda_version" = "12.1" ]; then
            case "$os" in
                ubuntu-latest)  addon="cuda-cudart-static cuda-nvrtc" ;;
                windows-latest) addon="cuda-nvrtc" ;;
            esac
        fi

        # The nvidia channel label uses the full patch version.
        case "$cuda_version" in
            11.8) cuda_label="11.8.0" ;;
            12.1) cuda_label="12.1.1" ;;
            *)    cuda_label="$cuda_version" ;;
        esac

        # Installed on its own first: its dependencies are sometimes not
        # resolved correctly otherwise.
        conda install --yes "pytorch-cuda=$cuda_version" -c pytorch
        # $addon is intentionally unquoted so it splits into package names.
        conda install --yes "cuda-python=$cuda_version" cuda-libraries-dev cuda-nvcc cuda-nvtx cuda-cupti cuda-cudart cuda-cudart-dev cuda-runtime cuda-libraries $addon -c "nvidia/label/cuda-$cuda_label"

        if [ "$os" = "windows-latest" ]; then
            conda install --yes "clang>=17.0.6" "clangxx>=17.0.6" -c conda-forge
        fi

        CUDA_HOME="${{ env.CONDA }}/envs/bnb-env"
        echo "CUDA_HOME=$CUDA_HOME" >> "$GITHUB_ENV"
        echo "CUDA_PATH=$CUDA_HOME" >> "$GITHUB_ENV"

        if [ "$os" = "windows-latest" ]; then
            echo CXX_COMPILER=cl >> "$GITHUB_ENV"
            echo C_COMPILER=cl >> "$GITHUB_ENV"
            # Without -DCMAKE_CUDA_COMPILER=nvcc the CMake configure step
            # always fails for CUDA 11.8.
            echo DCMAKE_CUDA_COMPILER=-DCMAKE_CUDA_COMPILER=nvcc >> "$GITHUB_ENV"
        else
            echo CXX_COMPILER=g++ >> "$GITHUB_ENV"
            echo C_COMPILER=gcc >> "$GITHUB_ENV"
        fi

        # Print the compiler version (fails the step if nvcc cannot be run).
        nvcc --version

    - name: Update environment
      # Applies environment-bnb.yml to the bnb-env environment.
      run: mamba env update --name bnb-env --file environment-bnb.yml

    - name: Prep build
      # Installs the pinned CMake plus Ninja and the Python packaging helpers.
      run: >-
        python -m pip install
        cmake==3.27.9
        ninja
        setuptools
        wheel

    # TODO: the following steps (CUDA, NOBLASLT, CPU) could be moved to the matrix, so they're built in parallel

    - name: Configure CUDA
      # Configures a Ninja build for the listed compute capabilities.
      # env.DCMAKE_CUDA_COMPILER is only set on Windows and expands to
      # nothing elsewhere.
      run: >-
        cmake
        -S ${{ github.workspace }}
        -B ${{ steps.strings.outputs.build-output-dir }}
        -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
        -DCMAKE_C_COMPILER=${{ env.C_COMPILER }}
        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
        -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"

    - name: Build CUDA
      # Builds the tree configured by the previous step.
      run: >-
        cmake
        --build ${{ steps.strings.outputs.build-output-dir }}
        --config ${{ matrix.build_type }}

    - name: Configure NOBLASLT
      # Re-configures the same build directory with NO_CUBLASLT switched on;
      # all other options match the "Configure CUDA" step.
      run: >-
        cmake
        -S ${{ github.workspace }}
        -B ${{ steps.strings.outputs.build-output-dir }}
        -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
        -DCMAKE_C_COMPILER=${{ env.C_COMPILER }}
        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
        -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
        -DNO_CUBLASLT=ON

    - name: Build NOBLASLT
      # Builds the tree configured by the previous step.
      run: >-
        cmake
        --build ${{ steps.strings.outputs.build-output-dir }}
        --config ${{ matrix.build_type }}

    - name: Copy libraries
      # Collects the shared libraries found in bitsandbytes/ under
      # output/<os>/<arch>.
      shell: bash
      run: |
        dest="output/${{ matrix.os }}/${{ matrix.arch }}"
        mkdir -p "$dest"
        # nullglob drops the extensions that have no match instead of handing
        # the literal pattern to cp.
        shopt -s nullglob
        cp -a bitsandbytes/*.{so,dylib,dll} "$dest"

    - name: Upload Build Artifacts
      uses: actions/upload-artifact@v4
      with:
        path: "output/*"
        name: shared_library_cuda-${{ matrix.os }}-${{ matrix.cuda-version }}-${{ matrix.arch }}


  build-wheels:
    # Builds one wheel per (os, arch) pair once both shared-library jobs have
    # finished.
    runs-on: ${{ matrix.os }}
    needs: [build-shared-libs, build-shared-libs-cuda]
    strategy:
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
        arch:
          - x86_64
          - aarch64
        exclude:
          # The Windows/aarch64 combination is not built.
          - arch: aarch64
            os: windows-latest

    steps:
    # Check out code
    - uses: actions/checkout@v4
    # Download the shared libraries built by the two library jobs and merge
    # them into output/<os>/<arch>/.
    - name: Download build artifact
      uses: actions/download-artifact@v4
      with:
        # Only the shared_library-* and shared_library_cuda-* artifacts are
        # wanted. Without a pattern every artifact of the run is fetched,
        # including bdist_wheel-* artifacts that a parallel leg of this
        # matrix may already have uploaded.
        pattern: "shared_library*"
        merge-multiple: true
        path: output/
    # Place the libraries for this leg's platform next to the Python sources.
    - name: Copy correct platform shared libraries
      shell: bash
      run: |
        cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/
    # Select the Python 3.10 interpreter, with pip caching enabled.
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        cache: pip
        python-version: "3.10"

    # Install the `build` frontend used by the next step.
    - name: Install build package
      run: pip install build
      shell: bash

    # Build a wheel from the repository root.
    - name: Build wheel
      run: python -m build . --wheel
      shell: bash

    # Publish the contents of dist/ as this leg's wheel artifact.
    - name: Upload Build Artifacts
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}/dist/
        name: bdist_wheel-${{ matrix.os }}-${{ matrix.arch }}