Unverified commit a3497ebc, authored by Lei Wang and committed by GitHub
Browse files

[PATCH] Static libstdc++ linking fix (#854)

* bump version to 0.1.6

* phase out py38

* py39

* Update submodule 'tvm' to latest commit adc0e48

* [Build] Update CMake and Python environment settings

- Added static linking flags for GCC and libstdc++ in CMakeLists.txt to enhance library linking.
- Removed the cmake version requirement from pyproject.toml to allow for broader compatibility.
- Updated the tox command in the Docker distribution script to include Python 3.8 for testing environments.

* [Build] Update Python version requirements in scripts and documentation

- Changed Python version requirement in README.md from 3.9+ to 3.8+.
- Updated installation and testing scripts to use Python 3.8 instead of 3.9, ensuring compatibility with the new minimum version.
- Adjusted tox commands in local and PyPI distribution scripts to include Python 3.8 in the testing environments.

* [Build] Update Python and CMake requirements in Dockerfile and pyproject.toml

- Added CMake version requirement (>=3.26) to pyproject.toml for build compatibility.
- Created a Python 3.8 environment in the Dockerfile and added a symlink for easier access to the Python 3.8 executable.

* [Build] Update CMake and Dockerfile for improved compatibility

- Removed static linking flags from CMakeLists.txt to simplify build configuration.
- Updated Dockerfile to use Ubuntu 20.04 and streamlined the installation of dependencies, removing gcc-9 and g++-9.
- Adjusted symlink creation for Python environments to use the `-sf` option for safer linking.

* [Build] Bump version to 0.1.6.post1 for post-release updates

* [Build] Remove static linking flags from CMakeLists.txt

- Eliminated static linking flags for GCC and libstdc++ to simplify build configuration and avoid potential conflicts with Python extensions.

* [Build] Update Docker distribution scripts for manylinux compatibility

- Changed base image from `tilelang-builder:18.04` to `tilelang-builder:manylinux` in both local and PyPI distribution scripts.
- Updated Dockerfile references to use `pypi.manylinux.Dockerfile`.
- Added `--gpus all` flag to the Docker run command to enable GPU support during execution.

* lint fix

* add cmake
parent 1ad6e461
......@@ -7,9 +7,6 @@ project(TILE_LANG C CXX)
option(TILE_LANG_STATIC_STDCPP "Statically link libstdc++ for TileLang libraries" ON)
option(TILE_LANG_INSTALL_STATIC_LIB "Install the static library" ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -static-libgcc -static-libstdc++")
if(TILE_LANG_STATIC_STDCPP)
message(STATUS "Enabling static linking of C++ standard library")
# Note: We'll apply static linking flags selectively to avoid Python extension conflicts
......
# Get the CUDA version from the command line
IMAGE="tilelang-builder:18.04"
docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.Dockerfile" --tag ${IMAGE}
IMAGE="tilelang-builder:manylinux"
docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.manylinux.Dockerfile" --tag ${IMAGE}
install_pip="python3.8 -m pip install --upgrade pip && python3.8 -m pip install -r requirements-build.txt"
tox_command="python3.8 -m tox -e py38,py39,py310,py311,py312"
docker run --rm -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
docker run --rm --gpus all -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
# Get the CUDA version from the command line
IMAGE="tilelang-builder:18.04"
docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.Dockerfile" --tag ${IMAGE}
IMAGE="tilelang-builder:manylinux"
docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.manylinux.Dockerfile" --tag ${IMAGE}
install_pip="python3.8 -m pip install --upgrade pip && python3.8 -m pip install -r requirements-build.txt"
tox_command="python3.8 -m tox -e py38-pypi,py39-pypi,py310-pypi,py311-pypi,py312-pypi,audit_2_27"
tox_command="python3.8 -m tox -e py38-pypi,py39-pypi,py310-pypi,py311-pypi,py312-pypi"
docker run --rm -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
docker run --rm --gpus all -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
FROM nvidia/cuda:12.1.0-devel-ubuntu18.04
FROM nvidia/cuda:12.1.0-devel-ubuntu20.04
ENV DEBIAN_FRONTEND=noninteractive \
TZ=Etc/UTC
RUN set -eux; \
apt-get update; \
# Install gcc-9 and g++-9
apt-get install -y software-properties-common; \
add-apt-repository ppa:ubuntu-toolchain-r/test -y; \
apt-get update; \
apt-get install -y wget curl libtinfo-dev zlib1g-dev libssl-dev build-essential \
libedit-dev libxml2-dev git gcc-9 g++-9; \
# Switch default gcc/g++ to new version
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100; \
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 100; \
update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100; \
update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100; \
gcc --version; g++ --version; \
libedit-dev libxml2-dev git; \
curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh; \
bash Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3; \
rm Miniconda3-latest-Linux-x86_64.sh;
......@@ -23,7 +19,7 @@ RUN apt-get update && apt-get install -y ninja-build
ENV PATH=/miniconda3/bin/:$PATH
# ✅ Accept Anaconda Terms of Service for both required channels
RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main; \
conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
# Create environments
......@@ -33,11 +29,11 @@ RUN set -eux; \
conda create -n py310 python=3.10 -y; \
conda create -n py311 python=3.11 -y; \
conda create -n py312 python=3.12 -y; \
ln -s /miniconda3/envs/py38/bin/python3.8 /usr/bin/python3.8; \
ln -s /miniconda3/envs/py39/bin/python3.9 /usr/bin/python3.9; \
ln -s /miniconda3/envs/py310/bin/python3.10 /usr/bin/python3.10; \
ln -s /miniconda3/envs/py311/bin/python3.11 /usr/bin/python3.11; \
ln -s /miniconda3/envs/py312/bin/python3.12 /usr/bin/python3.12; \
ln -sf /miniconda3/envs/py38/bin/python3.8 /usr/bin/python3.8; \
ln -sf /miniconda3/envs/py39/bin/python3.9 /usr/bin/python3.9; \
ln -sf /miniconda3/envs/py310/bin/python3.10 /usr/bin/python3.10; \
ln -sf /miniconda3/envs/py311/bin/python3.11 /usr/bin/python3.11; \
ln -sf /miniconda3/envs/py312/bin/python3.12 /usr/bin/python3.12; \
conda install -y cmake patchelf
WORKDIR /tilelang
# Build image for the TileLang distribution scripts, layered on the
# pytorch/manylinux-builder image with the cuda12.1 tag.
FROM pytorch/manylinux-builder:cuda12.1
# NOTE(review): DEBIAN_FRONTEND is a debconf/apt setting, while this image
# installs packages with yum below; confirm it is still needed here.
ENV DEBIAN_FRONTEND=noninteractive \
TZ=Etc/UTC
# Update the installed packages, then install the compression/SSL/XML/libedit
# development packages and the epel-release package with yum.
# `set -eux` aborts on the first failing command and echoes each command.
RUN set -eux; \
yum -y update && yum install -y \
zlib-devel openssl-devel \
libedit-devel libxml2-devel \
bzip2 bzip2-devel xz xz-devel \
epel-release
# Create one conda environment per Python version (3.8 through 3.12), symlink
# each environment's interpreter to /usr/bin/python3.X (`-sf` replaces any
# existing link), and finally install cmake and patchelf with conda.
# The steps are chained with `&&`, so a failure stops the rest of the chain.
RUN set -eux; \
conda create -n py38 python=3.8 -y && \
conda create -n py39 python=3.9 -y && \
conda create -n py310 python=3.10 -y && \
conda create -n py311 python=3.11 -y && \
conda create -n py312 python=3.12 -y && \
ln -sf /opt/conda/envs/py38/bin/python3.8 /usr/bin/python3.8 && \
ln -sf /opt/conda/envs/py39/bin/python3.9 /usr/bin/python3.9 && \
ln -sf /opt/conda/envs/py310/bin/python3.10 /usr/bin/python3.10 && \
ln -sf /opt/conda/envs/py311/bin/python3.11 /usr/bin/python3.11 && \
ln -sf /opt/conda/envs/py312/bin/python3.12 /usr/bin/python3.12 && \
conda install -y cmake patchelf
# Working directory for commands run in the container; the distribution
# scripts bind-mount the repository at this path (`-v $(pwd):/tilelang`).
WORKDIR /tilelang
from tvm.tir import (BufferStore, For, AttrStmt, ForKind, Var, PrimFunc, BufferLoad, Buffer, IntImm)
from tvm.tir.stmt_functor import ir_transform, post_order_visit
from tvm.tir.transform import prim_func_pass
from typing import Tuple, List, Dict
def AddWrapperForSingleBufStore():
......@@ -41,7 +42,7 @@ def AddWrapperForSingleBufStore():
post_order_visit(operation, visit_variable)
return used_variables
def collect_buffer_accesses(statement) -> tuple[list[Buffer], list[Buffer]]:
def collect_buffer_accesses(statement) -> Tuple[List[Buffer], List[Buffer]]:
"""
Categorizes buffers accessed in the statement by their scope.
......@@ -68,7 +69,7 @@ def AddWrapperForSingleBufStore():
local_buffers.append(buffer)
return local_buffers, fragment_buffers
def collect_buffer_indices(statement) -> dict[Buffer, list[int]]:
def collect_buffer_indices(statement) -> Dict[Buffer, List[int]]:
"""
Maps each buffer to its access indices.
......
import os
import torch
import warnings
from typing import Optional
from typing import Optional, Tuple
from tilelang.contrib import nvcc
from torch.utils.cpp_extension import load, _import_module_from_library
from tilelang import env
......@@ -44,7 +44,7 @@ def _get_cached_lib():
def compress_sm90(A: torch.Tensor, block_k: int,
transposed: bool) -> tuple[torch.Tensor, torch.Tensor]:
transposed: bool) -> Tuple[torch.Tensor, torch.Tensor]:
if block_k > 128:
block_k = 128
# Ref: https://github.com/NVIDIA/cutlass/blob/c2ad7c5b20f131c4ba33601860f1da3f9c9df0f3/include/cutlass/gemm/collective/builders/sm90_sparse_gmma_builder.inl#L145-L146
......@@ -56,7 +56,7 @@ def compress_sm90(A: torch.Tensor, block_k: int,
return compress_lib.compress_sm90(A, block_k, transposed)
def compress_sm80(A: torch.Tensor, transposed: bool) -> tuple[torch.Tensor, torch.Tensor]:
def compress_sm80(A: torch.Tensor, transposed: bool) -> Tuple[torch.Tensor, torch.Tensor]:
try:
from torch.sparse import to_sparse_semi_structured, SparseSemiStructuredTensor
except ImportError as err:
......@@ -76,7 +76,7 @@ def compress_sm80(A: torch.Tensor, transposed: bool) -> tuple[torch.Tensor, torc
def compress(A: torch.Tensor,
transposed: bool,
arch: Optional[str] = None,
**kwargs) -> tuple[torch.Tensor, torch.Tensor]:
**kwargs) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Compress a tensor using the appropriate method based on the CUDA architecture.
"""
......
[tox]
envlist = py38,py39,py310,py311,py312
isolated_build = True
isolated_build = False
[testenv:py{38,39,310,311,312}]
skip_install = false
deps =
wheel
build
setenv =
PYTHON_EXECUTABLE = {envpython}
Python3_EXECUTABLE = {envpython}
commands =
python -m build --wheel -o {toxinidir}/dist
[testenv:py{38,39,310,311,312}-pypi]
skip_install = false
setenv =
PYPI_BUILD = TRUE
PYTHON_EXECUTABLE = {envpython}
Python3_EXECUTABLE = {envpython}
commands =
python setup.py bdist_wheel
python setup.py bdist_wheel --plat-name=manylinux2014_x86_64
[testenv:audit_2_27]
[testenv:audit_manylinux2014]
skip_install = true
allowlist_externals =
bash
......@@ -24,7 +30,7 @@ deps =
auditwheel
patchelf
commands =
bash -c 'auditwheel repair -L=/lib --exclude=/usr/local/cuda* --exclude=libcuda.so.1 --plat=manylinux_2_27_x86_64 dist/*'
bash -c 'auditwheel repair -L=/lib --exclude=/usr/local/cuda* --exclude=libcuda.so.1 --plat=manylinux2014_x86_64 dist/*'
[testenv:py38]
basepython = python3.8
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment