Unverified commit a3497ebc, authored by Lei Wang, committed by GitHub
Browse files

[PATCH] Static libstdc++ linking fix (#854)

* bump version to 0.1.6

* phase out py38

* py39

* Update submodule 'tvm' to latest commit adc0e48

* [Build] Update CMake and Python environment settings

- Added static linking flags for GCC and libstdc++ in CMakeLists.txt to enhance library linking.
- Removed the cmake version requirement from pyproject.toml to allow for broader compatibility.
- Updated the tox command in the Docker distribution script to include Python 3.8 for testing environments.

* [Build] Update Python version requirements in scripts and documentation

- Changed Python version requirement in README.md from 3.9+ to 3.8+.
- Updated installation and testing scripts to use Python 3.8 instead of 3.9, ensuring compatibility with the new minimum version.
- Adjusted tox commands in local and PyPI distribution scripts to include Python 3.8 in the testing environments.

* [Build] Update Python and CMake requirements in Dockerfile and pyproject.toml

- Added CMake version requirement (>=3.26) to pyproject.toml for build compatibility.
- Created a Python 3.8 environment in the Dockerfile and added a symlink for easier access to the Python 3.8 executable.

* [Build] Update CMake and Dockerfile for improved compatibility

- Removed static linking flags from CMakeLists.txt to simplify build configuration.
- Updated Dockerfile to use Ubuntu 20.04 and streamlined the installation of dependencies, removing gcc-9 and g++-9.
- Adjusted symlink creation for Python environments to use the `-sf` option for safer linking.

* [Build] Bump version to 0.1.6.post1 for post-release updates

* [Build] Remove static linking flags from CMakeLists.txt

- Eliminated static linking flags for GCC and libstdc++ to simplify build configuration and avoid potential conflicts with Python extensions.

* [Build] Update Docker distribution scripts for manylinux compatibility

- Changed base image from `tilelang-builder:18.04` to `tilelang-builder:manylinux` in both local and PyPI distribution scripts.
- Updated Dockerfile references to use `pypi.manylinux.Dockerfile`.
- Added `--gpus all` flag to the Docker run command to enable GPU support during execution.

* lint fix

* add cmake
parent 1ad6e461
...@@ -7,9 +7,6 @@ project(TILE_LANG C CXX) ...@@ -7,9 +7,6 @@ project(TILE_LANG C CXX)
option(TILE_LANG_STATIC_STDCPP "Statically link libstdc++ for TileLang libraries" ON) option(TILE_LANG_STATIC_STDCPP "Statically link libstdc++ for TileLang libraries" ON)
option(TILE_LANG_INSTALL_STATIC_LIB "Install the static library" ON) option(TILE_LANG_INSTALL_STATIC_LIB "Install the static library" ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -static-libgcc -static-libstdc++")
if(TILE_LANG_STATIC_STDCPP) if(TILE_LANG_STATIC_STDCPP)
message(STATUS "Enabling static linking of C++ standard library") message(STATUS "Enabling static linking of C++ standard library")
# Note: We'll apply static linking flags selectively to avoid Python extension conflicts # Note: We'll apply static linking flags selectively to avoid Python extension conflicts
......
# Get the CUDA version from the command line # Get the CUDA version from the command line
IMAGE="tilelang-builder:18.04" IMAGE="tilelang-builder:manylinux"
docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.Dockerfile" --tag ${IMAGE} docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.manylinux.Dockerfile" --tag ${IMAGE}
install_pip="python3.8 -m pip install --upgrade pip && python3.8 -m pip install -r requirements-build.txt" install_pip="python3.8 -m pip install --upgrade pip && python3.8 -m pip install -r requirements-build.txt"
tox_command="python3.8 -m tox -e py38,py39,py310,py311,py312" tox_command="python3.8 -m tox -e py38,py39,py310,py311,py312"
docker run --rm -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command" docker run --rm --gpus all -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
# Get the CUDA version from the command line # Get the CUDA version from the command line
IMAGE="tilelang-builder:18.04" IMAGE="tilelang-builder:manylinux"
docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.Dockerfile" --tag ${IMAGE} docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.manylinux.Dockerfile" --tag ${IMAGE}
install_pip="python3.8 -m pip install --upgrade pip && python3.8 -m pip install -r requirements-build.txt" install_pip="python3.8 -m pip install --upgrade pip && python3.8 -m pip install -r requirements-build.txt"
tox_command="python3.8 -m tox -e py38-pypi,py39-pypi,py310-pypi,py311-pypi,py312-pypi,audit_2_27" tox_command="python3.8 -m tox -e py38-pypi,py39-pypi,py310-pypi,py311-pypi,py312-pypi"
docker run --rm -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command" docker run --rm --gpus all -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
FROM nvidia/cuda:12.1.0-devel-ubuntu18.04 FROM nvidia/cuda:12.1.0-devel-ubuntu20.04
ENV DEBIAN_FRONTEND=noninteractive \
TZ=Etc/UTC
RUN set -eux; \ RUN set -eux; \
apt-get update; \ apt-get update; \
# Install gcc-9 and g++-9
apt-get install -y software-properties-common; \ apt-get install -y software-properties-common; \
add-apt-repository ppa:ubuntu-toolchain-r/test -y; \ add-apt-repository ppa:ubuntu-toolchain-r/test -y; \
apt-get update; \ apt-get update; \
apt-get install -y wget curl libtinfo-dev zlib1g-dev libssl-dev build-essential \ apt-get install -y wget curl libtinfo-dev zlib1g-dev libssl-dev build-essential \
libedit-dev libxml2-dev git gcc-9 g++-9; \ libedit-dev libxml2-dev git; \
# Switch default gcc/g++ to new version
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100; \
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 100; \
update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100; \
update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100; \
gcc --version; g++ --version; \
curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh; \ curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh; \
bash Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3; \ bash Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3; \
rm Miniconda3-latest-Linux-x86_64.sh; rm Miniconda3-latest-Linux-x86_64.sh;
...@@ -23,7 +19,7 @@ RUN apt-get update && apt-get install -y ninja-build ...@@ -23,7 +19,7 @@ RUN apt-get update && apt-get install -y ninja-build
ENV PATH=/miniconda3/bin/:$PATH ENV PATH=/miniconda3/bin/:$PATH
# ✅ Accept Anaconda Terms of Service for both required channels # ✅ Accept Anaconda Terms of Service for both required channels
RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \ RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main; \
conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
# Create environments # Create environments
...@@ -33,11 +29,11 @@ RUN set -eux; \ ...@@ -33,11 +29,11 @@ RUN set -eux; \
conda create -n py310 python=3.10 -y; \ conda create -n py310 python=3.10 -y; \
conda create -n py311 python=3.11 -y; \ conda create -n py311 python=3.11 -y; \
conda create -n py312 python=3.12 -y; \ conda create -n py312 python=3.12 -y; \
ln -s /miniconda3/envs/py38/bin/python3.8 /usr/bin/python3.8; \ ln -sf /miniconda3/envs/py38/bin/python3.8 /usr/bin/python3.8; \
ln -s /miniconda3/envs/py39/bin/python3.9 /usr/bin/python3.9; \ ln -sf /miniconda3/envs/py39/bin/python3.9 /usr/bin/python3.9; \
ln -s /miniconda3/envs/py310/bin/python3.10 /usr/bin/python3.10; \ ln -sf /miniconda3/envs/py310/bin/python3.10 /usr/bin/python3.10; \
ln -s /miniconda3/envs/py311/bin/python3.11 /usr/bin/python3.11; \ ln -sf /miniconda3/envs/py311/bin/python3.11 /usr/bin/python3.11; \
ln -s /miniconda3/envs/py312/bin/python3.12 /usr/bin/python3.12; \ ln -sf /miniconda3/envs/py312/bin/python3.12 /usr/bin/python3.12; \
conda install -y cmake patchelf conda install -y cmake patchelf
WORKDIR /tilelang WORKDIR /tilelang
FROM pytorch/manylinux-builder:cuda12.1
ENV DEBIAN_FRONTEND=noninteractive \
TZ=Etc/UTC
RUN set -eux; \
yum -y update && yum install -y \
zlib-devel openssl-devel \
libedit-devel libxml2-devel \
bzip2 bzip2-devel xz xz-devel \
epel-release
RUN set -eux; \
conda create -n py38 python=3.8 -y && \
conda create -n py39 python=3.9 -y && \
conda create -n py310 python=3.10 -y && \
conda create -n py311 python=3.11 -y && \
conda create -n py312 python=3.12 -y && \
ln -sf /opt/conda/envs/py38/bin/python3.8 /usr/bin/python3.8 && \
ln -sf /opt/conda/envs/py39/bin/python3.9 /usr/bin/python3.9 && \
ln -sf /opt/conda/envs/py310/bin/python3.10 /usr/bin/python3.10 && \
ln -sf /opt/conda/envs/py311/bin/python3.11 /usr/bin/python3.11 && \
ln -sf /opt/conda/envs/py312/bin/python3.12 /usr/bin/python3.12 && \
conda install -y cmake patchelf
WORKDIR /tilelang
from tvm.tir import (BufferStore, For, AttrStmt, ForKind, Var, PrimFunc, BufferLoad, Buffer, IntImm) from tvm.tir import (BufferStore, For, AttrStmt, ForKind, Var, PrimFunc, BufferLoad, Buffer, IntImm)
from tvm.tir.stmt_functor import ir_transform, post_order_visit from tvm.tir.stmt_functor import ir_transform, post_order_visit
from tvm.tir.transform import prim_func_pass from tvm.tir.transform import prim_func_pass
from typing import Tuple, List, Dict
def AddWrapperForSingleBufStore(): def AddWrapperForSingleBufStore():
...@@ -41,7 +42,7 @@ def AddWrapperForSingleBufStore(): ...@@ -41,7 +42,7 @@ def AddWrapperForSingleBufStore():
post_order_visit(operation, visit_variable) post_order_visit(operation, visit_variable)
return used_variables return used_variables
def collect_buffer_accesses(statement) -> tuple[list[Buffer], list[Buffer]]: def collect_buffer_accesses(statement) -> Tuple[List[Buffer], List[Buffer]]:
""" """
Categorizes buffers accessed in the statement by their scope. Categorizes buffers accessed in the statement by their scope.
...@@ -68,7 +69,7 @@ def AddWrapperForSingleBufStore(): ...@@ -68,7 +69,7 @@ def AddWrapperForSingleBufStore():
local_buffers.append(buffer) local_buffers.append(buffer)
return local_buffers, fragment_buffers return local_buffers, fragment_buffers
def collect_buffer_indices(statement) -> dict[Buffer, list[int]]: def collect_buffer_indices(statement) -> Dict[Buffer, List[int]]:
""" """
Maps each buffer to its access indices. Maps each buffer to its access indices.
......
import os import os
import torch import torch
import warnings import warnings
from typing import Optional from typing import Optional, Tuple
from tilelang.contrib import nvcc from tilelang.contrib import nvcc
from torch.utils.cpp_extension import load, _import_module_from_library from torch.utils.cpp_extension import load, _import_module_from_library
from tilelang import env from tilelang import env
...@@ -44,7 +44,7 @@ def _get_cached_lib(): ...@@ -44,7 +44,7 @@ def _get_cached_lib():
def compress_sm90(A: torch.Tensor, block_k: int, def compress_sm90(A: torch.Tensor, block_k: int,
transposed: bool) -> tuple[torch.Tensor, torch.Tensor]: transposed: bool) -> Tuple[torch.Tensor, torch.Tensor]:
if block_k > 128: if block_k > 128:
block_k = 128 block_k = 128
# Ref: https://github.com/NVIDIA/cutlass/blob/c2ad7c5b20f131c4ba33601860f1da3f9c9df0f3/include/cutlass/gemm/collective/builders/sm90_sparse_gmma_builder.inl#L145-L146 # Ref: https://github.com/NVIDIA/cutlass/blob/c2ad7c5b20f131c4ba33601860f1da3f9c9df0f3/include/cutlass/gemm/collective/builders/sm90_sparse_gmma_builder.inl#L145-L146
...@@ -56,7 +56,7 @@ def compress_sm90(A: torch.Tensor, block_k: int, ...@@ -56,7 +56,7 @@ def compress_sm90(A: torch.Tensor, block_k: int,
return compress_lib.compress_sm90(A, block_k, transposed) return compress_lib.compress_sm90(A, block_k, transposed)
def compress_sm80(A: torch.Tensor, transposed: bool) -> tuple[torch.Tensor, torch.Tensor]: def compress_sm80(A: torch.Tensor, transposed: bool) -> Tuple[torch.Tensor, torch.Tensor]:
try: try:
from torch.sparse import to_sparse_semi_structured, SparseSemiStructuredTensor from torch.sparse import to_sparse_semi_structured, SparseSemiStructuredTensor
except ImportError as err: except ImportError as err:
...@@ -76,7 +76,7 @@ def compress_sm80(A: torch.Tensor, transposed: bool) -> tuple[torch.Tensor, torc ...@@ -76,7 +76,7 @@ def compress_sm80(A: torch.Tensor, transposed: bool) -> tuple[torch.Tensor, torc
def compress(A: torch.Tensor, def compress(A: torch.Tensor,
transposed: bool, transposed: bool,
arch: Optional[str] = None, arch: Optional[str] = None,
**kwargs) -> tuple[torch.Tensor, torch.Tensor]: **kwargs) -> Tuple[torch.Tensor, torch.Tensor]:
""" """
Compress a tensor using the appropriate method based on the CUDA architecture. Compress a tensor using the appropriate method based on the CUDA architecture.
""" """
......
[tox] [tox]
envlist = py38,py39,py310,py311,py312 envlist = py38,py39,py310,py311,py312
isolated_build = True isolated_build = False
[testenv:py{38,39,310,311,312}] [testenv:py{38,39,310,311,312}]
skip_install = false
deps = deps =
wheel wheel
build build
setenv =
PYTHON_EXECUTABLE = {envpython}
Python3_EXECUTABLE = {envpython}
commands = commands =
python -m build --wheel -o {toxinidir}/dist python -m build --wheel -o {toxinidir}/dist
[testenv:py{38,39,310,311,312}-pypi] [testenv:py{38,39,310,311,312}-pypi]
skip_install = false
setenv = setenv =
PYPI_BUILD = TRUE PYPI_BUILD = TRUE
PYTHON_EXECUTABLE = {envpython}
Python3_EXECUTABLE = {envpython}
commands = commands =
python setup.py bdist_wheel python setup.py bdist_wheel --plat-name=manylinux2014_x86_64
[testenv:audit_2_27] [testenv:audit_manylinux2014]
skip_install = true skip_install = true
allowlist_externals = allowlist_externals =
bash bash
...@@ -24,7 +30,7 @@ deps = ...@@ -24,7 +30,7 @@ deps =
auditwheel auditwheel
patchelf patchelf
commands = commands =
bash -c 'auditwheel repair -L=/lib --exclude=/usr/local/cuda* --exclude=libcuda.so.1 --plat=manylinux_2_27_x86_64 dist/*' bash -c 'auditwheel repair -L=/lib --exclude=/usr/local/cuda* --exclude=libcuda.so.1 --plat=manylinux2014_x86_64 dist/*'
[testenv:py38] [testenv:py38]
basepython = python3.8 basepython = python3.8
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment