[PATCH] Static libg++ linking fix (#854)

* bump version to 0.1.6
* phase out py38
* py39
* Update submodule 'tvm' to latest commit adc0e48
* [Build] Update CMake and Python environment settings
  - Added static linking flags for GCC and libstdc++ in CMakeLists.txt to enhance library linking.
  - Removed the cmake version requirement from pyproject.toml to allow for broader compatibility.
  - Updated the tox command in the Docker distribution script to include Python 3.8 for testing environments.
* [Build] Update Python version requirements in scripts and documentation
  - Changed Python version requirement in README.md from 3.9+ to 3.8+.
  - Updated installation and testing scripts to use Python 3.8 instead of 3.9, ensuring compatibility with the new minimum version.
  - Adjusted tox commands in local and PyPI distribution scripts to include Python 3.8 in the testing environments.
* [Build] Update Python and CMake requirements in Dockerfile and pyproject.toml
  - Added CMake version requirement (>=3.26) to pyproject.toml for build compatibility.
  - Created a Python 3.8 environment in the Dockerfile and added a symlink for easier access to the Python 3.8 executable.
* [Build] Update CMake and Dockerfile for improved compatibility
  - Removed static linking flags from CMakeLists.txt to simplify build configuration.
  - Updated Dockerfile to use Ubuntu 20.04 and streamlined the installation of dependencies, removing gcc-9 and g++-9.
  - Adjusted symlink creation for Python environments to use the `-sf` option for safer linking.
* [Build] Bump version to 0.1.6.post1 for post-release updates
* [Build] Remove static linking flags from CMakeLists.txt
  - Eliminated static linking flags for GCC and libstdc++ to simplify build configuration and avoid potential conflicts with Python extensions.
* [Build] Update Docker distribution scripts for manylinux compatibility
  - Changed base image from `tilelang-builder:18.04` to `tilelang-builder:manylinux` in both local and PyPI distribution scripts.
  - Updated Dockerfile references to use `pypi.manylinux.Dockerfile`.
  - Added `--gpus all` flag to the Docker run command to enable GPU support during execution.
* lint fix
* add cmake

[PATCH] Static libg++ linking fix (#854)
* bump version to 0.1.6
* phase out py38
* py39
* Update submodule 'tvm' to latest commit adc0e48
* [Build] Update CMake and Python environment settings
  - Added static linking flags for GCC and libstdc++ in CMakeLists.txt to enhance library linking.
  - Removed the cmake version requirement from pyproject.toml to allow for broader compatibility.
  - Updated the tox command in the Docker distribution script to include Python 3.8 for testing environments.
* [Build] Update Python version requirements in scripts and documentation
  - Changed Python version requirement in README.md from 3.9+ to 3.8+.
  - Updated installation and testing scripts to use Python 3.8 instead of 3.9, ensuring compatibility with the new minimum version.
  - Adjusted tox commands in local and PyPI distribution scripts to include Python 3.8 in the testing environments.
* [Build] Update Python and CMake requirements in Dockerfile and pyproject.toml
  - Added CMake version requirement (>=3.26) to pyproject.toml for build compatibility.
  - Created a Python 3.8 environment in the Dockerfile and added a symlink for easier access to the Python 3.8 executable.
* [Build] Update CMake and Dockerfile for improved compatibility
  - Removed static linking flags from CMakeLists.txt to simplify build configuration.
  - Updated Dockerfile to use Ubuntu 20.04 and streamlined the installation of dependencies, removing gcc-9 and g++-9.
  - Adjusted symlink creation for Python environments to use the `-sf` option for safer linking.
* [Build] Bump version to 0.1.6.post1 for post-release updates
* [Build] Remove static linking flags from CMakeLists.txt
  - Eliminated static linking flags for GCC and libstdc++ to simplify build configuration and avoid potential conflicts with Python extensions.
* [Build] Update Docker distribution scripts for manylinux compatibility
  - Changed base image from `tilelang-builder:18.04` to `tilelang-builder:manylinux` in both local and PyPI distribution scripts.
  - Updated Dockerfile references to use `pypi.manylinux.Dockerfile`.
  - Added `--gpus all` flag to the Docker run command to enable GPU support during execution.
* lint fix
* add cmake
a3497ebc · Lei Wang · GitHub · 1ad6e461 · a3497ebc · a3497ebc
Unverified commit a3497ebc, authored Sep 22, 2025 by Lei Wang; committed by GitHub on Sep 22, 2025.
9 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,9 +7,6 @@ project(TILE_LANG C CXX)
 option(TILE_LANG_STATIC_STDCPP "Statically link libstdc++ for TileLang libraries" ON)
 option(TILE_LANG_INSTALL_STATIC_LIB "Install the static library" ON)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -static-libgcc -static-libstdc++")
 if(TILE_LANG_STATIC_STDCPP)
  message(STATUS "Enabling static linking of C++ standard library")
  # Note: We'll apply static linking flags selectively to avoid Python extension conflicts

--- a/VERSION
+++ b/VERSION
-0.1.6
+0.1.6.post1
--- a/maint/scripts/docker_local_distribute.sh
+++ b/maint/scripts/docker_local_distribute.sh
 # Get the CUDA version from the command line
-IMAGE="tilelang-builder:18.04"
+IMAGE="tilelang-builder:manylinux"
-docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.Dockerfile" --tag ${IMAGE}
+docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.manylinux.Dockerfile" --tag ${IMAGE}
 install_pip="python3.8 -m pip install --upgrade pip && python3.8 -m pip install -r requirements-build.txt"
 tox_command="python3.8 -m tox -e py38,py39,py310,py311,py312"
-docker run --rm -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
+docker run --rm --gpus all -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
--- a/maint/scripts/docker_pypi_distribute.sh
+++ b/maint/scripts/docker_pypi_distribute.sh
 # Get the CUDA version from the command line
-IMAGE="tilelang-builder:18.04"
+IMAGE="tilelang-builder:manylinux"
-docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.Dockerfile" --tag ${IMAGE}
+docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.manylinux.Dockerfile" --tag ${IMAGE}
 install_pip="python3.8 -m pip install --upgrade pip && python3.8 -m pip install -r requirements-build.txt"
-tox_command="python3.8 -m tox -e py38-pypi,py39-pypi,py310-pypi,py311-pypi,py312-pypi,audit_2_27"
+tox_command="python3.8 -m tox -e py38-pypi,py39-pypi,py310-pypi,py311-pypi,py312-pypi"
-docker run --rm -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
+docker run --rm --gpus all -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$install_pip && $tox_command"
--- a/maint/scripts/pypi.Dockerfile
+++ b/maint/scripts/pypi.Dockerfile
-FROM nvidia/cuda:12.1.0-devel-ubuntu18.04
+FROM nvidia/cuda:12.1.0-devel-ubuntu20.04
+ENV DEBIAN_FRONTEND=noninteractive \
+    TZ=Etc/UTC
 RUN set -eux; \
    apt-get update; \
-    # Install gcc-9 and g++-9
    apt-get install -y software-properties-common; \
    add-apt-repository ppa:ubuntu-toolchain-r/test -y; \
    apt-get update; \
    apt-get install -y wget curl libtinfo-dev zlib1g-dev libssl-dev build-essential \
-                       libedit-dev libxml2-dev git gcc-9 g++-9; \
+                       libedit-dev libxml2-dev git; \
-    # Switch default gcc/g++ to new version
-    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100; \
-    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 100; \
-    update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100; \
-    update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100; \
-    gcc --version; g++ --version; \
    curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh; \
    bash Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda3; \
    rm Miniconda3-latest-Linux-x86_64.sh;
@@ -23,7 +19,7 @@ RUN apt-get update && apt-get install -y ninja-build
 ENV PATH=/miniconda3/bin/:$PATH
 # ✅ Accept Anaconda Terms of Service for both required channels
-RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
+RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main; \
    conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
 # Create environments
@@ -33,11 +29,11 @@ RUN set -eux; \
    conda create -n py310 python=3.10 -y; \
    conda create -n py311 python=3.11 -y; \
    conda create -n py312 python=3.12 -y; \
-    ln -s /miniconda3/envs/py38/bin/python3.8 /usr/bin/python3.8; \
+    ln -sf /miniconda3/envs/py38/bin/python3.8 /usr/bin/python3.8; \
-    ln -s /miniconda3/envs/py39/bin/python3.9 /usr/bin/python3.9; \
+    ln -sf /miniconda3/envs/py39/bin/python3.9 /usr/bin/python3.9; \
-    ln -s /miniconda3/envs/py310/bin/python3.10 /usr/bin/python3.10; \
+    ln -sf /miniconda3/envs/py310/bin/python3.10 /usr/bin/python3.10; \
-    ln -s /miniconda3/envs/py311/bin/python3.11 /usr/bin/python3.11; \
+    ln -sf /miniconda3/envs/py311/bin/python3.11 /usr/bin/python3.11; \
-    ln -s /miniconda3/envs/py312/bin/python3.12 /usr/bin/python3.12; \
+    ln -sf /miniconda3/envs/py312/bin/python3.12 /usr/bin/python3.12; \
    conda install -y cmake patchelf
 WORKDIR /tilelang
--- a/maint/scripts/pypi.manylinux.Dockerfile
+++ b/maint/scripts/pypi.manylinux.Dockerfile
+FROM pytorch/manylinux-builder:cuda12.1
+ENV DEBIAN_FRONTEND=noninteractive \
+    TZ=Etc/UTC
+RUN set -eux; \
+    yum -y update && yum install -y \
+        zlib-devel openssl-devel \
+        libedit-devel libxml2-devel \
+        bzip2 bzip2-devel xz xz-devel \
+        epel-release
+RUN set -eux; \
+    conda create -n py38 python=3.8 -y && \
+    conda create -n py39 python=3.9 -y && \
+    conda create -n py310 python=3.10 -y && \
+    conda create -n py311 python=3.11 -y && \
+    conda create -n py312 python=3.12 -y && \
+    ln -sf /opt/conda/envs/py38/bin/python3.8 /usr/bin/python3.8 && \
+    ln -sf /opt/conda/envs/py39/bin/python3.9 /usr/bin/python3.9 && \
+    ln -sf /opt/conda/envs/py310/bin/python3.10 /usr/bin/python3.10 && \
+    ln -sf /opt/conda/envs/py311/bin/python3.11 /usr/bin/python3.11 && \
+    ln -sf /opt/conda/envs/py312/bin/python3.12 /usr/bin/python3.12 && \
+    conda install -y cmake patchelf
+WORKDIR /tilelang
--- a/tilelang/transform/add_bufstore_wrapper.py
+++ b/tilelang/transform/add_bufstore_wrapper.py
 from tvm.tir import (BufferStore, For, AttrStmt, ForKind, Var, PrimFunc, BufferLoad, Buffer, IntImm)
 from tvm.tir.stmt_functor import ir_transform, post_order_visit
 from tvm.tir.transform import prim_func_pass
+from typing import Tuple, List, Dict
 def AddWrapperForSingleBufStore():
@@ -41,7 +42,7 @@ def AddWrapperForSingleBufStore():
            post_order_visit(operation, visit_variable)
            return used_variables
-        def collect_buffer_accesses(statement) -> tuple[list[Buffer], list[Buffer]]:
+        def collect_buffer_accesses(statement) -> Tuple[List[Buffer], List[Buffer]]:
            """
            Categorizes buffers accessed in the statement by their scope.
@@ -68,7 +69,7 @@ def AddWrapperForSingleBufStore():
                    local_buffers.append(buffer)
            return local_buffers, fragment_buffers
-        def collect_buffer_indices(statement) -> dict[Buffer, list[int]]:
+        def collect_buffer_indices(statement) -> Dict[Buffer, List[int]]:
            """
            Maps each buffer to its access indices.

--- a/tilelang/utils/sparse.py
+++ b/tilelang/utils/sparse.py
 import os
 import torch
 import warnings
-from typing import Optional
+from typing import Optional, Tuple
 from tilelang.contrib import nvcc
 from torch.utils.cpp_extension import load, _import_module_from_library
 from tilelang import env
@@ -44,7 +44,7 @@ def _get_cached_lib():
 def compress_sm90(A: torch.Tensor, block_k: int,
-                  transposed: bool) -> tuple[torch.Tensor, torch.Tensor]:
+                  transposed: bool) -> Tuple[torch.Tensor, torch.Tensor]:
    if block_k > 128:
        block_k = 128
        # Ref: https://github.com/NVIDIA/cutlass/blob/c2ad7c5b20f131c4ba33601860f1da3f9c9df0f3/include/cutlass/gemm/collective/builders/sm90_sparse_gmma_builder.inl#L145-L146
@@ -56,7 +56,7 @@ def compress_sm90(A: torch.Tensor, block_k: int,
    return compress_lib.compress_sm90(A, block_k, transposed)
-def compress_sm80(A: torch.Tensor, transposed: bool) -> tuple[torch.Tensor, torch.Tensor]:
+def compress_sm80(A: torch.Tensor, transposed: bool) -> Tuple[torch.Tensor, torch.Tensor]:
    try:
        from torch.sparse import to_sparse_semi_structured, SparseSemiStructuredTensor
    except ImportError as err:
@@ -76,7 +76,7 @@ def compress_sm80(A: torch.Tensor, transposed: bool) -> tuple[torch.Tensor, torc
 def compress(A: torch.Tensor,
             transposed: bool,
             arch: Optional[str] = None,
-             **kwargs) -> tuple[torch.Tensor, torch.Tensor]:
+             **kwargs) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Compress a tensor using the appropriate method based on the CUDA architecture.
    """

--- a/tox.ini
+++ b/tox.ini
 [tox]
 envlist = py38,py39,py310,py311,py312
-isolated_build = True
+isolated_build = False
 [testenv:py{38,39,310,311,312}]
+skip_install = false
 deps =
    wheel
    build
+setenv =
+    PYTHON_EXECUTABLE = {envpython}
+    Python3_EXECUTABLE = {envpython}
 commands =
    python -m build --wheel -o {toxinidir}/dist
 [testenv:py{38,39,310,311,312}-pypi]
+skip_install = false
 setenv =
    PYPI_BUILD = TRUE
+    PYTHON_EXECUTABLE = {envpython}
+    Python3_EXECUTABLE = {envpython}
 commands =
-    python setup.py bdist_wheel
+    python setup.py bdist_wheel --plat-name=manylinux2014_x86_64
-[testenv:audit_2_27]
+[testenv:audit_manylinux2014]
 skip_install = true
 allowlist_externals =
    bash
@@ -24,7 +30,7 @@ deps =
    auditwheel
    patchelf
 commands =
-    bash -c 'auditwheel repair -L=/lib --exclude=/usr/local/cuda* --exclude=libcuda.so.1 --plat=manylinux_2_27_x86_64 dist/*'
+    bash -c 'auditwheel repair -L=/lib --exclude=/usr/local/cuda* --exclude=libcuda.so.1 --plat=manylinux2014_x86_64 dist/*'
 [testenv:py38]
 basepython = python3.8