Unverified Commit bbbf4207 authored by guchaoyang's avatar guchaoyang Committed by GitHub
Browse files

Merge branch 'main' into dcu

parents 8f4628e0 5eb30a4f
set -eux
#!/usr/bin/env bash
set -euxo pipefail
# Get the CUDA version from the command line
IMAGE="tilelang-builder:manylinux"
docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.manylinux.Dockerfile" --tag ${IMAGE}
script="sh maint/scripts/local_distribution.sh"
docker run --rm -v $(pwd):/tilelang ${IMAGE} /bin/bash -c "$script"
# Build for local architecture
CIBW_BUILD='cp38-*' cibuildwheel .
set -eux
#!/usr/bin/env bash
set -euxo pipefail
# Get the CUDA version from the command line
IMAGE="tilelang-builder:manylinux"
docker build . -f "$(dirname "${BASH_SOURCE[0]}")/pypi.manylinux.Dockerfile" --tag ${IMAGE}
if docker buildx version >/dev/null 2>&1; then
if docker info >/dev/null 2>&1; then
docker run --rm --privileged tonistiigi/binfmt --install amd64,arm64 >/dev/null 2>&1 || true
fi
script="sh maint/scripts/pypi_distribution.sh"
if ! docker buildx inspect multi >/dev/null 2>&1; then
docker buildx create --name multi --driver docker-container --use >/dev/null 2>&1 || true
else
docker buildx use multi >/dev/null 2>&1 || true
fi
docker buildx inspect --bootstrap >/dev/null 2>&1 || true
done
docker run --rm -v $(pwd):/tilelang -w /tilelang ${IMAGE} /bin/bash -c "$script"
export CIBW_ARCHS='x86_64 aarch64'
fi
NO_VERSION_LABEL=ON CIBW_BUILD='cp38-*' cibuildwheel .
FROM pytorch/manylinux2_28-builder:cuda12.1 AS builder_amd64
ENV CUDA_VERSION=12.1 \
AUDITWHEEL_PLAT=manylinux_2_28_x86_64
RUN pip3 install uv
FROM quay.io/pypa/manylinux2014_x86_64 AS builder_amd64
FROM pytorch/manylinuxaarch64-builder:cuda12.8 AS builder_arm64
ENV CUDA_VERSION=12.8 \
AUDITWHEEL_PLAT=manylinux_2_28_aarch64
RUN yum-config-manager --add-repo https://developer.download.nvidia.cn/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
ARG CUDA_VERSION=12.1
ENV CUDA_VERSION=${CUDA_VERSION}
FROM quay.io/pypa/manylinux_2_28_aarch64 AS builder_arm64
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
ARG CUDA_VERSION=12.8
ENV CUDA_VERSION=${CUDA_VERSION}
ARG TARGETARCH
FROM builder_${TARGETARCH}
ENV DEBIAN_FRONTEND=noninteractive \
TZ=Etc/UTC
RUN set -eux; \
uv venv -p 3.12 --seed /venv; \
git config --global --add safe.directory '/tilelang'
ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV PATH="/venv/bin:$PATH" \
VIRTUAL_ENV=/venv
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
RUN uv pip install build wheel
RUN set -eux; \
pipx install cibuildwheel; \
git config --global --add safe.directory '/tilelang'
WORKDIR /tilelang
set -eux
rm -rf dist
rm -rf dist raw_dist
python -mpip install -U pip
python -mpip install -U build wheel auditwheel patchelf
......
This diff is collapsed.
# Requirements to run local build with `--no-build-isolation` or other developments
apache-tvm-ffi~=0.1.0
build
cmake>=3.26
cython>=3.0.0
......
......@@ -3,5 +3,5 @@ pre-commit
clang-format==21.1.2
clang-tidy==21.1.1
codespell[toml]==2.4.1
ruff==0.14.1
ruff==0.14.3
yapf==0.43.0
......@@ -18,10 +18,11 @@ cython
docutils
dtlib
einops
flash-linear-attention==0.3.2
packaging>=21.0
pytest-xdist>=2.2.1
pytest-durations
pytest-timeout
pytest-xdist>=2.2.1
pytest>=6.2.4
pyyaml
requests
......
# Runtime requirements
apache-tvm-ffi~=0.1.0
cloudpickle
ml-dtypes
numpy>=1.23.5
......@@ -7,4 +8,3 @@ torch
torch>=2.7; platform_system == 'Darwin'
tqdm>=4.62.3
typing-extensions>=4.10.0
flash-linear-attention==0.3.2
\ No newline at end of file
This diff is collapsed.
......@@ -594,11 +594,11 @@ Layout MakeGemmVoltaBLayoutCongruous(int stride, int continuous) {
Layout makeGemmVoltaABLayout(int stride, int continuous, bool is_a,
bool k_inner) {
if (k_inner)
if (k_inner && continuous % 32 == 0 && stride % 32 == 0)
return MakeGemmVoltaABLayoutCrosswise(stride, continuous);
if (is_a && continuous % 64 == 0)
if (is_a && continuous % 64 == 0 && stride % 4 == 0)
return MakeGemmVoltaALayoutCongruous(stride, continuous);
if (!is_a && continuous % 64 == 0)
if (!is_a && continuous % 64 == 0 && stride % 4 == 0)
return MakeGemmVoltaBLayoutCongruous(stride, continuous);
return makeGemmABLayoutPadded(stride, continuous, 16);
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -9,6 +9,8 @@
#include <tvm/arith/iter_affine_map.h>
#include "../support/ffi_aliases.h"
namespace tvm {
namespace tl {
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment