Commit cf6e11c9 authored by qisan's avatar qisan
Browse files

feat: merge dcu branch features

parents 3f27f85a d0436b7b
Pipeline #3369 failed with stages
in 0 seconds
---
# clang-format settings: start from the LLVM preset, then pin indentation,
# line width, language and language standard explicitly.
BasedOnStyle: LLVM
# Indent with spaces only, two per level; wrap at 80 columns.
UseTab: Never
IndentWidth: 2
ColumnLimit: 80
# This section applies to C++ sources, parsed as C++17.
Language: Cpp
Standard: c++17
---
# clang-tidy options. `InheritParentConfig` merges this file with any
# `.clang-tidy` found in a parent directory instead of replacing it.
InheritParentConfig: true
ExtraArgs: []
# Use the repository's `.clang-format` file when formatting applied fixes.
FormatStyle: file
UseColor: true
# Every diagnostic from an enabled check is reported as an error.
WarningsAsErrors: '*'
# FIXME: Use `ExcludeHeaderFilterRegex` instead once all maintainers have upgraded their `clang-tidy`.
# Until then, a negative lookahead selects every header whose path has no
# `3rdparty/` or `tvm/` directory component.
HeaderFilterRegex: '^(?!.*(?:/|^)(3rdparty|tvm)/).*'
# ExcludeHeaderFilterRegex: '^(3rdparty|tvm)/.*$'
# NOTE: there must be no spaces before the '-', so put the comma last.
# The check list is a folded block scalar, so every line inside it is part of
# the value: YAML has no comments inside block scalars. Keep all commentary out
# here. A `#` line placed inside the scalar is folded into the glob that
# follows it and silently turns that entry into a pattern that matches nothing.
#
# Groups, in order (separated by blank lines inside the scalar):
#   1. Retained categories: easier to find bugs/performance issues
#   2. Readability: only keep useful rules
#   3. Disable all intrusive/style-breaking rules
#
# Entries are applied left to right, so a `-check` in group 3 overrides an
# earlier entry that enabled the same check (several group 1 checks are
# switched off again this way).
Checks: >-
  clang-analyzer-*,
  cppcoreguidelines-pro-type-static-cast-downcast,
  cppcoreguidelines-pro-type-member-init,
  cppcoreguidelines-pro-bounds-array-to-pointer-decay,
  cppcoreguidelines-pro-bounds-pointer-arithmetic,
  cppcoreguidelines-slicing,
  cppcoreguidelines-narrowing-conversions,
  performance-*,

  readability-braces-around-statements,
  readability-container-size-empty,
  readability-delete-null-pointer,
  readability-redundant-member-init,
  readability-redundant-smartptr-get,
  readability-redundant-string-cstr,

  -readability-identifier-length,
  -readability-avoid-const-params-in-decls,
  -readability-else-after-return,
  -cppcoreguidelines-avoid-magic-numbers,
  -modernize-use-trailing-return-type,
  -modernize-use-nodiscard,
  -modernize-use-auto,
  -modernize-pass-by-value,
  -modernize-return-braced-init-list,
  -modernize-use-default-member-init,
  -modernize-loop-convert,
  -modernize-concat-nested-namespaces,
  -llvm-include-order,
  -bugprone-unused-return-value,
  -clang-diagnostic-unused-result,
  -cppcoreguidelines-special-member-functions,
  -performance-noexcept-move-constructor,
  -cppcoreguidelines-narrowing-conversions,
  -clang-diagnostic-error,
  -cppcoreguidelines-pro-type-member-init,
  -clang-analyzer-optin.cplusplus.UninitializedObject,
  -cppcoreguidelines-pro-type-static-cast-downcast,
  -performance-unnecessary-value-param,
  -performance-enum-size,
  -cppcoreguidelines-pro-bounds-pointer-arithmetic,
  -cppcoreguidelines-pro-bounds-array-to-pointer-decay,
  -clang-analyzer-deadcode.DeadStores,
  -clang-analyzer-optin.cplusplus.VirtualCall,
  -clang-diagnostic-tautological-constant-compare,
# EditorConfig settings for this repository; see https://editorconfig.org/
# `root = true` stops editors from searching parent directories for more
# .editorconfig files.
root = true
# Defaults for every file: UTF-8, LF line endings, 4-space indentation,
# no trailing whitespace, and a newline at end of file.
[*]
charset = utf-8
end_of_line = lf
indent_style = space
indent_size = 4
trim_trailing_whitespace = true
insert_final_newline = true
# Python sources and stubs.
[*.{py,pyi}]
indent_size = 4
# C/C++/CUDA sources and headers.
[*.{cpp,hpp,cxx,cc,c,h,cu,cuh}]
indent_size = 2
# CMake scripts.
[{*.cmake,CMakeLists.txt}]
indent_size = 2
[*.{yaml,yml}]
indent_size = 2
# .clang-format and .clang-tidy.
[.clang-{format,tidy}]
indent_size = 2
# Makefiles are the one place where tab indentation is used.
[Makefile]
indent_style = tab
[*.sh]
indent_size = 4
# Batch scripts override the default line ending with CRLF.
[*.bat]
indent_size = 4
end_of_line = crlf
# `x-soft-wrap-text` is not a standard EditorConfig property; presumably an
# editor-specific extension that enables soft wrapping -- confirm which editors honor it.
[*.md]
indent_size = 2
x-soft-wrap-text = true
[*.rst]
indent_size = 4
x-soft-wrap-text = true
# Default: treat every path as text and use LF line endings in the working tree.
* text eol=lf
# Batch scripts are checked out with CRLF line endings instead.
*.bat eol=crlf
# Image assets are binary: no line-ending conversion and no textual diff/merge.
*.svg binary
*.jpg binary
*.jpeg binary
*.png binary
*.gif binary
# Have GitHub Linguist classify `.h` headers as C++.
*.h linguist-language=C++
name: 🐛 Bug Report
description: File an issue about a bug.
title: "[BUG] "
labels: [bug]
assignees: []
body:
- type: markdown
attributes:
value: >-
Please do your best to make the issue as easy to act on as possible,
and only submit here if there is clearly a problem with TileLang.
- type: checkboxes
id: steps
attributes:
label: Required prerequisites
description: Make sure you've completed the following steps before submitting your issue -- thank you!
options:
- label: I have read the documentation <https://tilelang.com>.
required: true
- label: >-
I have searched the [Issue Tracker](https://github.com/tile-ai/tilelang/issues)
that this hasn't already been reported. (comment there if it has.)
required: true
- type: input
id: version
attributes:
label: What version of TileLang are you using?
description: >-
Run command `python3 -c 'print(__import__("tilelang").__version__)'` in your shell
and paste the output here.
placeholder: E.g., 0.1.5
validations:
required: true
- type: textarea
id: system-info
attributes:
label: System information
description: |
Describe the characteristic of your environment:
- Describe how the library was installed (pip, conda, source, ...)
- Python version
- Versions of any other relevant libraries
```python
import sys, tilelang, torch
print(sys.version, sys.platform)
print(tilelang.__version__)
print(torch.__version__)
```
```bash
python3 -m torch.utils.collect_env
```
validations:
required: true
- type: textarea
id: description
attributes:
label: Problem description
description: >-
Provide a short description, state the expected behavior and what actually happens. Include
relevant information like what version of TileLang you are using, what system you are on, and
any useful commands / output.
validations:
required: true
- type: textarea
id: code
attributes:
label: Reproducible example code
description: >-
The code should be minimal, have minimal external dependencies, and isolate the functions
that cause breakage. Submit matched and complete snippets that can be easily run to diagnose
the issue.
value: |
The Python snippets:
```python
```
validations:
required: true
- type: textarea
id: traceback
attributes:
label: Traceback
description: Put the Python traceback information here.
placeholder: |
Traceback (most recent call last):
File ...
render: pytb
- type: textarea
id: expected
attributes:
label: Expected behavior
description: Provide a clear and concise description of what you expected to happen.
- type: textarea
id: additional-context
attributes:
label: Additional context
description: >-
Add any other context about the problem here. Screenshots may also be helpful.
If you know or suspect the reason for this bug, paste the code lines and suggest modifications.
# Issue template chooser: still allow opening an issue without any template.
blank_issues_enabled: true
name: ✨ Feature Request
description: Suggest an idea for this project.
title: "[Feature Request] "
labels: [enhancement]
body:
- type: checkboxes
id: steps
attributes:
label: Required prerequisites
description: Make sure you've completed the following steps before submitting your issue -- thank you!
options:
- label: >-
I have searched the [Issue Tracker](https://github.com/tile-ai/tilelang/issues)
that this hasn't already been reported. (comment there if it has.)
required: true
- type: textarea
id: motivation
attributes:
label: Motivation
description: Outline the motivation for the proposal.
value: |
<!-- Please outline the motivation for the proposal.
Is your feature request related to a problem? E.g., "I'm always frustrated when [...]".
If this is related to another issue, please link here too. -->
validations:
required: true
- type: textarea
id: solution
attributes:
label: Solution
description: Provide a clear and concise description of what you want to happen.
- type: textarea
id: alternatives
attributes:
label: Alternatives
description: A clear and concise description of any alternative solutions or features you've considered.
- type: textarea
id: additional-context
attributes:
label: Additional context
description: Add any other context about the problem here. Screenshots may also be helpful.
name: 🤔 Questions / Help / Support
description: Do you need support?
title: "[Question] "
labels: [question]
body:
- type: checkboxes
id: steps
attributes:
label: Required prerequisites
description: Make sure you've completed the following steps before submitting your issue -- thank you!
options:
- label: I have read the documentation <https://tilelang.com>.
required: true
- label: >-
I have searched the [Issue Tracker](https://github.com/tile-ai/tilelang/issues)
that this hasn't already been reported. (comment there if it has.)
required: true
- type: textarea
id: questions
attributes:
label: Questions
description: Describe your questions with relevant resources such as snippets, links, images, etc.
validations:
required: true
name: "Release Plan"
description: "Plan the next release"
title: "[Release Plan] vX.Y.Z"
labels:
- release-plan
- tracking
assignees: []
body:
- type: input
id: version
attributes:
label: "Version"
placeholder: "v0.2.0"
validations:
required: true
- type: input
id: milestone
attributes:
label: "Milestone"
description: "Link or name of the milestone for this release"
placeholder: "https://github.com/tile-ai/tilelang/milestone/XX"
- type: textarea
id: scope
attributes:
label: "Scope"
description: "Goals and non-goals (brief)"
placeholder: |
- Goals: ...
- Non-goals: ...
- type: textarea
id: tasks
attributes:
label: "Tasks"
description: "Task list; link issues/PRs"
value: |
- [ ] Features
- [ ] Fixes
- [ ] Docs
- [ ] API/Breaking changes
- [ ] Benchmarks
- [ ] Release notes
- type: checkboxes
id: readiness
attributes:
label: "Readiness"
options:
- label: "All planned issues closed or deferred"
- label: "Docs updated"
- label: "CI green; artifacts verified"
- label: "Release notes drafted"
- type: textarea
id: notes
attributes:
label: "Notes"
description: "Risks or communications (optional)"
placeholder: |
- Risk: ...
- Communication: ...
# Dependabot configuration (schema version 2).
version: 2
updates:
# Check the GitHub Actions used by workflows in this repository.
- package-ecosystem: "github-actions"
directory: "/"
# Run once a week, Mondays at 12:00 in the Asia/Shanghai time zone.
schedule:
interval: "weekly"
day: "monday"
time: "12:00"
timezone: "Asia/Shanghai"
# Prefix the commit messages of update PRs with "[CI]".
commit-message:
prefix: "[CI]"
name: CI
on:
pull_request:
types:
- labeled
- unlabeled
- opened
- synchronize
- reopened
# Allow to trigger the workflow manually
workflow_dispatch:
permissions:
contents: read
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
env:
CLANG_TIDY_CMAKE_OPTIONS: "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON" # to be updated
PYTHONDEVMODE: "1"
PYTHONUNBUFFERED: "1"
PYTHONPATH: "" # explicit cleanup
PIP_USER: "" # explicit cleanup
COLUMNS: "100"
FORCE_COLOR: "1"
CLICOLOR_FORCE: "1"
UV_INDEX_STRATEGY: "unsafe-best-match"
UV_HTTP_TIMEOUT: "600"
XDG_CACHE_HOME: "${{ github.workspace }}/.cache" # to be updated
PIP_CACHE_DIR: "${{ github.workspace }}/.cache/pip" # to be updated
UV_CACHE_DIR: "${{ github.workspace }}/.cache/uv" # to be updated
PRE_COMMIT_HOME: "${{ github.workspace }}/.cache/pip/.pre-commit" # to be updated
jobs:
lint:
name: Quick Lint
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 0
submodules: recursive
- name: Setup Python 3.8
id: setup-pylowest
uses: actions/setup-python@v6
with:
python-version: "3.8" # use lowest supported version for linting
update-environment: false
- name: Check AST with Python 3.8
run: |
"${{ steps.setup-pylowest.outputs.python-path }}" -m compileall -q -f tilelang
- name: Setup Python 3.9
uses: actions/setup-python@v6
with:
python-version: "3.9"
update-environment: true
cache: pip
cache-dependency-path: |
pyproject.toml
requirements*.txt
.pre-commit-config.yaml
- name: Pre-commit Lint
run: |
if ! pipx run pre-commit run --all-files --color=always --show-diff-on-failure; then
echo "::error::Pre-commit checks failed. Please run 'pre-commit install' and 'pre-commit run --all-files' locally to see the issues."
exit 1
fi
tests:
name: Test for Python ${{ matrix.python-version }} with ${{ matrix.runner.toolkit }} (on ${{ matrix.runner.name }})
if: |
github.repository_owner == 'tile-ai' &&
(github.event_name != 'pull_request' || !github.event.pull_request.draft)
needs: [lint]
runs-on: ${{ matrix.runner.tags }}
strategy:
matrix:
runner:
- tags: [self-hosted, nvidia]
name: self-hosted-nvidia
# Format: [Nightly-]CUDA-<major>.<minor>[.<patch>]. E.g., "CUDA-12.8" or "Nightly-CUDA-13.0".
# Use "Nightly-" prefix to use torch nightly builds.
toolkit: CUDA-12.8
- tags: [self-hosted, amd, gpu]
name: self-hosted-amd
# Format: [Nightly-]ROCm-<major>.<minor>[.<patch>]. E.g., "ROCm-6.4" or "Nightly-ROCm-7.0".
# Use "Nightly-" prefix to use torch nightly builds.
toolkit: Nightly-ROCm-7.1
- tags: [macos-latest]
name: macos-latest
toolkit: Metal # or Nightly-Metal
python-version:
- "3.12"
fail-fast: false
timeout-minutes: 120
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 0
submodules: recursive
- name: Set environment (self-hosted runners)
if: startsWith(matrix.runner.name, 'self-hosted')
run: |
# Hide sensitive data in logs for self-hosted runners
if [[ -n "${{ secrets.SECRET_PATH_PREFIXES }}" ]]; then
echo "::add-mask::${{ secrets.SECRET_PATH_PREFIXES }}"
# Colon separated list of secrets to mask
for secret in $(echo "${{ secrets.SECRET_PATH_PREFIXES }}" | tr ':' '\n'); do
echo "::add-mask::${secret}"
done
fi
# Use runner tool_cache as cache root for self-hosted runners to avoid internet connection
# issues and to share cache between jobs.
export XDG_CACHE_HOME="${{ runner.tool_cache }}/.ci-cache-${{ github.workflow }}"
echo "XDG_CACHE_HOME=${XDG_CACHE_HOME}" | tee -a "${GITHUB_ENV}"
echo "PIP_CACHE_DIR=${XDG_CACHE_HOME}/pip" | tee -a "${GITHUB_ENV}"
echo "UV_CACHE_DIR=${XDG_CACHE_HOME}/uv" | tee -a "${GITHUB_ENV}"
echo "PRE_COMMIT_HOME=${XDG_CACHE_HOME}/pip/.pre-commit" | tee -a "${GITHUB_ENV}"
# Do not use ccache on self-hosted runners, as it will download/upload caches which is slow.
# Self-hosted runners usually have more CPU power to compile without ccache.
- name: Setup ccache (GitHub-hosted runners)
id: setup-ccache
if: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
uses: hendrikmuhs/ccache-action@v1
with:
create-symlink: true
evict-old-files: "7d"
append-timestamp: false
key: ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }}
restore-keys: |
${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }}
${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}
${{ runner.os }}-${{ runner.arch }}
- name: Set environment (CUDA)
if: contains(matrix.runner.toolkit, 'CUDA')
run: |
TOOLKIT="${{ matrix.runner.toolkit }}"
CUDA_VERSION="${TOOLKIT##*-}"
CUDA_VERSION_MAJMIN="$(echo ${CUDA_VERSION} | cut -d '.' -f-2)"
CUDA_VERSION_MAJMIN_NODOT="${CUDA_VERSION_MAJMIN//./}"
if [[ "${TOOLKIT}" == "Nightly-"* ]]; then
# Use torch nightly builds
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MAJMIN_NODOT}"
else
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MAJMIN_NODOT}"
fi
export UV_INDEX="${PIP_EXTRA_INDEX_URL}"
export CLANG_TIDY_CMAKE_OPTIONS="${CLANG_TIDY_CMAKE_OPTIONS} -DUSE_CUDA=ON"
echo "USE_CUDA=ON" | tee -a "${GITHUB_ENV}"
echo "CUDA_VERSION=${CUDA_VERSION}" | tee -a "${GITHUB_ENV}"
echo "CUDA_VERSION_MAJMIN=${CUDA_VERSION_MAJMIN}" | tee -a "${GITHUB_ENV}"
echo "CUDA_VERSION_MAJMIN_NODOT=${CUDA_VERSION_MAJMIN_NODOT}" | tee -a "${GITHUB_ENV}"
echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" | tee -a "${GITHUB_ENV}"
echo "UV_INDEX=${UV_INDEX}" | tee -a "${GITHUB_ENV}"
echo "CLANG_TIDY_CMAKE_OPTIONS=${CLANG_TIDY_CMAKE_OPTIONS}" | tee -a "${GITHUB_ENV}"
if [[ ! -x "$(command -v nvcc)" ]]; then
export PATH="/usr/local/cuda/bin:${PATH}"
export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
echo "PATH=${PATH}" | tee -a "${GITHUB_ENV}"
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" | tee -a "${GITHUB_ENV}"
fi
if [[ -x "$(command -v nvcc)" ]]; then
echo "\$ $(command -v nvcc) --version" && nvcc --version
else
echo "::warning::nvcc not found in PATH!"
fi
- name: Set environment (ROCm)
if: contains(matrix.runner.toolkit, 'ROCm')
run: |
TOOLKIT="${{ matrix.runner.toolkit }}"
ROCM_VERSION="${TOOLKIT##*-}"
ROCM_VERSION_MAJMIN="$(echo ${ROCM_VERSION} | cut -d '.' -f-2)"
ROCM_VERSION_MAJMIN_NODOT="${ROCM_VERSION_MAJMIN//./}"
if [[ "${TOOLKIT}" == "Nightly-"* ]]; then
# Use torch nightly builds
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/rocm${ROCM_VERSION_MAJMIN}"
else
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/rocm${ROCM_VERSION_MAJMIN}"
fi
export UV_INDEX="${PIP_EXTRA_INDEX_URL}"
export CLANG_TIDY_CMAKE_OPTIONS="${CLANG_TIDY_CMAKE_OPTIONS} -DUSE_ROCM=ON"
echo "USE_ROCM=ON" | tee -a "${GITHUB_ENV}"
echo "ROCM_VERSION=${ROCM_VERSION}" | tee -a "${GITHUB_ENV}"
echo "ROCM_VERSION_MAJMIN=${ROCM_VERSION_MAJMIN}" | tee -a "${GITHUB_ENV}"
echo "ROCM_VERSION_MAJMIN_NODOT=${ROCM_VERSION_MAJMIN_NODOT}" | tee -a "${GITHUB_ENV}"
echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" | tee -a "${GITHUB_ENV}"
echo "UV_INDEX=${UV_INDEX}" | tee -a "${GITHUB_ENV}"
echo "CLANG_TIDY_CMAKE_OPTIONS=${CLANG_TIDY_CMAKE_OPTIONS}" | tee -a "${GITHUB_ENV}"
if [[ ! -x "$(command -v hipcc)" ]]; then
export PATH="/opt/rocm/bin:${PATH}"
export LD_LIBRARY_PATH="/opt/rocm/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
echo "PATH=${PATH}" | tee -a "${GITHUB_ENV}"
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" | tee -a "${GITHUB_ENV}"
fi
if [[ -x "$(command -v hipcc)" ]]; then
echo "\$ $(command -v hipcc) --version" && hipcc --version
else
echo "::warning::hipcc not found in PATH!"
fi
- name: Set environment (Metal)
if: contains(matrix.runner.toolkit, 'Metal')
run: |
if [[ "${{ matrix.runner.toolkit }}" == "Nightly-"* ]]; then
# Use torch nightly builds
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/cpu"
export UV_INDEX="${PIP_EXTRA_INDEX_URL}"
echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" | tee -a "${GITHUB_ENV}"
echo "UV_INDEX=${UV_INDEX}" | tee -a "${GITHUB_ENV}"
fi
export CLANG_TIDY_CMAKE_OPTIONS="${CLANG_TIDY_CMAKE_OPTIONS} -DUSE_METAL=ON"
echo "USE_METAL=ON" | tee -a "${GITHUB_ENV}"
echo "CLANG_TIDY_CMAKE_OPTIONS=${CLANG_TIDY_CMAKE_OPTIONS}" | tee -a "${GITHUB_ENV}"
- name: Setup Python and uv with caching
id: setup-uv
uses: astral-sh/setup-uv@v7
with:
python-version: ${{ matrix.python-version }}
activate-environment: true
# Do not use cache for self-hosted runners, as it will download/upload caches which is slow.
enable-cache: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
prune-cache: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
# Use runner tool_cache for self-hosted runners
cache-local-path: ${{ env.UV_CACHE_DIR }}
ignore-nothing-to-cache: true
# Extra cache key to upload/download caches on GitHub-hosted runners
cache-suffix: uv-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ matrix.runner.name }}-${{ matrix.runner.toolkit }}
cache-dependency-glob: |
pyproject.toml
requirements*.txt
.pre-commit-config.yaml
- name: Setup venv
id: setup-venv
run: |
set -o pipefail
uv pip install --upgrade pip setuptools wheel
if [[ "${UV_INDEX}" == *"/nightly/"* ]]; then
uv pip install --prerelease=allow -v torch
fi
uv pip install -v -r requirements-test.txt
echo "import torch; print(f'torch: {torch.__version__}')" | uv run --no-project --script -
if [[ "${{ matrix.runner.toolkit }}" == *"CUDA"* ]]; then
uv pip install --no-build-isolation-package=flash-attn -v -r requirements-test-cuda.txt
echo "import flash_attn; print(f'flash_attn: {flash_attn.__version__}')" | uv run --no-project --script -
elif [[ "${{ matrix.runner.toolkit }}" == *"ROCm"* ]]; then
uv pip install -v -r requirements-test-rocm.txt
elif [[ "${{ matrix.runner.toolkit }}" == *"Metal"* ]]; then
uv pip install -v -r requirements-test-metal.txt
else
echo "::error::Unknown toolkit: ${{ matrix.runner.toolkit }}"
exit 1
fi
echo "::group::torch.utils.collect_env"
uv run --no-project -m -- torch.utils.collect_env
echo "::endgroup::"
- name: Clear uv cache for self-hosted runners (if setup failed)
if: >-
${{
failure() &&
startsWith(matrix.runner.name, 'self-hosted') &&
(steps.setup-uv.conclusion == 'failure' || steps.setup-venv.conclusion == 'failure')
}}
run: |
echo "Clearing uv cache at ${UV_CACHE_DIR} due to failure."
uv cache clean
- name: Enable core dump generation (Linux / GitHub-hosted runners)
if: ${{ runner.os == 'Linux' && !startsWith(matrix.runner.name, 'self-hosted') }}
run: |
sudo sysctl -w kernel.core_pattern="core.${{ matrix.python-version }}.${{ matrix.runner.toolkit }}.%P"
sudo sysctl -w kernel.core_uses_pid=0
sudo sysctl -w fs.suid_dumpable=1
sysctl kernel.core_pattern kernel.core_uses_pid fs.suid_dumpable
- name: Enable core dump generation (macOS / GitHub-hosted runners)
if: ${{ runner.os == 'macOS' && !startsWith(matrix.runner.name, 'self-hosted') }}
run: |
sudo sysctl -w kern.corefile="core.${{ matrix.python-version }}.${{ matrix.runner.toolkit }}.%P"
sudo sysctl -w kern.coredump=1
sudo sysctl -w kern.sugid_coredump=1
sysctl kern.corefile kern.coredump kern.sugid_coredump
- name: Install project (wheel form)
run: |
uv pip install -v .
# Static analysis step (Linux runners only): generate compile_commands.json
# with CMake, run clang-tidy over the C/C++ sources under `src`, and fail the
# job if clang-tidy exits with a non-zero status.
- name: Run clang-tidy
  id: clang-tidy
  if: runner.os == 'Linux'
  run: |
    echo "\$ $(command -v clang-tidy) --version" && clang-tidy --version

    # Download run-clang-tidy script
    RCT_URL=https://raw.githubusercontent.com/llvm/llvm-project/refs/heads/release/21.x/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py
    echo "Downloading run-clang-tidy script from ${RCT_URL}"
    echo "import urllib.request; url = '${RCT_URL}'.rstrip('/'); urllib.request.urlretrieve(url, url.split('/')[-1])" | uv run --no-project --script -
    RUN_CLANG_TIDY=(uv run --no-project --script -- run-clang-tidy.py)

    # Only pass -fix when clang-apply-replacements is available.
    if [[ -x "$(command -v clang-apply-replacements)" ]]; then
      echo "Using clang-apply-replacements from $(command -v clang-apply-replacements)"
      RUN_CLANG_TIDY+=(-fix -clang-apply-replacements-binary="$(command -v clang-apply-replacements)")
    else
      echo "::warning::clang-apply-replacements not found in PATH, automatic fixing disabled."
    fi

    # Run cmake to create the build directory with compile_commands.json
    cmake -S . -B cmake-build --fresh ${CLANG_TIDY_CMAKE_OPTIONS}  # no quotes here
    echo "::group::compile_commands.json"
    ls -alh cmake-build/compile_commands.json
    uv run --no-project -m -- json.tool --no-ensure-ascii cmake-build/compile_commands.json
    echo "::endgroup::"

    # Group the name tests in parentheses: `-a` binds tighter than `-o`, so
    # without them `-type f` would only constrain the first pattern.
    CXX_FILES=$(find src -type f \( -iname "*.[ch]pp" -o -iname "*.cc" -o -iname "*.c" -o -iname "*.h" \))

    # Capture the exit code instead of failing immediately, so the log group is
    # closed and temporary files are removed before the step reports failure.
    rc=0
    echo "::group::run-clang-tidy"
    "${RUN_CLANG_TIDY[@]}" -clang-tidy-binary="$(command -v clang-tidy)" \
      -exclude-header-filter='^(3rdparty|tvm)/.*$' \
      -p="cmake-build" ${CXX_FILES} || rc="$?"
    echo "::endgroup::"
    rm -rf cmake-build run-clang-tidy.py
    if (( rc != 0 )); then
      echo "::error::clang-tidy found issues (exit code: ${rc}). Please run 'clang-tidy --fix' locally to fix them."
      # Show whatever fixes were applied to the working tree.
      git diff --color=always || true
      exit "${rc}"
    fi
- name: Run examples with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }})
if: contains(matrix.runner.toolkit, 'CUDA')
run: |
cd testing
PYTEST=(
uv run --no-project -m --
pytest --verbose --color=yes --durations=0 --showlocals --cache-clear
)
"${PYTEST[@]}" --maxfail=3 --numprocesses=4 \
../examples
# NVIDIA CUDA tests
- name: Run CUDA tests with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }})
id: cuda-tests
if: contains(matrix.runner.toolkit, 'CUDA')
run: |
cd testing
PYTEST=(
uv run --no-project -m --
pytest --verbose --color=yes --durations=0 --showlocals --cache-clear
)
"${PYTEST[@]}" --maxfail=3 --numprocesses=4 \
--ignore=./python/jit/test_tilelang_jit_cutedsl.py \
./python
# CuTeDSL JIT tests require GEMM v1 (must be set before importing tilelang).
# Run them in a dedicated step to avoid changing the default GEMM selection
# (and to keep the rest of the CUDA tests on GEMM v2).
- name: Run CuTeDSL JIT tests (GEMM v1) with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }})
id: cutedsl-tests
if: contains(matrix.runner.toolkit, 'CUDA')
env:
TILELANG_USE_GEMM_V1: "1"
run: |
cd testing
PYTEST=(
uv run --no-project -m --
pytest --verbose --color=yes --durations=0 --showlocals --cache-clear
)
# Avoid xdist contention on a single GPU by running this file in one worker.
"${PYTEST[@]}" --maxfail=3 --numprocesses=1 \
./python/jit/test_tilelang_jit_cutedsl.py
# AMD ROCm tests
- name: Run ROCm tests with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }})
id: rocm-tests
if: contains(matrix.runner.toolkit, 'ROCm')
run: |
cd testing
PYTEST=(
uv run --no-project -m --
pytest --verbose --color=yes --durations=0 --showlocals --cache-clear
)
"${PYTEST[@]}" --maxfail=3 --numprocesses=4 \
./python/amd
# Apple Metal tests
- name: Run Metal tests with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }})
id: metal-tests
if: contains(matrix.runner.toolkit, 'Metal')
run: |
cd testing
PYTEST=(
uv run --no-project -m --
pytest --verbose --color=yes --durations=0 --showlocals --cache-clear
)
"${PYTEST[@]}" --maxfail=3 --numprocesses=4 \
-k metal \
./python
- name: List generated files
if: ${{ !cancelled() }}
run: |
find . -type f -name '*.py[co]' -delete
find . -depth -type d -name "__pycache__" -exec rm -r "{}" +
if git status --ignored --porcelain | grep -qvE '/$'; then
ls -alh $(git status --ignored --porcelain | grep -vE '/$' | grep -oE '\S+$')
fi
name: Dist
on:
workflow_dispatch:
schedule:
# Daily at 22:00 UTC, i.e. 06:00 China Standard Time (UTC+8) the next day.
- cron: "0 22 * * *"
pull_request:
types:
- opened
- synchronize
- reopened
- ready_for_review
paths:
- setup.py
- setup.cfg
- pyproject.toml
- MANIFEST.in
- CMakeLists.txt
- version_provider.py
- .github/workflows/dist.yml
release:
types:
- published
permissions:
contents: read
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: true
env:
PYTHONDEVMODE: "1"
PYTHONUNBUFFERED: "1"
COLUMNS: "100"
FORCE_COLOR: "1"
CLICOLOR_FORCE: "1"
jobs:
build-sdist:
name: Build SDist
if: |
github.repository_owner == 'tile-ai' &&
(github.event_name != 'pull_request' || !github.event.pull_request.draft)
runs-on: macos-latest
timeout-minutes: 30
env:
# `NO_VERSION_LABEL=ON` disables embedding the toolchain / git commit hash in version metadata.
# Otherwise, the version of the SDist has a git hash suffix (e.g., 0.1.0+gitabcdef12),
# but the package built from the SDist has no way to get the git hash (it is not a git repo),
# leading to inconsistent versions between SDist and built packages (+gitabcdef12 vs. +gitunknown).
NO_VERSION_LABEL: 'ON'
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 1
submodules: recursive
- name: Setup Python and uv with caching
id: setup-uv
uses: astral-sh/setup-uv@v7
with:
python-version: "3.12"
activate-environment: true
- name: Build SDist
run: |
uv run --no-project --with=build -m -- build --sdist --outdir=dist
- name: Setup ccache
uses: hendrikmuhs/ccache-action@v1
with:
create-symlink: true
evict-old-files: "7d"
append-timestamp: false
key: sdist-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/*.cc') }}
restore-keys: |
sdist-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/*.cc') }}
sdist-${{ runner.os }}-${{ runner.arch }}
${{ runner.os }}-${{ runner.arch }}
- name: Test SDist buildable
run: |
TEMP_DIR="$(mktemp -d -t tilelang-sdist-test)"
cp -r dist "${TEMP_DIR}/dist"
cd "${TEMP_DIR}"
uv pip install -v dist/*.tar.gz
python3 -c "import tilelang; print(tilelang.__version__)"
- name: Upload SDist
# Not PR to save artifact storage, as SDist is only needed for releases.
if: github.event_name != 'pull_request' || contains(github.event.pull_request.title, '[Release]')
uses: actions/upload-artifact@v6
with:
name: sdist
path: dist/*.tar.gz
if-no-files-found: error
build-wheels:
name: Build wheels for Python ${{ matrix.python-version }} on ${{ matrix.target.runner }} with ${{ matrix.target.toolkit }}
if: |
github.repository_owner == 'tile-ai' &&
(github.event_name != 'pull_request' || !github.event.pull_request.draft)
strategy:
matrix:
target:
- { runner: ubuntu-latest, toolkit: "CUDA-12.8" }
- { runner: ubuntu-24.04-arm, toolkit: "CUDA-12.8" }
- { runner: macos-latest, toolkit: "Metal" }
python-version:
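# Wheels are built with the Python 3.8 Limited API, so they should work with all Python >= 3.8.
# Only build wheels against the Python 3.8 Limited API to save CI resources.
# NOTE(review): the interpreter listed below is 3.9, not 3.8 -- confirm which version is intended.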
- "3.9"
fail-fast: false
timeout-minutes: 120
runs-on: ${{ matrix.target.runner }}
env:
NO_VERSION_LABEL: ${{ github.event_name == 'release' && 'OFF' || 'ON' }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 1
submodules: recursive
- name: Setup ccache
uses: hendrikmuhs/ccache-action@v1
with:
create-symlink: true
evict-old-files: "7d"
append-timestamp: false
key: wheel-${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}-${{ hashFiles('**/*.cc') }}
restore-keys: |
wheel-${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}-${{ hashFiles('**/*.cc') }}
wheel-${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}
wheel-${{ runner.os }}-${{ runner.arch }}
${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}
${{ runner.os }}-${{ runner.arch }}
- name: Set CIBW_BUILD
run: |
PYTHON_VERSION="${{ matrix.python-version }}"
PYTHON_VERSION_MAJMIN="$(echo "${PYTHON_VERSION}" | cut -d '.' -f-2)"
PYTHON_VERSION_MAJMIN_NODOT="${PYTHON_VERSION_MAJMIN//./}"
echo "CIBW_BUILD=cp${PYTHON_VERSION_MAJMIN_NODOT}-*" | tee -a "${GITHUB_ENV}"
if [[ "${{ matrix.target.toolkit }}" == *"CUDA"* ]]; then
CUDA_VERSION="${{ matrix.target.toolkit }}"
CUDA_VERSION="${CUDA_VERSION#CUDA-}"
echo "CUDA_VERSION=${CUDA_VERSION}" | tee -a "${GITHUB_ENV}"
fi
if [[ "${{ runner.os }}" == "Linux" ]]; then
HOST_CCACHE_DIR="$(ccache --get-config cache_dir)"
echo "CIBW_BEFORE_BUILD_LINUX=yum install -y ccache && ccache -o cache_dir=/host${HOST_CCACHE_DIR}" | tee -a "${GITHUB_ENV}"
fi
- name: Build wheels
uses: pypa/cibuildwheel@v3.3
with:
package-dir: .
output-dir: wheelhouse
config-file: "{package}/pyproject.toml"
- name: Upload wheels
# Not PR to save artifact storage, as wheels are only needed for releases.
if: github.event_name != 'pull_request' || contains(github.event.pull_request.title, '[Release]')
uses: actions/upload-artifact@v6
with:
name: wheels-${{ matrix.python-version }}-${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}
path: wheelhouse/*.whl
if-no-files-found: error
list-artifacts:
name: List artifacts
# Not PR to save artifact storage, as artifacts are only needed for releases.
if: github.event_name != 'pull_request' || contains(github.event.pull_request.title, '[Release]')
runs-on: ubuntu-latest
needs: [build-sdist, build-wheels]
timeout-minutes: 15
steps:
- name: Download built SDist
uses: actions/download-artifact@v7
with:
# unpacks default artifact into dist/
# if `name: artifact` is omitted, the action will create extra parent dir
name: sdist
path: dist
- name: Download built wheels
uses: actions/download-artifact@v7
with:
pattern: wheels-*
path: dist
merge-multiple: true
- name: List distributions
run: ls -lh dist/*
- name: Upload artifacts
uses: actions/upload-artifact@v6
with:
name: artifacts
path: dist/*
if-no-files-found: error
# Performance Benchmark Bot: when a comment on a pull request contains
# `/performance-report` or `/perf`, install the PR merge ref and `main` into
# separate virtual environments on a self-hosted NVIDIA runner, run the
# benchmark script, and post the result back to the PR as a comment.
name: Performance Benchmark Bot

on:
  issue_comment:
    types:
      - created

# Read-only by default; the job below opts in to the single write scope it needs.
permissions:
  contents: read

concurrency:
  # For `issue_comment` events `github.ref` is always the default branch, so the
  # group is keyed on the issue/PR number instead. Otherwise a comment on any
  # unrelated issue would cancel a benchmark that is still running.
  # NOTE(review): a new comment on the same PR still cancels an in-progress run.
  group: "${{ github.workflow }}-${{ github.event.issue.number }}"
  cancel-in-progress: true  # always cancel in-progress

env:
  PYTHONDEVMODE: "1"
  PYTHONUNBUFFERED: "1"
  PYTHONPATH: ""  # explicit cleanup
  PIP_USER: ""  # explicit cleanup
  COLUMNS: "100"
  FORCE_COLOR: "1"
  CLICOLOR_FORCE: "1"
  XDG_CACHE_HOME: "${{ github.workspace }}/.cache"  # to be updated
  PIP_CACHE_DIR: "${{ github.workspace }}/.cache/pip"  # to be updated

jobs:
  perfbench:
    name: Benchmark between PR and main
    if: |
      github.repository_owner == 'tile-ai' &&
      github.event.issue.pull_request &&
      (contains(github.event.comment.body, '/performance-report') || contains(github.event.comment.body, '/perf'))
    runs-on: [self-hosted, nvidia]
    permissions:
      contents: read
      # Required by the final step to create the result comment on the PR.
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          ref: refs/pull/${{ github.event.issue.number }}/merge
          fetch-depth: 0
          submodules: recursive
          # The steps below build and run code from the PR, so do not leave the
          # job token behind in the local git configuration.
          persist-credentials: false

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"
          update-environment: true
          cache: pip
          cache-dependency-path: |
            pyproject.toml
            requirements*.txt

      # Venv `tll` holds the build of the PR merge ref.
      - name: Install merged version
        run: |
          python -m venv tll
          source tll/bin/activate
          pip install -r requirements-test.txt
          pip install .

      # Venv `tl` holds the build of `main`. The work tree is cleaned first,
      # keeping only the `tll` venv created by the previous step.
      - name: Install original version
        run: |
          echo "Check files to be deleted!"
          git clean -dxf -e tll/
          echo "Delete files completed!"
          git checkout main
          python -m venv tl
          source tl/bin/activate
          pip install -r requirements-test.txt
          pip install .

      - name: Run performance test
        id: perfbench
        run: |
          source tl/bin/activate
          python maint/scripts/ci_performance.py

      - name: Post test results as PR comment
        uses: actions/github-script@v8
        env:
          # Hand the benchmark output to the script through the environment
          # rather than interpolating it into the JavaScript source, where
          # quotes or newlines in the output would break (or inject into) it.
          # NOTE(review): a `run` step only exposes outputs it writes to
          # $GITHUB_OUTPUT; confirm ci_performance.py actually sets `stdout`.
          PERF_OUTPUT: ${{ steps.perfbench.outputs.stdout }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const results = process.env.PERF_OUTPUT || '';
            // `\u200B` spells out the zero-width space the heading has always carried.
            // Await the request so that an API failure fails this step.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: '📊 \u200B**Performance Test Results** (triggered by @' + context.payload.comment.user.login + '):\n\n' +
                'Run listed here: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\n\n' +
                results
            })
# Posts a one-time welcome/reminder comment on every newly opened pull request.
name: PR Reminder Bot

on:
  pull_request_target:
    types:
      - opened

# `pull_request_target` runs with the base repository's token. Grant only the
# scopes needed to post a comment instead of relying on repository defaults.
permissions:
  issues: write
  pull-requests: write

jobs:
  remind:
    runs-on: ubuntu-latest
    # Do not run in forks of the repository.
    if: github.repository_owner == 'tile-ai'
    steps:
      - name: Remind
        uses: actions/github-script@v8
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Await the request so an API failure fails the step.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: '👋 Hi! Thank you for contributing to the **TileLang** project.\n\n' +
                    'Please remember to run `pre-commit run --all-files` in the root directory of the project ' +
                    'to ensure your changes are properly linted and formatted. ' +
                    'This will help ensure your contribution passes the format check.\n\n' +
                    'We appreciate you taking this step! ' +
                    'Our team will review your contribution, and we look forward to your awesome work! 🚀'
            })
# Builds the documentation and pushes the generated HTML to a separate
# repository, either after a PR is merged into `main` or on manual dispatch.
name: Documentation

on:
  pull_request_target:
    types:
      - closed
  workflow_dispatch:

permissions:
  contents: write

jobs:
  docs:
    name: Build and Publish Docs
    # `closed` also fires for unmerged PRs, so require an actual merge into main.
    if: |
      github.repository_owner == 'tile-ai' &&
      (
        (
          github.event_name == 'pull_request_target' &&
          github.event.pull_request.merged == true &&
          github.event.pull_request.base.ref == 'main'
        ) ||
        github.event_name == 'workflow_dispatch'
      )
    runs-on: [self-hosted, nvidia]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          submodules: recursive

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"

      - name: Build docs
        run: |
          bash -ex maint/scripts/build_docs.sh

      - name: Push built docs to another repo
        env:
          # Hand the secrets to the shell as environment variables rather than
          # expanding them into the script text, so special characters in a
          # value cannot alter the commands being run.
          TARGET_TOKEN: "${{ secrets.TARGET_TOKEN }}"
          TARGET_REPO: "${{ secrets.TARGET_REPO }}"
        run: |
          # Hide sensitive info in logs
          echo "::add-mask::${TARGET_TOKEN}"
          echo "::add-mask::${TARGET_REPO}"
          TARGET_REPO_URL="https://github.com/${TARGET_REPO}.git"
          git clone "${TARGET_REPO_URL}" -b main target_repo
          cd target_repo
          git config --local user.name "github-actions[bot]"
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          # Wipe the previous site but keep the repo metadata and its .github dir.
          find . -mindepth 1 -maxdepth 1 ! -name ".github" ! -name "." ! -name ".git" -exec rm -rf {} +
          cp -r ../docs/_build/html/* ./
          git add .
          if [[ -n "$(git status --porcelain)" ]]; then
            # If there are changes, commit and push
            git commit -m "Update docs"
            # `${TARGET_REPO_URL##*://}` strips the scheme so credentials can be prepended.
            git push "https://github-actions[bot]:${TARGET_TOKEN}@${TARGET_REPO_URL##*://}" main
          else
            echo "No changes detected, skipping commit and push."
          fi
# Compiled Object files
*.slo
*.lo
*.o
*.so
*.obj
*.pyc
# Precompiled Headers
*.gch
*.pch
# emacs
*~
# vim
*.swp
*.swo
debug/
build/
*dist/
dist*/
wheelhouse/
__pycache__
nnfusion.tar.gz
# makeenv and test intermediate files
tmp/
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.vscode/
.vs/
# VisualGDB files
VisualGDB/
toolchain.cmake
# docbuild artifacts
doc/sphinx/build/*
doc/doxygen/*.xml
doc/doxygen/*.html
doc/doxygen/man/*
doc/doxygen/latex/*
doc/doxygen/xml/*
doc/doxygen/html/*
# git merge
*.orig
\#*
\.#*
# idea
.idea/*
# python egg
*.egg-info
# macOS
**/.DS_Store
nnfusion_rt/
models/frozenmodels/
# log
*.log
# pkl
*.pkl_*
# .pytest_cache
.pytest_cache
# .hypothesis
.hypothesis
# .ruff_cache
.ruff_cache
# do not ignore the debug testing folder (re-includes it despite the `debug/` rule above)
!testing/python/debug
# ignore the lib directory created in develop mode
tilelang/lib
# cython
tilelang/jit/adapter/cython/.cycache
# cache directory for clangd
.cache/
# claude
**/.claude
# CMake
cmake-build/
cmake-build-*/
# Git version for sdist
.git_commit.txt
# pre-commit cache
.pre-commit-cache/*
# host checks logs
maint/host_checks/logs/*
# ncu
*.ncu-rep
# csv
*.csv
# clang-tidy
/run-clang-tidy.py
# Third-party sources tracked as git submodules; each one is checked out
# under the 3rdparty/ directory from the URL listed in its section.
[submodule "3rdparty/cutlass"]
path = 3rdparty/cutlass
url = https://github.com/NVIDIA/cutlass
[submodule "3rdparty/tvm"]
path = 3rdparty/tvm
url = https://github.com/TileLang/tvm
[submodule "3rdparty/composable_kernel"]
path = 3rdparty/composable_kernel
url = https://github.com/ROCm/composable_kernel
# pre-commit hook configuration.
# Framework documentation: https://pre-commit.com
# Catalogue of available hooks: https://pre-commit.com/hooks.html

# Settings for the hosted pre-commit.ci service.
ci:
  autofix_prs: false
  autofix_commit_msg: "[Lint]: [pre-commit.ci] auto fixes [...]"
  autoupdate_commit_msg: "[CI] [pre-commit.ci] autoupdate"
  autoupdate_schedule: monthly

default_stages:
  - pre-commit
  - pre-push
  - manual

# Never lint anything under the build and 3rdparty directories.
exclude: '^(build|3rdparty)/.*$'

repos:
  # General-purpose repository hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: check-symlinks
      - id: destroyed-symlinks
      # FIXME: enable these hooks
      # - id: trailing-whitespace
      # - id: end-of-file-fixer
      - id: check-added-large-files
      - id: check-merge-conflict
        fail_fast: true
      # FIXME: enable these hooks
      # - id: check-executables-have-shebangs
      # - id: check-shebang-scripts-are-executable
      - id: detect-private-key
      - id: check-yaml
      - id: check-toml
      - id: check-ast
        fail_fast: true
      - id: debug-statements
      - id: file-contents-sorter
        args:
          - "--ignore-case"
        files: '^docs/spelling_wordlist\.txt$'

  # C/C++ formatting.
  - repo: https://github.com/pre-commit/mirrors-clang-format
    # Keep in sync with requirements-lint.txt.
    rev: v21.1.7
    hooks:
      - id: clang-format
        types_or:
          - c++
          - c

  # Python linting and formatting.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Keep in sync with requirements-lint.txt.
    rev: v0.14.9
    hooks:
      - id: ruff-check
        args:
          - "--fix"
          - "--exit-non-zero-on-fix"
      - id: ruff-format
        args:
          - "--exit-non-zero-on-format"

  # Spell checking.
  - repo: https://github.com/codespell-project/codespell
    # Keep in sync with requirements-lint.txt.
    rev: v2.4.1
    hooks:
      - id: codespell
        additional_dependencies:
          - ".[toml]"
        # Verbose-mode regex: skip C/C++/CUDA sources, SVG files and
        # requirements*.txt files.
        exclude: |
          (?x)(
          ^.+\.(cpp|hpp|cxx|cc|c|h|cu|cuh)$|
          ^.+\.svg$|
          ^.*\brequirements\b.*\.txt$
          )
Subproject commit b38bb492a1a55b5abb0c345962143c0f9c482cfb
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment