[CI] add `pre-commit` integration (#955)

* chore: misc cleanup
* feat: add pre-commit config
* chore: update lint dependencies
* style: fix lint issues
* feat: add pre-commit hooks
* fix: fix typos
* chore: update .gitattributes
* [Lint]: [pre-commit.ci] auto fixes [...]
* docs: update CONTRIBUTING.md
* chore: update default venv name
* chore: revert and exclude CUDA files

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

[CI] add `pre-commit` integration (#955)
* chore: misc cleanup
* feat: add pre-commit config
* chore: update lint dependencies
* style: fix lint issues
* feat: add pre-commit hooks
* fix: fix typos
* chore: update .gitattributes
* [Lint]: [pre-commit.ci] auto fixes [...]
* docs: update CONTRIBUTING.md
* chore: update default venv name
* chore: revert and exclude CUDA files

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
8fe35402 · Xuehai Pan · GitHub · f8ae600c · 8fe35402 · 8fe35402
Unverified Commit 8fe35402 authored Oct 10, 2025 by Xuehai Pan Committed by GitHub Oct 10, 2025
19 changed files
--- a/.clang-format
+++ b/.clang-format
+---
+BasedOnStyle: LLVM
+UseTab: Never
+IndentWidth: 2
+ColumnLimit: 80
+
+Language: Cpp
+Standard: c++17
--- a/.editorconfig
+++ b/.editorconfig
@@ -14,7 +14,10 @@ insert_final_newline = true
 indent_size = 4

 [*.{cpp,hpp,cxx,cc,c,h,cu,cuh}]
-indent_size = 4
+indent_size = 2
+
+[{*.cmake,CMakeLists.txt}]
+indent_size = 2

 [*.{yaml,yml}]
 indent_size = 2

--- a/.gitattributes
+++ b/.gitattributes
+* text eol=lf
+*.bat eol=crlf
+
+*.svg binary
+*.jpg binary
+*.jpeg binary
+*.png binary
+*.gif binary
+
 *.h linguist-language=C++
--- a/.gitignore
+++ b/.gitignore
@@ -26,7 +26,14 @@ nnfusion.tar.gz
 # makeenv and test intermediate files
 tmp/

+.env
+.envrc
+.venv
+env/
 venv/
+ENV/
+env.bak/
+venv.bak/
 .vscode/
 .vs/


--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+ci:
+  autofix_prs: true
+  autofix_commit_msg: "[Lint]: [pre-commit.ci] auto fixes [...]"
+  autoupdate_commit_msg: "[CI] [pre-commit.ci] autoupdate"
+  autoupdate_schedule: monthly
+default_stages: [pre-commit, pre-push, manual]
+exclude: '^(build|3rdparty)/.*$'  # exclude build and 3rdparty directories
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v6.0.0
+    hooks:
+      - id: check-symlinks
+      - id: destroyed-symlinks
+      # FIXME: enable these hooks
+      # - id: trailing-whitespace
+      # - id: end-of-file-fixer
+      - id: check-added-large-files
+      - id: check-merge-conflict
+        fail_fast: true
+      # FIXME: enable these hooks
+      # - id: check-executables-have-shebangs
+      # - id: check-shebang-scripts-are-executable
+      - id: detect-private-key
+      - id: check-yaml
+      - id: check-toml
+      - id: check-ast
+        fail_fast: true
+      - id: debug-statements
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v15.0.7  # sync with requirements-lint.txt
+    hooks:
+      - id: clang-format
+        exclude: |
+          (?ix)(
+            ^.+\.(cu|cuh)$|
+            ^.+\.json$
+          )
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.14.0  # sync with requirements-lint.txt
+    hooks:
+      - id: ruff-check
+        args: [--fix, --exit-non-zero-on-fix]
+  - repo: https://github.com/google/yapf
+    rev: v0.43.0  # sync with requirements-lint.txt
+    hooks:
+      - id: yapf
+        args: [--recursive, --in-place]
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.4.1  # sync with requirements-lint.txt
+    hooks:
+      - id: codespell
+        additional_dependencies: [".[toml]"]
+        exclude: |
+          (?x)(
+            ^.+\.(cpp|hpp|cxx|cc|c|h|cu|cuh)$|
+            ^.+\.svg$|
+            ^.*\brequirements\b.*\.txt$
+          )
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -56,7 +56,7 @@ else()

  # Set default build type to RelWithDebInfo if not provided
  if(NOT CMAKE_BUILD_TYPE)
-  # Set default build type to Release if not provided
+    # Set default build type to Release if not provided
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    message(STATUS "Setting default build type to ${CMAKE_BUILD_TYPE}")
  endif()
@@ -199,7 +199,7 @@ if(USE_CUDA)
  set(CUDA_MAJOR_VERSION ${CUDAToolkit_VERSION_MAJOR})
  message(STATUS "Setting CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}")
  add_compile_definitions(CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION})
-  
+
  list(APPEND TILE_LANG_INCLUDES ${CUDAToolkit_INCLUDE_DIRS})
 endif(USE_CUDA)


--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,14 +2,19 @@

 That would be awesome if you want to contribute something to TileLang!

- [Contributing](CONTRIBUTING.md#contributing)
-  - [Reporting Bugs](CONTRIBUTING.md#reporting-bugs)
-  - [Asking Questions](CONTRIBUTING.md#asking-questions)
-  - [Submitting Pull Requests](CONTRIBUTING.md#submitting-pull-requests)
-  - [Repository Setup](CONTRIBUTING.md#repository-setup)
-  - [Running Tests](CONTRIBUTING.md#running-tests)
+### Table of Contents  <!-- omit in toc --> <!-- markdownlint-disable heading-increment -->

-## Reporting Bugs
+- [Report Bugs](#report-bugs)
+- [Ask Questions](#ask-questions)
+- [Submit Pull Requests](#submit-pull-requests)
+- [Setup Development Environment](#setup-development-environment)
+- [Install Develop Version](#install-develop-version)
+- [Lint Check](#lint-check)
+- [Test Locally](#test-locally)
+- [Build Wheels](#build-wheels)
+- [Documentation](#documentation)
+
+## Report Bugs

 If you run into any weird behavior while using TileLang, feel free to open a new issue in this repository! Please run a **search before opening** a new issue, to make sure that someone else hasn't already reported or solved the bug you've found.

@@ -18,35 +23,86 @@ Any issue you open must include:
 - Code snippet that reproduces the bug with a minimal setup.
 - A clear explanation of what the issue is.

-
-## Asking Questions
+## Ask Questions

 Please ask questions in issues.

-## Submitting Pull Requests
+## Submit Pull Requests

 All pull requests are super welcomed and greatly appreciated! Issues in need of a solution are marked with a [`♥ help`](https://github.com/ianstormtaylor/TileLang/issues?q=is%3Aissue+is%3Aopen+label%3A%22%E2%99%A5+help%22) label if you're looking for somewhere to start.

-Please run `./format.sh` before submitting a pull request to make sure that your code is formatted correctly.
+If you're new to contributing to TileLang, please follow the guidelines below before submitting a pull request.
+
+> [!NOTE]
+> Please include tests and docs with every pull request if applicable!
+
+## Setup Development Environment
+
+Before contributing to TileLang, please follow the instructions below to set up your development environment.
+
+1. Fork TileLang ([fork](https://github.com/tile-ai/tilelang/fork)) on GitHub and clone the repository.
+
+    ```bash
+    git clone --recurse-submodules git@github.com:<your username>/tilelang.git  # use the SSH protocol
+    cd tilelang
+
+    git remote add upstream git@github.com:tile-ai/tilelang.git
+    ```
+
+2. Set up a development environment:
+
+    ```bash
+    uv venv --seed .venv  # use `python3 -m venv .venv` if you don't have `uv`
+
+    source .venv/bin/activate
+    python3 -m pip install --upgrade pip setuptools wheel "build[uv]"
+    uv pip install --requirements requirements-dev.txt
+    ```
+
+3. Set up the [`pre-commit`](https://pre-commit.com) hooks:
+
+    ```bash
+    pre-commit install --install-hooks
+    ```

-Please include tests and docs with every pull request!
+Then you are ready to rock. Thanks for contributing to TileLang!

-## Repository Setup
+## Install Develop Version

-To run the build, you need to have the TileLang repository cloned to your computer. After that, you need to `cd` into the directory where you cloned it, and install the dependencies with `python`:
+To install TileLang in an "editable" mode, run:

 ```bash
-python setup.py install
+python3 -m pip install --no-build-isolation --verbose --editable .
 ```

+in the main directory. This installation is removable by:

-## Running Tests
+```bash
+python3 -m pip uninstall tilelang
+```
+
+## Lint Check
+
+To check the linting, run:
+
+```bash
+pre-commit run --all-files
+```
+
+## Test Locally

-To run the tests, start by building the project as described in the [Repository Setup](CONTRIBUTING.md#repository-setup) section.
+To run the tests, start by building the project as described in the [Setup Development Environment](#setup-development-environment) section.

 Then you can rerun the tests with:

-```text
-python -m pytest testing
+```bash
+python3 -m pytest testing
 ```

+## Build Wheels
+
+_TBA_
+
+## Documentation
+
+_TBA_
--- a/docs/deeplearning_operators/matmul.md
+++ b/docs/deeplearning_operators/matmul.md
@@ -8,7 +8,7 @@
 :class: myclass1 myclass2
 :name: a-tip-reference

-   This document is still **experimental** and may be incomplete.  
+   This document is still **experimental** and may be incomplete.
   Suggestions and improvements are highly encouraged—please submit a PR!
 :::

@@ -256,4 +256,4 @@ For more advanced usage—including partial lowering, explicitly controlling thr
 * [BitBLAS](https://github.com/tile-ai/bitblas)
 * [Triton](https://github.com/openai/triton)
 * [Cutlass](https://github.com/NVIDIA/cutlass)
-* [PyCUDA](https://documen.tician.de/pycuda/)
+* [PyCUDA](https://documen.tician.de/pycuda/)  <!-- codespell:ignore -->
--- a/examples/deepseek_v32/fp8_lighting_indexer.py
+++ b/examples/deepseek_v32/fp8_lighting_indexer.py
@@ -258,6 +258,7 @@ def ref_fp8_mqa_logits(q: torch.Tensor, kv: torch.Tensor, weights: torch.Tensor,
    cost = mask.sum()
    return logits, cost

+
 def test_fp8_lighting_indexer(S=4096, SKV=8192, H=32, HKV=1, D=64, kv_stride=1):
    q = torch.randn(S, H, D, device="cuda", dtype=torch.bfloat16).to(torch.bfloat16)
    kv = torch.randn(SKV, D, device="cuda", dtype=torch.bfloat16).to(torch.bfloat16)
@@ -302,5 +303,6 @@ def test_fp8_lighting_indexer(S=4096, SKV=8192, H=32, HKV=1, D=64, kv_stride=1):
    print(f"logits_tflops: {logits_tflops}, logits_ms: {logits_ms}")
    print(f"cost_ref: {cost_ref}")

+
 if __name__ == "__main__":
    test_fp8_lighting_indexer()
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,11 @@ skip = [
    ".venv"
 ]

+[tool.ruff]
+target-version = "py38"
+line-length = 100
+output-format = "full"
+
 [tool.ruff.lint]
 select = [
    # pycodestyle
@@ -48,13 +53,17 @@ ignore = [
    "E741",
    # line too long
    "E501",
+    # if-else-block instead of ternary
+    "SIM108",
    # key in dict.keys()
    "SIM118",
    # memory leaks
    "B019",
+    # zip without explicit strict
+    "B905",
    # No such file or directory
    "E902",
 ]
 [tool.ruff.lint.per-file-ignores]
 "3rdparty/**/*" = ["ALL"]
-"examples/deepseek_v32/inference/**/*" = ["ALL"]
\ No newline at end of file
+"examples/deepseek_v32/inference/**/*" = ["ALL"]
--- a/requirements-lint.txt
+++ b/requirements-lint.txt
 # formatting
-yapf==0.40.2
-toml==0.10.2
-tomli==2.0.1
-ruff==0.6.5
-codespell==2.3.0
+pre-commit
+yapf==0.43.0
+ruff==0.14.0
+codespell[toml]==2.4.1
 clang-format==15.0.7
 clang-tidy==18.1.8
--- a/setup.py
+++ b/setup.py
@@ -417,7 +417,7 @@ def patch_libs(libpath):
    subprocess.run([patchelf_path, '--set-rpath', '$ORIGIN', libpath])


-class TileLangBuilPydCommand(build_py):
+class TileLangBuildPyCommand(build_py):
    """Customized setuptools install command - builds TVM after setting up LLVM."""

    def run(self):
@@ -643,7 +643,7 @@ class CythonExtension(Extension):
        self.sourcedir = os.path.abspath(sourcedir)


-class TilelangExtensionBuild(build_ext):
+class TileLangExtensionBuild(build_ext):
    """
    Custom build_ext command for CMake-based projects.

@@ -929,8 +929,8 @@ setup(
        CythonExtension("TileLangCython", sourcedir="."),
    ],
    cmdclass={
-        "build_py": TileLangBuilPydCommand,
+        "build_py": TileLangBuildPyCommand,
        "sdist": TileLangSdistCommand,
-        "build_ext": TilelangExtensionBuild,
+        "build_ext": TileLangExtensionBuild,
    },
 )
--- a/src/layout/gemm_layouts.cc
+++ b/src/layout/gemm_layouts.cc
@@ -588,7 +588,7 @@ Layout makeGemmVoltaABLayout(int stride, int continuous, bool is_a,

 // ref:
 // https://github.com/nvidia/cutlass/blob/ad7b2f5e84fcfa124cb02b91d5bd26d238c0459e/include/cutlass/layout/tensor_op_multiplicand_sm75.h#L54
-// Althought the four settings (T or NT) used distinct layouts in CUTLASS, they
+// Although the four settings (T or NT) used distinct layouts in CUTLASS, they
 // appeared to result in the same mem layout
 Layout makeTensorOpMultiplicand(int mat_stride, int mat_continuous,
                                int elementsize, int crosswise) {

--- a/src/op/parallel.cc
+++ b/src/op/parallel.cc
@@ -215,9 +215,9 @@ LayoutMap ParallelOpNode::InferLayout(const LayoutInferArgs &T,
    return {};
  if (level == InferLevel::kStrict) {
    LayoutMap results;
-    // Deduce buffers that shoule be complicated replicated.
+    // Deduce buffers that should be completely replicated.
    // For example:
-    // for i in T.Parllel(m):
+    // for i in T.Parallel(m):
    //   fragment[0] = x[i]
    // then fragment[0] must be replicated on all threads.
    for (const auto &[buffer, indices] : indice_map_) {

--- a/src/target/codegen_cuda.cc
+++ b/src/target/codegen_cuda.cc
@@ -2210,7 +2210,7 @@ void CodeGenTileLangCUDA::VisitExpr_(const BufferLoadNode *op,
  DataType element_dtype = op->buffer->dtype;

  int lanes = op->dtype.lanes();
-  // delcare type.
+  // declare type.
  if (value_dtype.lanes() == element_dtype.lanes()) {
    std::string ref = GetBufferRef(op->dtype, op->buffer.get(), index);
    HandleVolatileLoads(ref, op, os);

--- a/src/target/ptx.h
+++ b/src/target/ptx.h
@@ -258,7 +258,7 @@ std::string PrintArriveBarrierAsm(const std::string &barrier);
 * \brief Print ptx barrier arrival with expect tx operation using
 * mbarrier.arrive.expect_tx \param barrier: The name of the barrier in shared
 * memory. \param byte_count: Increases the tx count of the mbarrier object to
- * track completion of addtional async transactions.
+ * track completion of additional async transactions.
 */
 std::string PrintArriveBarrierExpectTxAsm(const std::string &barrier,
                                          const std::string &byte_count);

--- a/src/transform/inject_assumes.cc
+++ b/src/transform/inject_assumes.cc
@@ -33,8 +33,8 @@ private:
    };
    tvm::StructuralHash sh;
    tvm::StructuralEqual se;
-    // grouped by expr, since the amount of varidic shape symbols is usualy much
-    // smaller than buffer
+    // grouped by expr, since the amount of variadic shape symbols is usually
+    // much smaller than buffer
    std::vector<Item> items;
    // hash => index in items
    std::unordered_map<size_t, std::vector<size_t>> buckets;

--- a/src/transform/loop_vectorize_dynamic.cc
+++ b/src/transform/loop_vectorize_dynamic.cc
@@ -243,9 +243,9 @@ private:
  std::vector<PrimExpr> conditions_;
 };

-class VectorizedConditionExtracter : public StmtExprVisitor {
+class VectorizedConditionExtractor : public StmtExprVisitor {
 public:
-  VectorizedConditionExtracter() = default;
+  VectorizedConditionExtractor() = default;
  std::vector<PrimExpr> GetConditions(const Stmt &body) {
    this->VisitStmt(body);
    return conditions_;
@@ -268,6 +268,9 @@ private:
  std::vector<PrimExpr> conditions_;
 };

+// backward-compatibility: extracter -> extractor
+using VectorizedConditionExtracter = VectorizedConditionExtractor;
+
 class NestedLoopChecker : public StmtExprVisitor {
 public:
  NestedLoopChecker() : loop_num_(0) {}
@@ -391,8 +394,8 @@ private:
    vmap.Set(fnode->loop_var, outer_var * vector_size_ + inner_var);
    Stmt body = Substitute(fnode->body, vmap);

-    VectorizedConditionExtracter extracter;
-    std::vector<PrimExpr> conditions = extracter.GetConditions(body);
+    VectorizedConditionExtractor extractor;
+    std::vector<PrimExpr> conditions = extractor.GetConditions(body);

    VectorizedConditionMutator condition_mutator(inner_var, vector_size_);


--- a/tilelang/jit/adapter/libgen.py
+++ b/tilelang/jit/adapter/libgen.py
@@ -64,7 +64,7 @@ class LibraryGenerator(object):
        verbose = self.verbose
        if is_cuda_target(target):
            from tilelang.env import CUTLASS_INCLUDE_DIR
-            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False)
+            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False)  # noqa: SIM115
            target_arch = get_target_arch(get_target_compute_version(target))
            libpath = src.name.replace(".cu", ".so")

@@ -111,7 +111,7 @@ class LibraryGenerator(object):

        elif is_hip_target(target):
            from tilelang.env import COMPOSABLE_KERNEL_INCLUDE_DIR
-            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False)
+            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False)  # noqa: SIM115
            libpath = src.name.replace(".cpp", ".so")
            rocm_path = find_rocm_path()
            arch = get_rocm_arch(rocm_path)
@@ -128,7 +128,7 @@ class LibraryGenerator(object):
            ]
        elif is_cpu_target(target):
            from tilelang.contrib.cc import get_cplus_compiler
-            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False)
+            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False)  # noqa: SIM115
            libpath = src.name.replace(".cpp", ".so")

            command = [get_cplus_compiler(), "-std=c++17", "-fPIC", "-shared", src.name]
@@ -228,7 +228,7 @@ class PyLibraryGenerator(LibraryGenerator):
        verbose = self.verbose
        if is_cuda_target(target):
            from tilelang.env import (CUDA_HOME, CUTLASS_INCLUDE_DIR, TILELANG_TEMPLATE_PATH)
-            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False)
+            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False)  # noqa: SIM115
            libpath = src.name.replace(".cu", ".cubin")

            project_root = osp.join(osp.dirname(__file__), "..", "..")