Unverified commit 5e529522 authored by Yichen Yan, committed by GitHub

[Lint] Add ruff config to check for useless spaces (#807)

* update lint config

* Remove spaces for blank line

* update
parent 4d54854b
@@ -133,7 +133,7 @@ def run_autotune(M: int, N: int, K: int):
def test_autotune_matmul():
    """
    Run the autotuning validation for the matmul kernel on a 1024x1024x1024 problem.
    This test constructs random CUDA tensors, autotunes the JIT-compiled block-level matrix-multiplication kernel,
    executes it, and asserts the result matches a reference CPU implementation within tolerances.
    """
...
@@ -55,4 +55,4 @@ def test_lower_hopper_intrin_barrier():
if __name__ == "__main__":
    tilelang.testing.main()
\ No newline at end of file
@@ -118,4 +118,4 @@ def test_warp_specialized():
if __name__ == "__main__":
    tilelang.testing.main()
\ No newline at end of file
@@ -713,7 +713,7 @@ def autotune( # This is the new public interface
    This decorator can be used without arguments (e.g., `@tilelang.jit`):
    Applies JIT compilation with default settings.
    Tips:
    - If you want to skip the auto-tuning process, you can override the tunable parameters in the function signature.
    ```python
...
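The tip above is terse, so here is a minimal, hypothetical sketch of what it describes: pinning the tunable parameters when calling the decorated kernel factory so that no tuning search is launched. The decorator stacking, the parameter names (`block_M`, `block_N`, `block_K`), and the GEMM sizes are illustrative assumptions, not code from this repository.

```python
import tilelang
import tilelang.language as T  # assumed frontend import; not shown in this diff


@tilelang.autotune  # the docstring says the decorator works without arguments
@tilelang.jit
def gemm(M, N, K, block_M=128, block_N=128, block_K=32):
    # Kernel body elided in this sketch; block_M/block_N/block_K are the
    # tunable parameters referred to by the tip above.
    ...


# Supplying explicit values for every tunable parameter in the call skips the
# auto-tuning search, e.g.:
#   kernel = gemm(4096, 4096, 4096, block_M=64, block_N=64, block_K=32)
```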
@@ -78,7 +78,7 @@ __device__ __inline__ dim3 rasterization2DColumn(const int panel_width) {
  const auto bx = (panelIdx & 1) ? gridDim.x - (baseBlockIdx - panelIdx * panel_width * gridDim.x) / strideLd - 1 : (baseBlockIdx - panelIdx * panel_width * gridDim.x) / strideLd;
  const auto by = (baseBlockIdx - panelIdx * panel_width * gridDim.x) % strideLd + panelIdx * panel_width;
  const auto bz = blockIdx.z;
  dim3 blockIdx(bx, by, bz);
  return blockIdx;
}
...
@@ -6,4 +6,4 @@ from .gemv import GEMVTemplate  # noqa: F401
from .elementwise import ElementwiseTemplate  # noqa: F401
from .general_reduce import GeneralReductionTemplate  # noqa: F401
from .flashattention import FlashAttentionTemplate  # noqa: F401
from .conv import ConvTemplate  # noqa: F401
\ No newline at end of file
@@ -12,8 +12,8 @@ from tvm.tir import PrimFunc  # Import PrimFunc for handling tensor IR functions
@dataclass
class BaseTemplate(ABC):
    """
    Base class template for hardware-aware configurations.
    This serves as an abstract base class (ABC) that defines the structure
    for subclasses implementing hardware-specific optimizations.
    """
@@ -30,9 +30,9 @@ class BaseTemplate(ABC):
    def get_hardware_aware_configs(self, arch: TileDevice = None, topk: int = 10) -> List[Hint]:
        """
        Abstract method that must be implemented by subclasses.
        It should return a list of hardware-aware configurations (hints)
        based on the specified architecture.
        Args:
            arch (TileDevice, optional): The target architecture. Defaults to None.
            topk (int, optional): Number of top configurations to return. Defaults to 10.
@@ -104,7 +104,7 @@ class BaseTemplate(ABC):
        """
        Placeholder method that should be implemented by subclasses.
        This method is responsible for initializing the function.
        Raises:
            NotImplementedError: If not implemented in the subclass.
        """
...
@@ -62,8 +62,8 @@ class ConvTemplate(BaseTemplate):
        """
        Defines and initializes the convolution computation.
        This method sets up placeholders for input matrices, computes
        the convolution using TVM's compute API,
        and optionally applies bias and type casting.
        Raises:
...
@@ -44,8 +44,8 @@ class FlashAttentionTemplate(BaseTemplate):
        """
        Defines and initializes the matrix multiplication computation.
        This method sets up placeholders for input matrices, computes
        the matrix multiplication using TVM's compute API,
        and optionally applies bias and type casting.
        Raises:
...
@@ -12,7 +12,7 @@ class GEMVTemplate(BaseTemplate):
    """
    A template for Generalized Matrix-Vector Multiplication (GEMV).
    This template defines the computation for a matrix-vector multiplication
    with configurable parameters such as transposition, data types, and bias addition.
    """
@@ -43,8 +43,8 @@ class GEMVTemplate(BaseTemplate):
        """
        Defines and initializes the GEMV computation function.
        This method sets up placeholders for input matrices, computes
        the matrix-vector multiplication using TVM's compute API,
        and optionally applies bias and type casting.
        """
        M: int = 1  # Fixed M value, representing a single batch dimension
...
@@ -56,8 +56,8 @@ class MatmulTemplate(BaseTemplate):
        """
        Defines and initializes the matrix multiplication computation.
        This method sets up placeholders for input matrices, computes
        the matrix multiplication using TVM's compute API,
        and optionally applies bias and type casting.
        Raises:
...
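The matmul, GEMV, conv, and attention docstrings above all describe the same pattern: declare placeholders for the inputs, then express the result with TVM's compute API, with optional bias addition and type casting. As a point of reference, here is a minimal sketch of that pattern for a plain matmul using TVM's `te` API; the shapes, dtypes, and float32 accumulation are illustrative assumptions rather than the templates' actual parameters.

```python
from tvm import te

M = N = K = 1024
A = te.placeholder((M, K), name="A", dtype="float16")  # input placeholder
B = te.placeholder((K, N), name="B", dtype="float16")  # input placeholder
k = te.reduce_axis((0, K), name="k")

# C = A @ B expressed with te.compute; the bias addition and final cast that
# the templates mention would be added as further te.compute stages.
C = te.compute(
    (M, N),
    lambda i, j: te.sum(A[i, k].astype("float32") * B[k, j].astype("float32"), axis=k),
    name="C",
)
```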
@@ -126,7 +126,7 @@ def compile_cuda(code,
def find_cuda_path():
    """Utility function to find cuda path
    Returns
    -------
    path : str
...
@@ -5,7 +5,7 @@ from tvm.target import Target
def register_cuda_postproc(func: Callable[[str, Target], str], override: bool = True):
    """Register a post-processing function for CUDA code generation.
    Args:
        func: A callable that takes generated code (str) and target (Target) as input,
            and returns the processed code (str).
@@ -16,7 +16,7 @@ def register_cuda_postproc(func: Callable[[str, Target], str], override: bool =
def register_hip_postproc(func: Callable[[str, Target], str], override: bool = True):
    """Register a post-processing function for HIP code generation.
    Args:
        func: A callable that takes generated code (str) and target (Target) as input,
            and returns the processed code (str).
@@ -27,17 +27,17 @@ def register_hip_postproc(func: Callable[[str, Target], str], override: bool = T
def register_cuda_postproc_callback(func: Union[Callable, bool] = None, override: bool = True):
    """Decorator for registering CUDA post-processing callback function.
    Can be used with or without parentheses:
        @register_cuda_postproc_callback
        def func(code, target): ...
        @register_cuda_postproc_callback()
        def func(code, target): ...
        @register_cuda_postproc_callback(override=False)
        def func(code, target): ...
    Args:
        func: The function to be decorated or a boolean override flag
        override: Whether to override existing registered function. Defaults to True.
@@ -60,17 +60,17 @@ def register_cuda_postproc_callback(func: Union[Callable, bool] = None, override
def register_hip_postproc_callback(func: Union[Callable, bool] = None, override: bool = True):
    """Decorator for registering HIP post-processing callback function.
    Can be used with or without parentheses:
        @register_hip_postproc_callback
        def func(code, target): ...
        @register_hip_postproc_callback()
        def func(code, target): ...
        @register_hip_postproc_callback(override=False)
        def func(code, target): ...
    Args:
        func: The function to be decorated or a boolean override flag
        override: Whether to override existing registered function. Defaults to True.
...
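Since the docstrings above only list the decorator forms, a small end-to-end illustration may help. This is a hedged sketch: the top-level `tilelang` import location for the callback decorator is an assumption not confirmed by this hunk; only the callback signature `(code, target) -> str` and the decorator forms come from the docstrings.

```python
# Assumed export location; the hunk does not show which module re-exports this.
from tilelang import register_cuda_postproc_callback


@register_cuda_postproc_callback
def add_banner(code: str, target) -> str:
    # Receives the generated CUDA source and the tvm Target, and must return
    # the (possibly modified) source string.
    return "// post-processed by a registered callback\n" + code
```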
@@ -21,13 +21,13 @@ class KernelParam:
    def from_buffer(cls, buffer: Buffer):
        """
        Creates a KernelParam instance from a TVM Buffer object.
        Args:
            buffer: TVM Buffer object containing dtype and shape information
        Returns:
            KernelParam instance with converted dtype and shape
        Raises:
            ValueError: If dimension type is not supported (not IntImm or Var)
        """
@@ -47,10 +47,10 @@ class KernelParam:
        """
        Creates a KernelParam instance from a TVM Variable object.
        Used for scalar parameters.
        Args:
            var: TVM Variable object containing dtype information
        Returns:
            KernelParam instance representing a scalar (empty shape)
        """
@@ -60,7 +60,7 @@ class KernelParam:
    def is_scalar(self) -> bool:
        """
        Checks if the parameter represents a scalar value.
        Returns:
            bool: True if parameter has no dimensions (empty shape), False otherwise
        """
@@ -69,7 +69,7 @@ class KernelParam:
    def is_unsigned(self) -> bool:
        """
        Checks if the parameter represents an unsigned integer type.
        Returns:
            bool: True if parameter is an unsigned integer type, False otherwise
        """
@@ -81,7 +81,7 @@ class KernelParam:
    def is_float8(self) -> bool:
        """
        Checks if the parameter represents a float8 type.
        Returns:
            bool: True if parameter is a float8 type, False otherwise
        """
@@ -93,7 +93,7 @@ class KernelParam:
    def is_boolean(self) -> bool:
        """
        Checks if the parameter represents a boolean type.
        Returns:
            bool: True if parameter is a boolean type, False otherwise
        """
...
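A short, hedged usage sketch of the constructor documented above. The import path for `KernelParam` is an assumption (this hunk does not name its module); `tvm.tir.decl_buffer` is standard TVM API and supplies the dtype and shape information that `from_buffer` converts.

```python
import tvm
# Assumed import path for KernelParam; the defining module is not shown in this hunk.
from tilelang.engine.param import KernelParam

# A 1024x1024 float16 buffer; from_buffer reads its dtype and shape.
buf = tvm.tir.decl_buffer((1024, 1024), dtype="float16", name="A")
param = KernelParam.from_buffer(buf)
# `param` then exposes the documented predicates (scalar / unsigned / float8 / boolean checks).
```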
@@ -65,7 +65,7 @@ def LowerAndLegalize(mod: IRModule, target: Target) -> IRModule:
    # Bind the target device information to the module
    """
    Bind target information and progressively legalize and lower frontend Tile IR into a form suitable for downstream optimization and codegen.
    This pass pipeline:
    - Binds the provided target to the module.
    - Legalizes frontend Tile IR into TVM-compatible constructs.
@@ -75,11 +75,11 @@ def LowerAndLegalize(mod: IRModule, target: Target) -> IRModule:
    - Legalizes vectorized loops and inserts safety checks for memory accesses.
    - Re-simplifies to remove redundancies introduced by safety checks.
    - Attempts loop vectorization for dynamic-shaped loops.
    Parameters:
        mod (IRModule): The input IR module containing frontend Tile IR.
        target (Target): Target device information to bind into the module.
    Returns:
        IRModule: The transformed module, ready for target-specific optimization passes.
    """
...
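For orientation, a minimal, hedged sketch of calling this phase. The module path `tilelang.engine.phase` is an assumption (only the signature appears in this hunk), and the empty `IRModule` is a stand-in for a module built from a frontend Tile IR program.

```python
import tvm
from tvm.target import Target
# Assumed module path; only the function signature appears in this hunk.
from tilelang.engine.phase import LowerAndLegalize

mod = tvm.IRModule()  # stand-in; normally produced from a frontend Tile IR program
target = Target("cuda")
legalized = LowerAndLegalize(mod, target)  # returns an IRModule ready for target-specific passes
```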
@@ -91,14 +91,14 @@ def get_mma_micro_size(dtype: Literal["float16", "int8"]):
    # Basic Tensor Core Matrix Multiply operation Unit
    """
    Return the MMA (Tensor Core) micro-tile dimensions for a given data type.
    This function returns the micro tile sizes (x, y, k) used by MMA/Tensor Core operations.
    - x: tile width in the output/result dimension
    - y: tile height in the output/result dimension
    - k: tile depth in the reduction/K dimension
    Accepted dtype strings include "float16", "int8" and some FP8 identifiers ("float8_e4m3", "float8_e5m2"). For FP8 and int8 types the reduction depth (`k`) is 32; for float16 it is 16.
    Returns:
        tuple[int, int, int]: (micro_size_x, micro_size_y, micro_size_k)
    """
...
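The rule the docstring states can be summarized with a tiny standalone sketch. This illustrates the documented mapping only and is not the library's implementation; in particular, the 16x16 output micro-tile is a common Tensor Core size assumed here, since the hunk only specifies the reduction depth `k`.

```python
def mma_micro_size_sketch(dtype: str) -> tuple[int, int, int]:
    # Documented rule: k = 32 for int8 and FP8 dtypes, k = 16 for float16.
    micro_k = 32 if dtype in ("int8", "float8_e4m3", "float8_e5m2") else 16
    # 16x16 output micro-tile assumed for illustration only.
    return 16, 16, micro_k


micro_x, micro_y, micro_k = mma_micro_size_sketch("float16")  # (16, 16, 16)
assert mma_micro_size_sketch("int8")[2] == 32
```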
""" """
This module provides an auto-tuning infrastructure for TileLang (tl) programs. This module provides an auto-tuning infrastructure for TileLang (tl) programs.
It includes functionality to JIT-compile TileLang programs into a runnable It includes functionality to JIT-compile TileLang programs into a runnable
kernel adapter using TVM. kernel adapter using TVM.
""" """
......
@@ -2,4 +2,4 @@ from .base import BaseKernelAdapter  # noqa: F401
from .dlpack import TorchDLPackKernelAdapter  # noqa: F401
from .ctypes import CtypesKernelAdapter  # noqa: F401
from .cython import CythonKernelAdapter  # noqa: F401
from .nvrtc import NVRTCKernelAdapter  # noqa: F401
\ No newline at end of file
@@ -16,7 +16,7 @@ from tilelang.utils.language import retrieve_func_from_module
class CtypesKernelAdapter(BaseKernelAdapter):
    """Adapter class that converts TVM/TIR functions to callable CUDA kernels using ctypes.
    This adapter handles:
    1. Converting TIR functions to compiled CUDA libraries
    2. Managing dynamic shapes in tensor operations
@@ -52,7 +52,7 @@ class CtypesKernelAdapter(BaseKernelAdapter):
                 pass_configs: Optional[Dict[str, Any]] = None,
                 compile_flags: Optional[List[str]] = None):
        """Initialize the adapter with the given TIR function or module.
        Args:
            params: List of tensor types for inputs/outputs
            result_idx: Indices of output tensors
@@ -157,7 +157,7 @@ class CtypesKernelAdapter(BaseKernelAdapter):
    def _process_dynamic_symbolic(self) -> Dict[tir.Var, Tuple[int, int, int]]:
        """Extract information about dynamic shapes from the TIR function.
        Maps symbolic variables to their corresponding (id, buffer_index, dimension)
        for runtime shape resolution.
        id represents shape or stride, 0 represents shape, 1 represents stride
@@ -184,7 +184,7 @@ class CtypesKernelAdapter(BaseKernelAdapter):
    def _forward_from_prebuild_lib(self, *args, stream: Optional[int] = None):
        """Low-level function to call the compiled CUDA kernel.
        Converts PyTorch tensor pointers to C void pointers for ctypes interface.
        """
        ctypes_args = [
@@ -197,17 +197,17 @@ class CtypesKernelAdapter(BaseKernelAdapter):
                 *ins: List[torch.Tensor],
                 stream: Optional[int] = None):
        """High-level wrapper for kernel execution.
        Handles:
        1. Input validation
        2. Output tensor allocation
        3. Dynamic shape resolution
        4. CUDA stream management
        Args:
            ins: Input PyTorch tensors
            stream: Optional CUDA stream for asynchronous execution
        Returns:
            Single tensor or list of tensors containing the kernel results
        """
...
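The low-level forward path described above ultimately hands raw device pointers to the compiled library through ctypes. A hedged, standalone illustration of that conversion (the tensor and variable names are made up; this is not the adapter's actual code):

```python
import ctypes
import torch

x = torch.empty(1024, dtype=torch.float16, device="cuda")
# torch.Tensor.data_ptr() returns the device address as an int; wrapping it in
# ctypes.c_void_p is the "tensor pointer -> C void pointer" step the docstring mentions.
x_ptr = ctypes.c_void_p(x.data_ptr())
```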
@@ -176,7 +176,7 @@ from cython_wrapper import CythonKernelWrapper
class CythonKernelAdapter(BaseKernelAdapter):
    """Adapter class that converts TVM/TIR functions to callable CUDA kernels using ctypes.
    This adapter handles:
    1. Converting TIR functions to compiled CUDA libraries
    2. Managing dynamic shapes in tensor operations
@@ -222,7 +222,7 @@ class CythonKernelAdapter(BaseKernelAdapter):
                 pass_configs: Optional[Dict[str, Any]] = None,
                 compile_flags: Optional[List[str]] = None):
        """Initialize the adapter with the given TIR function or module.
        Args:
            params: List of tensor types for inputs/outputs
            result_idx: Indices of output tensors
@@ -347,7 +347,7 @@ class CythonKernelAdapter(BaseKernelAdapter):
    def _process_dynamic_symbolic(self) -> Dict[tir.Var, Tuple[int, int, int]]:
        """Extract information about dynamic shapes from the TIR function.
        Maps symbolic variables to their corresponding (id, buffer_index, dimension)
        for runtime shape resolution.
        id represents shape or stride, 0 represents shape, 1 represents stride
@@ -374,7 +374,7 @@ class CythonKernelAdapter(BaseKernelAdapter):
    def _process_buffer_dtype(self) -> Dict[tir.Var, Tuple[int, torch.dtype]]:
        """Extract information about buffer dtypes from the TIR function.
        Maps buffer variables to their corresponding dtypes.
        """
        func = self.prim_func
@@ -390,7 +390,7 @@ class CythonKernelAdapter(BaseKernelAdapter):
    def _process_ptr_map(self) -> Dict[int, str]:
        """Extract information about pointer arguments from the TIR function.
        Maps pointer arguments to their corresponding (buffer_index, shape_dimension)
        for runtime shape resolution.
        """
@@ -407,7 +407,7 @@ class CythonKernelAdapter(BaseKernelAdapter):
                   Dict[tir.Var, Tuple[int, List[Tuple[int, int]]]],
                   List[Tuple[tir.Var]]]:
        """Extract information about static shapes from the TIR function.
        Maps buffer variables to their corresponding static shapes.
        """
        func = self.prim_func
@@ -438,7 +438,7 @@ class CythonKernelAdapter(BaseKernelAdapter):
    def _process_buffer_device(self) -> Dict[tir.Var, Tuple[int, torch.device]]:
        """Extract information about buffer devices from the TIR function.
        Maps buffer variables to their corresponding devices.
        """
        func = self.prim_func
@@ -462,7 +462,7 @@ class CythonKernelAdapter(BaseKernelAdapter):
    def _forward_from_prebuild_lib(self, *args, stream: Optional[int] = None):
        """Low-level function to call the compiled CUDA kernel.
        Converts PyTorch tensor pointers to C void pointers for ctypes interface.
        """
        ctypes_args = [
...