Categorize `tests/kernels/` based on kernel type (#16799)

Signed-off-by: mgoin <mgoin64@gmail.com>

Categorize `tests/kernels/` based on kernel type (#16799)
Signed-off-by: mgoin <mgoin64@gmail.com>
6317a517 · Michael Goin · GitHub · aa72d9a4 · 6317a517 · 6317a517
Unverified commit 6317a517, authored Apr 23, 2025 by Michael Goin; committed by GitHub on Apr 23, 2025.
14 changed files
--- a/tests/kernels/test_block_int8.py
+++ b/tests/kernels/test_block_int8.py
@@ -6,6 +6,7 @@ import itertools
 import pytest
 import torch
+from tests.kernels.utils_block import native_w8a8_block_matmul
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe import fused_moe
@@ -13,8 +14,6 @@ from vllm.model_executor.layers.quantization.utils.int8_utils import (
    w8a8_block_int8_matmul)
 from vllm.platforms import current_platform
-from .utils_block import native_w8a8_block_matmul
 if current_platform.get_device_capability() < (7, 0):
    pytest.skip("INT8 Triton requires CUDA 7.0 or higher",
                allow_module_level=True)

--- a/tests/kernels/test_cutlass_2of4_sparse.py
+++ b/tests/kernels/test_cutlass_2of4_sparse.py
@@ -7,13 +7,12 @@ Run `pytest tests/kernels/test_semi_structured.py`.
 import pytest
 import torch
+from tests.kernels.utils import baseline_scaled_mm, to_fp8, to_int8
 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
    sparse_cutlass_supported)
 from vllm.platforms import current_platform
-from .utils import baseline_scaled_mm, to_fp8, to_int8
 CUDA_DEVICES = [
    f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
 ]

--- a/tests/kernels/test_cutlass.py
+++ b/tests/kernels/test_cutlass.py
@@ -8,13 +8,11 @@ import random
 import pytest
 import torch
-from tests.kernels.utils import opcheck
+from tests.kernels.utils import baseline_scaled_mm, opcheck, to_fp8, to_int8
 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
 from vllm.utils import cdiv
-from .utils import baseline_scaled_mm, to_fp8, to_int8
 MNK_FACTORS = [
    (1, 256, 128),
    (1, 16384, 1024),

--- a/tests/kernels/test_fp8_quant.py
+++ b/tests/kernels/test_fp8_quant.py
--- a/tests/kernels/test_ggml.py
+++ b/tests/kernels/test_ggml.py
--- a/tests/kernels/test_gguf.py
+++ b/tests/kernels/test_gguf.py
--- a/tests/kernels/test_gptq.py
+++ b/tests/kernels/test_gptq.py
--- a/tests/kernels/test_int8_kernel.py
+++ b/tests/kernels/test_int8_kernel.py
--- a/tests/kernels/test_int8_quant.py
+++ b/tests/kernels/test_int8_quant.py
--- a/tests/kernels/test_machete_mm.py
+++ b/tests/kernels/test_machete_mm.py
--- a/tests/kernels/test_marlin_gemm.py
+++ b/tests/kernels/test_marlin_gemm.py
--- a/tests/kernels/test_nvfp4_quant.py
+++ b/tests/kernels/test_nvfp4_quant.py
--- a/tests/kernels/test_nvfp4_scaled_mm.py
+++ b/tests/kernels/test_nvfp4_scaled_mm.py
--- a/tests/kernels/test_triton_scaled_mm.py
+++ b/tests/kernels/test_triton_scaled_mm.py