Fix CI TestChunkedSGMV (#10737)

2101d93b · Lifu Huang · GitHub · 70e4b218 · 2101d93b · 2101d93b
Unverified Commit 2101d93b authored Sep 22, 2025 by Lifu Huang Committed by GitHub Sep 22, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 0 deletions

python/sglang/utils.py +6 -0

test/srt/lora/test_chunked_sgmv_backend.py +11 -0

No files found.
--- a/python/sglang/utils.py
+++ b/python/sglang/utils.py
@@ -621,6 +621,12 @@ class CachedKernel:

        return complete_args

+    def _clear_cache(self):
+        """
+        Empty ``self.kernel_cache`` via its ``clear()`` method; meant for use in tests.
+        """
+        self.kernel_cache.clear()
+

 def cached_triton_kernel(key_fn=None):
    """

--- a/test/srt/lora/test_chunked_sgmv_backend.py
+++ b/test/srt/lora/test_chunked_sgmv_backend.py
@@ -10,11 +10,18 @@ from sglang.srt.lora.triton_ops import (
    chunked_sgmv_lora_expand_forward,
    chunked_sgmv_lora_shrink_forward,
 )
+from sglang.srt.lora.triton_ops.chunked_sgmv_expand import _chunked_lora_expand_kernel
+from sglang.srt.lora.triton_ops.chunked_sgmv_shrink import _chunked_lora_shrink_kernel
 from sglang.srt.lora.utils import LoRABatchInfo

 CHUNK_SIZE = 16


+def reset_kernel_cache():  # calls _clear_cache() on the shrink and expand kernels (presumably CachedKernel wrappers - confirm)
+    _chunked_lora_shrink_kernel._clear_cache()
+    _chunked_lora_expand_kernel._clear_cache()
+
+
 def safe_matmul(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """Matrix multiplication with mixed precision handling for float16"""
    result = torch.matmul(a.float(), b.float())
@@ -436,6 +443,10 @@ class TestChunkedSGMV(unittest.TestCase):
        List[str],
    ]:
        """Create test batch with specified composition and mode"""
+
+        # Reset kernel cache to avoid cross-test contamination
+        reset_kernel_cache()
+
        seq_lengths = self.generate_sequence_lengths(
            batch_size, batch_mode, 1, self.max_seq_len
        )