Optimize triton_mrope with torch compile (#12112)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>

Optimize triton_mrope with torch compile (#12112)
Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
f389f017 · Yuan Luo · GitHub · caa4819b · f389f017
Unverified commit f389f017, authored Oct 27, 2025 by Yuan Luo; committed by GitHub on Oct 27, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 0 deletions

python/sglang/srt/layers/rotary_embedding.py python/sglang/srt/layers/rotary_embedding.py +3 -0

No files found.
--- a/python/sglang/srt/layers/rotary_embedding.py
+++ b/python/sglang/srt/layers/rotary_embedding.py
@@ -1424,6 +1424,7 @@ class MRotaryEmbedding(RotaryEmbedding):
        else:
            return self._forward_native(positions, query, key)
+    @torch.compile(dynamic=True, backend=get_compiler_backend())
    def _forward_triton(
        self,
        positions: torch.Tensor,
@@ -1442,6 +1443,7 @@ class MRotaryEmbedding(RotaryEmbedding):
        if positions.ndim == 2:
            assert self.mrope_section
+            torch._dynamo.graph_break()
            q, k = triton_mrope(
                query,
                key,
@@ -1453,6 +1455,7 @@ class MRotaryEmbedding(RotaryEmbedding):
                self.mrope_interleaved,
                self.is_neox_style,
            )
+            torch._dynamo.graph_break()
            return q.reshape(query_shape), k.reshape(key_shape)