Unverified Commit d3e67deb authored by fzyzcjy, committed by GitHub

Fix redundant kernel in sink dtype conversion (#8966)

parent 442534aa
@@ -247,7 +247,7 @@ class GptOssAttention(nn.Module):
         )
         self.sinks = nn.Parameter(
-            torch.empty(self.num_heads, dtype=params_dtype), requires_grad=False
+            torch.empty(self.num_heads, dtype=torch.float32), requires_grad=False
         )
         self.o_proj = RowParallelLinear(
@@ -301,7 +301,7 @@ class GptOssAttention(nn.Module):
         hidden_states, forward_batch, inner_state = intermediate_state
         if inner_state is None:
             return hidden_states
-        attn_output = self.attn(*inner_state, sinks=self.sinks.to(torch.float32))
+        attn_output = self.attn(*inner_state, sinks=self.sinks)
         output, _ = self.o_proj(attn_output)
         return output
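The change moves the dtype conversion out of the hot path: the sinks parameter is now allocated in float32 at construction, so the checkpoint value is converted once when the weight is loaded, instead of `self.sinks.to(torch.float32)` launching an extra elementwise cast kernel on every forward call. A minimal, runnable sketch of the pattern follows; it is not the sglang code, and `fake_attn`, `q`, and the shapes are illustrative stand-ins for an attention backend that requires float32 sink logits.

import torch
import torch.nn as nn

def fake_attn(q, sinks):
    # Stand-in for the attention backend; it requires float32 sinks.
    assert sinks.dtype == torch.float32
    return q

num_heads, params_dtype = 8, torch.bfloat16
q = torch.randn(num_heads, 4)

# Before: parameter stored in the model dtype; every call pays for an
# elementwise cast kernel via .to(torch.float32).
sinks_before = nn.Parameter(
    torch.zeros(num_heads, dtype=params_dtype), requires_grad=False
)
out = fake_attn(q, sinks=sinks_before.to(torch.float32))

# After: parameter created in float32 up front; the conversion happens once
# when the checkpoint weight is copied in, and the forward pass hands the
# tensor to the backend unchanged.
sinks_after = nn.Parameter(
    torch.zeros(num_heads, dtype=torch.float32), requires_grad=False
)
out = fake_attn(q, sinks=sinks_after)

The per-step saving is small, but it removes a kernel launch from every decode iteration; the one-time cast cost presumably moves to weight loading, where the checkpoint tensor is copied into the float32 parameter.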