Fix fp16 dtype checks: compare tensor dtypes against torch.float16 instead of the string "fp16"

511eeccd · zhuwenwen · 42a95309 · 511eeccd · 511eeccd
Commit 511eeccd authored Nov 27, 2025 by zhuwenwen
Showing with 2 additions and 2 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py +1 -1

vllm/v1/attention/backends/flash_attn.py vllm/v1/attention/backends/flash_attn.py +1 -1

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1885,7 +1885,7 @@ def fused_experts_impl(
                                use_nn_moe=use_nn_moe)

        if activation == "silu":
-            if envs.VLLM_USE_FUSE_SILU_AND_MUL and intermediate_cache1.dtype == intermediate_cache2.dtype == "fp16":
+            if envs.VLLM_USE_FUSE_SILU_AND_MUL and intermediate_cache1.dtype == intermediate_cache2.dtype == torch.float16:
                from lightop import fuse_silu_and_mul
                fuse_silu_and_mul(intermediate_cache1.view(-1, N),intermediate_cache2)    
            else:

--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -566,7 +566,7 @@ class FlashAttentionImpl(AttentionImpl):
                    layer._v_scale,
                )
            else:
-                if envs.VLLM_USE_OPT_RESHAPE_AND_CACHE and key.dtype == value.dtype == "fp16":
+                if envs.VLLM_USE_OPT_RESHAPE_AND_CACHE and key.dtype == value.dtype == torch.float16:
                    from lightop import reshape_and_cache_cuda
                    reshape_and_cache_cuda(
                        key, value,