解决 W8A8 PP16 开启 Marlin 时的 OOM 问题

21f31cf6 · SAC_fanth · aa71cc53 · 21f31cf6
Commit 21f31cf6 authored Nov 10, 2025 by SAC_fanth
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe_marlin.py ...ation/compressed_tensors/compressed_tensors_moe_marlin.py +1 -1

No files found.
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe_marlin.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe_marlin.py
@@ -127,7 +127,7 @@ class CompressedTensorsW8A8Int8MarlinMoEMethod(CompressedTensorsMarlinMoEMethod)
            w1_marlin_in = get_w8a8_int8_marlin_weights(layer.w13_weight[ii])
            w1_marlin_list.append(w1_marlin_in)
        w1_marlin = torch.stack(w1_marlin_list, dim=0)
+        del w1_marlin_list
        w2_marlin_list = []
        for ii in range(layer.w2_weight.shape[0]):
            w2_marlin_in = get_w8a8_int8_marlin_weights(layer.w2_weight[ii])