update blockwise_int8.py

5d35160b · zhuwenwen · e34d3444 · 5d35160b
Commit 5d35160b authored Jul 14, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

vllm/model_executor/layers/quantization/blockwise_int8.py +1 -1

No files found.
--- a/vllm/model_executor/layers/quantization/blockwise_int8.py
+++ b/vllm/model_executor/layers/quantization/blockwise_int8.py
@@ -139,7 +139,6 @@ class BlockInt8LinearMethod(LinearMethodBase):
        assert self.quant_config.weight_block_size is not None
        assert self.quant_config.is_checkpoint_int8_serialized
-        self.tritonsingleton= W8a8GetCacheJSON()
    def create_weights(
        self,
@@ -337,6 +336,7 @@ class BlockInt8MoEMethod:
        self.quant_config = quant_config
        assert self.quant_config.weight_block_size is not None
        assert self.quant_config.is_checkpoint_int8_serialized
+        self.tritonsingleton= W8a8GetCacheJSON()
    def create_weights(
        self,