增加稠密模型channelwise autotuning 接口

3e485650 · zhuwenwen · 4080ac85 · 3e485650 · 3e485650
Commit 3e485650 authored Jul 04, 2025 by zhuwenwen
Showing 2 changed files with 2 additions and 1 deletion

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +1 -0

vllm/utils.py +1 -1

No files found.
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -613,6 +613,7 @@ class CompressedTensorsLinearMethod(LinearMethodBase):
            _weight=weight_data.T.contiguous().reshape(n,-1)
            layer.weight.data=_weight
            
+        self.tritonsingleton.gen_model_json() 
        layer.scheme.process_weights_after_loading(layer)   
        
    def create_weights(self, layer: torch.nn.Module,

--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -1900,7 +1900,7 @@ class W8a8GetCacheJSON:
        self.quant_method=None

    #析构函数，最后会生成model.json的配置文件
-    def gen_model_json(self,E:int,block_size:Optional[list]=None):
+    def gen_model_json(self,E:Optional[int]=0,block_size:Optional[list]=None):
        json_dir = os.getenv('LMSLIM_TUNING_JSON', "None")
        if json_dir is not "None" and os.path.exists(json_dir):
            #生成模型配置文件