分离kmai 120cu与120cu w8a8-int8优化config共享

1b3e4da4 · gaoqiong · d59f30d4 · 1b3e4da4
Commit 1b3e4da4 authored Jun 18, 2025 by gaoqiong
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 1 deletion

vllm/utils.py vllm/utils.py +3 -1

No files found.
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -1766,6 +1766,8 @@ class W8a8GetCacheJSON:
    # Build the path of the W8A8 JSON file for shape (n, k) on the current
    # device: <self.triton_json_dir>/W8A8_<n>_<k>_<device_name>.json
    def get_w8a8json_name(self,n,k):
        from vllm.platforms import current_platform
        # Platform-reported device name, with spaces replaced by "_" before it
        # is embedded in the file name below.
        device_name = current_platform.get_device_name().replace(" ", "_")
        # K100_AI devices whose current CUDA device reports exactly 120
        # multiprocessors get the dedicated name "K100_AI_120", so they no
        # longer share the JSON file used by other K100_AI devices.
        # NOTE(review): relies on a module-level `torch` import that is not
        # visible in this hunk -- confirm.
+        if 'K100_AI' in device_name and torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count == 120:
+            device_name='K100_AI_120'
        return self.triton_json_dir+f"/W8A8_{n}_{k}_{device_name}.json"
    
    def get_blockint8_triton_cache(self,file_path,n,k,block_n,block_k):