Commit 1b3e4da4 authored by gaoqiong
Browse files

分离kmai 120cu与120cu w8a8-int8优化config共享

parent d59f30d4
......@@ -1766,6 +1766,8 @@ class W8a8GetCacheJSON:
def get_w8a8json_name(self,n,k):
    """Build the path of the W8A8 JSON file for the given ``n`` and ``k``.

    The file name has the form ``W8A8_{n}_{k}_{device}.json`` and is placed
    under ``self.triton_json_dir``. ``device`` is the current platform's
    device name with spaces replaced by underscores, except that a device
    whose name contains ``K100_AI`` and which reports 120 multiprocessors
    is mapped to the dedicated tag ``K100_AI_120``.

    Args:
        n: Value interpolated into the first numeric slot of the file name.
        k: Value interpolated into the second numeric slot of the file name.

    Returns:
        The JSON file path as a string.
    """
    # Imported lazily inside the method, as in the original code.
    from vllm.platforms import current_platform

    platform_tag = current_platform.get_device_name().replace(" ", "_")

    # Only query the CUDA device properties when the name matches, so other
    # platforms never touch torch.cuda here.
    if 'K100_AI' in platform_tag:
        device_props = torch.cuda.get_device_properties(
            torch.cuda.current_device())
        if device_props.multi_processor_count == 120:
            # NOTE(review): presumably gives the 120-CU variant its own
            # tuned config file instead of sharing one — confirm.
            platform_tag = 'K100_AI_120'

    cache_dir = self.triton_json_dir
    return cache_dir + f"/W8A8_{n}_{k}_{platform_tag}.json"
def get_blockint8_triton_cache(self,file_path,n,k,block_n,block_k):
......@@ -2803,4 +2805,4 @@ def is_torch_equal_or_newer(target: str) -> bool:
return torch_version >= version.parse(target)
except Exception:
# Fallback to PKG-INFO to load the package info, needed by the doc gen.
return Version(importlib.metadata.version('torch')) >= Version(target)
\ No newline at end of file
return Version(importlib.metadata.version('torch')) >= Version(target)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment