Commit 1b3e4da4 authored by gaoqiong
Browse files

分离kmai 120cu与120cu w8a8-int8优化config共享

parent d59f30d4
......@@ -1766,6 +1766,8 @@ class W8a8GetCacheJSON:
def get_w8a8json_name(self, n, k):
    """Return the path of the W8A8 Triton config JSON for shape (n, k).

    The file name embeds the current platform's device name with spaces
    replaced by underscores. When that name contains ``K100_AI`` and the
    current CUDA device reports 120 multiprocessors, the dedicated tag
    ``K100_AI_120`` is used instead, so that variant resolves to its own
    config file under ``self.triton_json_dir``.
    """
    from vllm.platforms import current_platform

    reported_name = current_platform.get_device_name()
    device_name = reported_name.replace(" ", "_")

    if 'K100_AI' in device_name:
        # Device properties are only queried once the name has matched,
        # mirroring the short-circuit order of a combined condition.
        current_index = torch.cuda.current_device()
        properties = torch.cuda.get_device_properties(current_index)
        if properties.multi_processor_count == 120:
            device_name = 'K100_AI_120'

    return self.triton_json_dir + f"/W8A8_{n}_{k}_{device_name}.json"
def get_blockint8_triton_cache(self,file_path,n,k,block_n,block_k):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment