Commit aefb81d8 authored by gaoqiong's avatar gaoqiong
Browse files

修改 Triton 量化 W8A8 config 的命名方式

parent 14af6a70
......@@ -70,6 +70,7 @@ import vllm.envs as envs
from vllm.logger import enable_trace_function_call, init_logger
import json
if TYPE_CHECKING:
from argparse import Namespace
......@@ -1956,9 +1957,10 @@ class W8a8GetCacheJSON:
self.triton_json_list=[]
self.weight_shapes=[]
self.moe_weight_shapes=[]
device_name = current_platform.get_device_name().replace(" ", "_")
if 'K100_AI' in device_name and torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count == 120:
device_name='K100_AI_120'
arch_name = torch.cuda.get_device_properties("cuda").gcnArchName.split(':')[0]
arch_cu = torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count
device_name =arch_name+'_'+str(arch_cu)+'cu'
self.device_name=device_name
self.topk=1
self.quant_method=None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment