"vscode:/vscode.git/clone" did not exist on "c6187f55f7c4844ed9ff5630d41114cbe6fccb6b"
Commit 1a9775b8 authored by gaoqiong
Browse files

增加w8a8的triton调度支持

parent 1c77f16e
......@@ -681,7 +681,8 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA):
json_file=self.tritonsingleton.get_w8a8json_name(n,k)
configs_dict=self.tritonsingleton.get_triton_cache(json_file,n,k)
all_json.update(configs_dict)
if configs_dict:
all_json.update(configs_dict)
if self.w8a8_strategy==1:
self.tritonsingleton.triton_json_dict.append(all_json)
......
......@@ -1122,7 +1122,8 @@ class QWenLMHeadModel(nn.Module, SupportsMultiModal):
json_file=self.tritonsingleton.get_w8a8json_name(n,k)
configs_dict=self.tritonsingleton.get_triton_cache(json_file,n,k)
all_json.update(configs_dict)
if configs_dict:
all_json.update(configs_dict)
if self.w8a8_strategy==1:
self.tritonsingleton.triton_json_dict.append(all_json)
......
......@@ -1414,4 +1414,6 @@ class W8a8GetCacheJSON:
return configs_dict
def get_w8a8json_name(self,n,k):
return self.triton_json_dir+f"/W8A8_{n}_{k}_DCUK100AI.json"
device_name = current_platform.get_device_name().replace(" ", "_")
return self.triton_json_dir+f"/W8A8_{n}_{k}_DCU{device_name}.json"
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment