Commit 028280dc authored by Alan Turner
Browse files

Formatting

parent e1bd6573
...@@ -64,19 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins) ...@@ -64,19 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
max_quant = qt.max(); max_quant = qt.max();
min_quant = qt.min(); min_quant = qt.min();
}); });
if (enabled(MIGRAPHX_BROADCAST_Q{})) if(enabled(MIGRAPHX_BROADCAST_Q{}))
{ {
auto s = add_zero_point->get_shape(); auto s = add_zero_point->get_shape();
auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}}); auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}}); auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
// auto min_mbcast = // auto min_mbcast =
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg); // m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}),
// min_arg);
// auto max_mbcast = // auto max_mbcast =
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg); // m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}),
// max_arg);
// auto saturate = // auto saturate =
// m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast); // m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg}); auto saturate =
insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
m.replace_instruction( m.replace_instruction(
ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate); ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
} }
...@@ -88,7 +91,8 @@ void apply_quantizelinear(module& m, instruction_ref ins) ...@@ -88,7 +91,8 @@ void apply_quantizelinear(module& m, instruction_ref ins)
auto min_arg = m.add_literal(literal(s, min_data)); auto min_arg = m.add_literal(literal(s, min_data));
auto max_arg = m.add_literal(literal(s, max_data)); auto max_arg = m.add_literal(literal(s, max_data));
auto saturate = m.insert_instruction(ins, make_op("clip"), add_zero_point, min_arg, max_arg); auto saturate =
m.insert_instruction(ins, make_op("clip"), add_zero_point, min_arg, max_arg);
m.replace_instruction( m.replace_instruction(
ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate); ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
} }
......
import subprocess, csv, re, datetime import subprocess, csv, re, datetime
class CSVFile: class CSVFile:
def __init__(self, path="output.csv"): def __init__(self, path="output.csv"):
self.path = path self.path = path
...@@ -18,7 +20,13 @@ def get_device_name(): ...@@ -18,7 +20,13 @@ def get_device_name():
matches = re.findall("gfx\d*[a-z]*", str(out.stdout)) matches = re.findall("gfx\d*[a-z]*", str(out.stdout))
return matches[0] return matches[0]
def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, disable_fusion=False):
def run_perf(model,
batch_size,
int8=False,
use_ck=False,
use_large_k=False,
disable_fusion=False):
env_vars = "" env_vars = ""
if use_ck: if use_ck:
env_vars += "MIGRAPHX_ENABLE_CK=1 " env_vars += "MIGRAPHX_ENABLE_CK=1 "
...@@ -28,10 +36,7 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis ...@@ -28,10 +36,7 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
env_vars += "MIGRAPHX_DISABLE_CK_FUSION=1 " env_vars += "MIGRAPHX_DISABLE_CK_FUSION=1 "
int8_str = "--int8" if int8 else "" int8_str = "--int8" if int8 else ""
cmd = f"{env_vars} ../build/bin/driver perf {model} --fill1 input_ids --input-dim @input_ids {batch_size} 384 --batch {batch_size} --fp16 {int8_str} --exhaustive-tune" cmd = f"{env_vars} ../build/bin/driver perf {model} --fill1 input_ids --input-dim @input_ids {batch_size} 384 --batch {batch_size} --fp16 {int8_str} --exhaustive-tune"
out = subprocess.run(cmd, out = subprocess.run(cmd, capture_output=True, check=True, shell=True)
capture_output=True,
check=True,
shell=True)
summary = re.findall("Summary.*", str(out.stdout))[0].replace("\\n", "\n") summary = re.findall("Summary.*", str(out.stdout))[0].replace("\\n", "\n")
total_time = re.findall("Total time: \d+\.\d*", summary)[0] total_time = re.findall("Total time: \d+\.\d*", summary)[0]
...@@ -43,7 +48,8 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis ...@@ -43,7 +48,8 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
else: else:
ck_gemm_time = "0.0" ck_gemm_time = "0.0"
rb_gemm_time = re.findall("gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*", summary) rb_gemm_time = re.findall("gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*",
summary)
if rb_gemm_time: if rb_gemm_time:
rb_gemm_time = re.findall("\d+\.\d*", rb_gemm_time[0])[0] rb_gemm_time = re.findall("\d+\.\d*", rb_gemm_time[0])[0]
else: else:
...@@ -67,6 +73,7 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis ...@@ -67,6 +73,7 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
return [total_time] + gemm_times return [total_time] + gemm_times
def run_ck_perf(model, batch_size, int8=False, use_large_k=False): def run_ck_perf(model, batch_size, int8=False, use_large_k=False):
# CK with fusions # CK with fusions
total_time = run_perf(model, batch_size, int8, True, use_large_k, False)[0] total_time = run_perf(model, batch_size, int8, True, use_large_k, False)[0]
...@@ -76,7 +83,6 @@ def run_ck_perf(model, batch_size, int8=False, use_large_k=False): ...@@ -76,7 +83,6 @@ def run_ck_perf(model, batch_size, int8=False, use_large_k=False):
return [total_time] + gemm_times[1:] return [total_time] + gemm_times[1:]
if __name__ == "__main__": if __name__ == "__main__":
device_id = get_device_name() device_id = get_device_name()
model = "/code/bert_base_cased_1_fp16_gpu.onnx" model = "/code/bert_base_cased_1_fp16_gpu.onnx"
...@@ -84,7 +90,10 @@ if __name__ == "__main__": ...@@ -84,7 +90,10 @@ if __name__ == "__main__":
cf.write_row([str(datetime.datetime.now())]) cf.write_row([str(datetime.datetime.now())])
cf.write_row([device_id]) cf.write_row([device_id])
cf.write_row([model]) cf.write_row([model])
headers = ["", "Total Time (ms)", "CK GEMM Time (ms)", "RB GEMM Time (ms)", "GEMM Pack Time (ms)", "Total GEMM Time (ms)"] headers = [
"", "Total Time (ms)", "CK GEMM Time (ms)", "RB GEMM Time (ms)",
"GEMM Pack Time (ms)", "Total GEMM Time (ms)"
]
batch_size = "1" batch_size = "1"
# int8: # int8:
...@@ -95,7 +104,8 @@ if __name__ == "__main__": ...@@ -95,7 +104,8 @@ if __name__ == "__main__":
# CK Only # CK Only
cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True)) cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
# CK + rocBLAS (k>2048) # CK + rocBLAS (k>2048)
cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False)) cf.write_row(["CK + rocBLAS(k>2048)"] +
run_ck_perf(model, batch_size, quantize, False))
# rocBLAS Only # rocBLAS Only
cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize)) cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
cf.write_row() cf.write_row()
...@@ -108,7 +118,8 @@ if __name__ == "__main__": ...@@ -108,7 +118,8 @@ if __name__ == "__main__":
# CK Only # CK Only
cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True)) cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
# CK + rocBLAS (k>2048) # CK + rocBLAS (k>2048)
cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False)) cf.write_row(["CK + rocBLAS(k>2048)"] +
run_ck_perf(model, batch_size, quantize, False))
# rocBLAS Only # rocBLAS Only
cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize)) cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
cf.write_row() cf.write_row()
...@@ -122,7 +133,8 @@ if __name__ == "__main__": ...@@ -122,7 +133,8 @@ if __name__ == "__main__":
# CK Only # CK Only
cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True)) cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
# CK + rocBLAS (k>2048) # CK + rocBLAS (k>2048)
cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False)) cf.write_row(["CK + rocBLAS(k>2048)"] +
run_ck_perf(model, batch_size, quantize, False))
# rocBLAS Only # rocBLAS Only
cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize)) cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
cf.write_row() cf.write_row()
...@@ -135,7 +147,8 @@ if __name__ == "__main__": ...@@ -135,7 +147,8 @@ if __name__ == "__main__":
# CK Only # CK Only
cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True)) cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
# CK + rocBLAS (k>2048) # CK + rocBLAS (k>2048)
cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False)) cf.write_row(["CK + rocBLAS(k>2048)"] +
run_ck_perf(model, batch_size, quantize, False))
# rocBLAS Only # rocBLAS Only
cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize)) cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
cf.write_row() cf.write_row()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment