Commit e1bd6573 authored by Alan Turner
Browse files

Use insert_common_op

parent 1f106ca7
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <migraphx/tune_axis.hpp> #include <migraphx/tune_axis.hpp>
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/shape.hpp> #include <migraphx/shape.hpp>
#include <migraphx/common.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -68,13 +69,14 @@ void apply_quantizelinear(module& m, instruction_ref ins) ...@@ -68,13 +69,14 @@ void apply_quantizelinear(module& m, instruction_ref ins)
auto s = add_zero_point->get_shape(); auto s = add_zero_point->get_shape();
auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}}); auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}}); auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
auto min_mbcast = // auto min_mbcast =
m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg); // m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
auto max_mbcast = // auto max_mbcast =
m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg); // m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
auto saturate = // auto saturate =
m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast); // m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
m.replace_instruction( m.replace_instruction(
ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate); ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
} }
......
import subprocess, csv, re import subprocess, csv, re, datetime
class CSVFile:
    """Append-only helper for writing rows to a CSV file.

    Args:
        path: Location of the CSV file; defaults to "output.csv".
    """

    def __init__(self, path="output.csv"):
        self.path = path

    def write_row(self, row=None):
        """Append one row to the file at ``self.path``.

        The file is opened (and created if missing) on every call, so rows
        are flushed to disk as soon as the call returns.

        Args:
            row: Iterable of cell values. When omitted, an empty row is
                written.
        """
        # A None sentinel replaces the shared mutable default list.
        cells = [] if row is None else row
        # newline="" hands line-ending control to the csv module; without it,
        # platforms that translate "\n" would emit "\r\r\n" after every row.
        with open(self.path, "a", newline="") as f:
            csv.writer(f).writerow(cells)
def get_device_name(): def get_device_name():
out = subprocess.run("rocminfo", out = subprocess.run("rocminfo",
...@@ -17,37 +27,115 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis ...@@ -17,37 +27,115 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
if disable_fusion: if disable_fusion:
env_vars += "MIGRAPHX_DISABLE_CK_FUSION=1 " env_vars += "MIGRAPHX_DISABLE_CK_FUSION=1 "
int8_str = "--int8" if int8 else "" int8_str = "--int8" if int8 else ""
cmd = "{env_vars} ../build/bin/driver perf {model} --fill1 input_ids --input-dim @input_ids {batch_size} 384 --batch {batch_size} --fp16 {int8} --exhaustive-tune".format( cmd = f"{env_vars} ../build/bin/driver perf {model} --fill1 input_ids --input-dim @input_ids {batch_size} 384 --batch {batch_size} --fp16 {int8_str} --exhaustive-tune"
env_vars=env_vars,
model=model,
batch_size=str(batch_size),
int8=int8_str
)
out = subprocess.run(cmd, out = subprocess.run(cmd,
capture_output=True, capture_output=True,
check=True, check=True,
shell=True) shell=True)
summary = re.findall("Summary.*", str(out.stdout))[0].replace("\\n", "\n") summary = re.findall("Summary.*", str(out.stdout))[0].replace("\\n", "\n")
total_time = re.findall("Total time: \d+\.\d*", summary)[0] total_time = re.findall("Total time: \d+\.\d*", summary)[0]
total_time = total_time.replace("Total time: ", "") total_time = total_time.replace("Total time: ", "")
print(summary) ck_gemm_time = re.findall("ck_gemm_kernel: \d+\.\d*", summary)
print(total_time) if ck_gemm_time:
with open("summaries.txt", "w+") as f: ck_gemm_time = re.findall("\d+\.\d*", ck_gemm_time[0])[0]
else:
ck_gemm_time = "0.0"
rb_gemm_time = re.findall("gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*", summary)
if rb_gemm_time:
rb_gemm_time = re.findall("\d+\.\d*", rb_gemm_time[0])[0]
else:
rb_gemm_time = "0.0"
gemm_pack_time = re.findall("gpu::int8_gemm_pack_a: \d+\.\d*", summary)
if gemm_pack_time:
gemm_pack_time = re.findall("\d+\.\d*", gemm_pack_time[0])[0]
else:
gemm_pack_time = "0.0"
gemm_times = [ck_gemm_time, rb_gemm_time, gemm_pack_time]
total_gemm_time = [str(sum(map(float, gemm_times)))]
gemm_times.extend(total_gemm_time)
print(cmd)
print(total_time + "ms")
with open("perf_summaries.txt", "a+") as f:
f.write(cmd + "\n") f.write(cmd + "\n")
f.write(summary + "\n\n") f.write(summary + "\n\n")
return [total_time] + gemm_times
def run_ck_perf(model, batch_size, int8=False, use_large_k=False):
# CK with fusions
total_time = run_perf(model, batch_size, int8, True, use_large_k, False)[0]
# CK without fusions
gemm_times = run_perf(model, batch_size, int8, True, use_large_k, True)
# run model with: return [total_time] + gemm_times[1:]
# RocBlas
# Get gemm info
# CK
# With fusions
# Without fusions
if __name__ == "__main__": if __name__ == "__main__":
device_id = get_device_name() device_id = get_device_name()
model = "/code/bert_base_cased_1_fp16_gpu.onnx" model = "/code/bert_base_cased_1_fp16_gpu.onnx"
run_perf(model, 1, True, True, True, True) cf = CSVFile()
\ No newline at end of file cf.write_row([str(datetime.datetime.now())])
cf.write_row([device_id])
cf.write_row([model])
# Column titles written above every result table; the first cell is blank
# because each data row begins with a configuration label.
headers = ["", "Total Time (ms)", "CK GEMM Time (ms)", "RB GEMM Time (ms)", "GEMM Pack Time (ms)", "Total GEMM Time (ms)"]
# One table per (batch size, precision) pair: int8 first, then fp16, for
# batch size "1" and then "64".
for batch_size in ("1", "64"):
    for quantize in (True, False):
        label = f"Int8 / BatchSize: {batch_size}" if quantize else f"FP16 / BatchSize: {batch_size}"
        cf.write_row([label])
        cf.write_row(headers)
        # CK Only (use_large_k enabled)
        cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
        # CK + rocBLAS (k>2048) (use_large_k disabled)
        cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False))
        # rocBLAS Only
        cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
        # Empty row separates consecutive tables.
        cf.write_row()
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment