Use insert_common_op

e1bd6573 · Alan Turner · 1f106ca7 · e1bd6573 · e1bd6573
Commit e1bd6573 authored Aug 01, 2023 by Alan Turner
Hide whitespace changes
Inline Side-by-side

Showing with 113 additions and 23 deletions

src/rewrite_quantization.cpp src/rewrite_quantization.cpp +8 -6

tools/gemm_perf.py tools/gemm_perf.py +105 -17

No files found.
--- a/src/rewrite_quantization.cpp
+++ b/src/rewrite_quantization.cpp
@@ -28,6 +28,7 @@
 #include <migraphx/tune_axis.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/shape.hpp>
+#include <migraphx/common.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -68,13 +69,14 @@ void apply_quantizelinear(module& m, instruction_ref ins)
        auto s       = add_zero_point->get_shape();
        auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
        auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
-        auto min_mbcast =
-            m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
-        auto max_mbcast =
-            m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
+        // auto min_mbcast =
+        //     m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
+        // auto max_mbcast =
+        //     m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
        
-        auto saturate =
-            m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
+        // auto saturate =
+        //     m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
+        auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
        m.replace_instruction(
            ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
    }

--- a/tools/gemm_perf.py
+++ b/tools/gemm_perf.py
-import subprocess, csv, re
+import subprocess, csv, re, datetime
+
+class CSVFile:
+    """Append-only CSV sink used to collect the perf results.
+
+    Every write_row call reopens `path` in append mode, so rows written by
+    earlier calls (and earlier runs of the script) are kept.
+    """
+
+    def __init__(self, path="output.csv"):
+        self.path = path
+
+    def write_row(self, row=None):
+        """Append one row to the CSV file; with no argument an empty row is written."""
+        # None instead of a mutable [] default, which would be shared across calls.
+        row = [] if row is None else row
+        # newline="" leaves line endings to the csv module, as its docs require.
+        with open(self.path, "a+", newline="") as f:
+            csv.writer(f).writerow(row)
+

 def get_device_name():
    out = subprocess.run("rocminfo",
@@ -17,37 +27,115 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
        if disable_fusion:
            env_vars += "MIGRAPHX_DISABLE_CK_FUSION=1 "
    int8_str = "--int8" if int8 else ""
-    cmd = "{env_vars} ../build/bin/driver perf {model} --fill1 input_ids --input-dim @input_ids {batch_size} 384 --batch {batch_size} --fp16 {int8}  --exhaustive-tune".format(
-            env_vars=env_vars,
-            model=model,
-            batch_size=str(batch_size),
-            int8=int8_str
-    )
+    cmd = f"{env_vars} ../build/bin/driver perf {model} --fill1 input_ids --input-dim @input_ids {batch_size} 384 --batch {batch_size} --fp16 {int8_str}  --exhaustive-tune"
    out = subprocess.run(cmd,
                         capture_output=True,
                         check=True,
                         shell=True)
+
    summary = re.findall("Summary.*", str(out.stdout))[0].replace("\\n", "\n")
    total_time = re.findall("Total time: \d+\.\d*", summary)[0]
    total_time = total_time.replace("Total time: ", "")

-    print(summary)
-    print(total_time)
-    with open("summaries.txt", "w+") as f:
+    ck_gemm_time = re.findall("ck_gemm_kernel: \d+\.\d*", summary)
+    if ck_gemm_time:
+        ck_gemm_time = re.findall("\d+\.\d*", ck_gemm_time[0])[0]
+    else:
+        ck_gemm_time = "0.0"
+    
+    rb_gemm_time = re.findall("gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*", summary)
+    if rb_gemm_time:
+        rb_gemm_time = re.findall("\d+\.\d*", rb_gemm_time[0])[0]
+    else:
+        rb_gemm_time = "0.0"
+    
+    gemm_pack_time = re.findall("gpu::int8_gemm_pack_a: \d+\.\d*", summary)
+    if gemm_pack_time:
+        gemm_pack_time = re.findall("\d+\.\d*", gemm_pack_time[0])[0]
+    else:
+        gemm_pack_time = "0.0"
+
+    gemm_times = [ck_gemm_time, rb_gemm_time, gemm_pack_time]
+    total_gemm_time = [str(sum(map(float, gemm_times)))]
+    gemm_times.extend(total_gemm_time)
+
+    print(cmd)
+    print(total_time + "ms")
+    with open("perf_summaries.txt", "a+") as f:
        f.write(cmd + "\n")
        f.write(summary + "\n\n")
+    
+    return [total_time] + gemm_times

+def run_ck_perf(model, batch_size, int8=False, use_large_k=False):
+    """Run the CK perf measurement twice and merge the two results.
+
+    The first run_perf call passes False as its last argument and only its
+    total time is kept; the second passes True and only its GEMM timings
+    (every entry after the total time) are kept.
+
+    NOTE(review): the last run_perf parameter is truncated in this view
+    ("dis..."); presumably it is disable_fusion -- confirm.
+
+    Returns a list: the total time of the first run followed by the GEMM
+    timing entries of the second run.
+    """
+    # CK with fusions
+    total_time = run_perf(model, batch_size, int8, True, use_large_k, False)[0]
+    # CK without fusions
+    gemm_times = run_perf(model, batch_size, int8, True, use_large_k, True)

-# run model with:
-#    RocBlas 
-#        Get gemm info
-#    CK
-#        With fusions
-#        Without fusions
+    return [total_time] + gemm_times[1:]



 if __name__ == "__main__":
    device_id = get_device_name()
    model = "/code/bert_base_cased_1_fp16_gpu.onnx"
-    run_perf(model, 1, True, True, True, True)
\ No newline at end of file
+    cf = CSVFile()
+    # Preamble rows: when, on which device, and for which model the sweep ran.
+    cf.write_row([str(datetime.datetime.now())])
+    cf.write_row([device_id])
+    cf.write_row([model])
+    headers = ["", "Total Time (ms)", "CK GEMM Time (ms)", "RB GEMM Time (ms)", "GEMM Pack Time (ms)", "Total GEMM Time (ms)"]
+
+    # One table per (batch size, precision) pair, written in the order
+    # 1/int8, 1/fp16, 64/int8, 64/fp16. A single loop replaces four
+    # copy-pasted sections so the table layout is defined in one place.
+    for batch_size in ("1", "64"):
+        for quantize in (True, False):
+            precision = "Int8" if quantize else "FP16"
+            cf.write_row([f"{precision} / BatchSize: {batch_size}"])
+            cf.write_row(headers)
+            # CK Only
+            cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
+            # CK + rocBLAS (k>2048)
+            cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False))
+            # rocBLAS Only
+            cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
+            # Empty row separates consecutive tables.
+            cf.write_row()
\ No newline at end of file