Formatting

028280dc · Alan Turner · e1bd6573 · 028280dc · 028280dc
Commit 028280dc authored Aug 01, 2023 by Alan Turner
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 44 additions and 27 deletions

src/rewrite_quantization.cpp src/rewrite_quantization.cpp +11 -7

tools/gemm_perf.py tools/gemm_perf.py +33 -20

No files found.
--- a/src/rewrite_quantization.cpp
+++ b/src/rewrite_quantization.cpp
@@ -64,23 +64,26 @@ void apply_quantizelinear(module& m, instruction_ref ins)
        max_quant = qt.max();
        min_quant = qt.min();
    });
-    if (enabled(MIGRAPHX_BROADCAST_Q{}))
+    if(enabled(MIGRAPHX_BROADCAST_Q{}))
    {
        auto s       = add_zero_point->get_shape();
        auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
        auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
        // auto min_mbcast =
-        //     m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
+        //     m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}),
+        //     min_arg);
        // auto max_mbcast =
-        //     m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
-        
+        //     m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}),
+        //     max_arg);
+
        // auto saturate =
        //     m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
-        auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
+        auto saturate =
+            insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
        m.replace_instruction(
            ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
    }
-    else 
+    else
    {
        auto s = add_zero_point->get_shape();
        std::vector<int> min_data(s.elements(), min_quant);
@@ -88,7 +91,8 @@ void apply_quantizelinear(module& m, instruction_ref ins)
        auto min_arg = m.add_literal(literal(s, min_data));
        auto max_arg = m.add_literal(literal(s, max_data));

-        auto saturate = m.insert_instruction(ins, make_op("clip"), add_zero_point, min_arg, max_arg);
+        auto saturate =
+            m.insert_instruction(ins, make_op("clip"), add_zero_point, min_arg, max_arg);
        m.replace_instruction(
            ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
    }

--- a/tools/gemm_perf.py
+++ b/tools/gemm_perf.py
 import subprocess, csv, re, datetime

+
 class CSVFile:
+
    def __init__(self, path="output.csv"):
        self.path = path

@@ -18,20 +20,23 @@ def get_device_name():
    matches = re.findall("gfx\d*[a-z]*", str(out.stdout))
    return matches[0]

-def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, disable_fusion=False):
+
+def run_perf(model,
+             batch_size,
+             int8=False,
+             use_ck=False,
+             use_large_k=False,
+             disable_fusion=False):
    env_vars = ""
    if use_ck:
-        env_vars += "MIGRAPHX_ENABLE_CK=1 " 
+        env_vars += "MIGRAPHX_ENABLE_CK=1 "
        if use_large_k:
-            env_vars += "MIGRAPHX_USE_LARGE_K=1 " 
+            env_vars += "MIGRAPHX_USE_LARGE_K=1 "
        if disable_fusion:
            env_vars += "MIGRAPHX_DISABLE_CK_FUSION=1 "
    int8_str = "--int8" if int8 else ""
    cmd = f"{env_vars} ../build/bin/driver perf {model} --fill1 input_ids --input-dim @input_ids {batch_size} 384 --batch {batch_size} --fp16 {int8_str}  --exhaustive-tune"
-    out = subprocess.run(cmd,
-                         capture_output=True,
-                         check=True,
-                         shell=True)
+    out = subprocess.run(cmd, capture_output=True, check=True, shell=True)

    summary = re.findall("Summary.*", str(out.stdout))[0].replace("\\n", "\n")
    total_time = re.findall("Total time: \d+\.\d*", summary)[0]
@@ -42,13 +47,14 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
        ck_gemm_time = re.findall("\d+\.\d*", ck_gemm_time[0])[0]
    else:
        ck_gemm_time = "0.0"
-    
-    rb_gemm_time = re.findall("gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*", summary)
+
+    rb_gemm_time = re.findall("gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*",
+                              summary)
    if rb_gemm_time:
        rb_gemm_time = re.findall("\d+\.\d*", rb_gemm_time[0])[0]
    else:
        rb_gemm_time = "0.0"
-    
+
    gemm_pack_time = re.findall("gpu::int8_gemm_pack_a: \d+\.\d*", summary)
    if gemm_pack_time:
        gemm_pack_time = re.findall("\d+\.\d*", gemm_pack_time[0])[0]
@@ -64,19 +70,19 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
    with open("perf_summaries.txt", "a+") as f:
        f.write(cmd + "\n")
        f.write(summary + "\n\n")
-    
+
    return [total_time] + gemm_times

+
 def run_ck_perf(model, batch_size, int8=False, use_large_k=False):
-    # CK with fusions 
+    # CK with fusions
    total_time = run_perf(model, batch_size, int8, True, use_large_k, False)[0]
-    # CK without fusions 
+    # CK without fusions
    gemm_times = run_perf(model, batch_size, int8, True, use_large_k, True)

    return [total_time] + gemm_times[1:]


-
 if __name__ == "__main__":
    device_id = get_device_name()
    model = "/code/bert_base_cased_1_fp16_gpu.onnx"
@@ -84,7 +90,10 @@ if __name__ == "__main__":
    cf.write_row([str(datetime.datetime.now())])
    cf.write_row([device_id])
    cf.write_row([model])
-    headers = ["", "Total Time (ms)", "CK GEMM Time (ms)", "RB GEMM Time (ms)", "GEMM Pack Time (ms)", "Total GEMM Time (ms)"]
+    headers = [
+        "", "Total Time (ms)", "CK GEMM Time (ms)", "RB GEMM Time (ms)",
+        "GEMM Pack Time (ms)", "Total GEMM Time (ms)"
+    ]

    batch_size = "1"
    # int8:
@@ -95,7 +104,8 @@ if __name__ == "__main__":
    # CK Only
    cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
    # CK + rocBLAS (k>2048)
-    cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False))
+    cf.write_row(["CK + rocBLAS(k>2048)"] +
+                 run_ck_perf(model, batch_size, quantize, False))
    # rocBLAS Only
    cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
    cf.write_row()
@@ -108,7 +118,8 @@ if __name__ == "__main__":
    # CK Only
    cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
    # CK + rocBLAS (k>2048)
-    cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False))
+    cf.write_row(["CK + rocBLAS(k>2048)"] +
+                 run_ck_perf(model, batch_size, quantize, False))
    # rocBLAS Only
    cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
    cf.write_row()
@@ -122,7 +133,8 @@ if __name__ == "__main__":
    # CK Only
    cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
    # CK + rocBLAS (k>2048)
-    cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False))
+    cf.write_row(["CK + rocBLAS(k>2048)"] +
+                 run_ck_perf(model, batch_size, quantize, False))
    # rocBLAS Only
    cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
    cf.write_row()
@@ -135,7 +147,8 @@ if __name__ == "__main__":
    # CK Only
    cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
    # CK + rocBLAS (k>2048)
-    cf.write_row(["CK + rocBLAS(k>2048)"] + run_ck_perf(model, batch_size, quantize, False))
+    cf.write_row(["CK + rocBLAS(k>2048)"] +
+                 run_ck_perf(model, batch_size, quantize, False))
    # rocBLAS Only
    cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
-    cf.write_row()
\ No newline at end of file
+    cf.write_row()