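Refactoring of performance measurement: the grouped and batched GEMM examples now run the kernel once untimed before verification, and run the timed measurement (with the "Perf:" report) afterwards only when `config.time_kernel` is set.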

24b7b354 · Adam Osewski · ad00dd1f · 24b7b354 · 24b7b354
Commit 24b7b354 authored Aug 25, 2022 by Adam Osewski
2 changed files
--- a/example/15_grouped_gemm/run_grouped_gemm_example.inc
+++ b/example/15_grouped_gemm/run_grouped_gemm_example.inc
@@ -178,12 +178,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
            "not support this GEMM problem");
    }

-    float ave_time   = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
-    float tflops     = static_cast<float>(flop) / 1.E9 / ave_time;
-    float gb_per_sec = num_btype / 1.E6 / ave_time;
-
-    std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, "
-              << gemm.GetTypeString() << std::endl;
+    invoker.Run(argument, StreamConfig{nullptr, false});

    bool pass = true;
    if(config.do_verification)
@@ -221,7 +216,17 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
        }
    }

-    return pass ? 0 : 1;
+    if(config.time_kernel)
+    {
+        float ave_time   = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
+        float tflops     = static_cast<float>(flop) / 1.E9 / ave_time;
+        float gb_per_sec = num_btype / 1.E6 / ave_time;
+
+        std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
+                  << " GB/s, " << gemm.GetTypeString() << std::endl;
+    }
+
+    return pass;
 }

 bool run_grouped_gemm_example(int argc, char* argv[])

--- a/example/24_batched_gemm/run_batched_gemm_example.inc
+++ b/example/24_batched_gemm/run_batched_gemm_example.inc
@@ -145,18 +145,7 @@ bool run_batched_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
            "not support this GEMM problem");
    }

-    float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
-
-    std::size_t flop      = std::size_t(2) * batch_count * M * N * K;
-    std::size_t num_btype = sizeof(ADataType) * batch_count * M * K +
-                            sizeof(BDataType) * batch_count * K * N +
-                            sizeof(EDataType) * batch_count * M * N;
-
-    float tflops     = static_cast<float>(flop) / 1.E9 / ave_time;
-    float gb_per_sec = num_btype / 1.E6 / ave_time;
-    std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, "
-              << gemm.GetTypeString() << std::endl;
-
+    invoker.Run(argument, StreamConfig{nullptr, false});
    bool pass = true;

    if(config.do_verification)
@@ -193,6 +182,21 @@ bool run_batched_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
 #endif
    }

+    if(config.time_kernel)
+    {
+        float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
+
+        std::size_t flop      = std::size_t(2) * batch_count * M * N * K;
+        std::size_t num_btype = sizeof(ADataType) * batch_count * M * K +
+                                sizeof(BDataType) * batch_count * K * N +
+                                sizeof(EDataType) * batch_count * M * N;
+
+        float tflops     = static_cast<float>(flop) / 1.E9 / ave_time;
+        float gb_per_sec = num_btype / 1.E6 / ave_time;
+        std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
+                  << " GB/s, " << gemm.GetTypeString() << std::endl;
+    }
+
    return pass ? 0 : 1;
 }