better timing

191c1e46 · Rick Ho · 365b6f01 · 191c1e46
Commit 191c1e46 authored Dec 30, 2020 by Rick Ho
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 1 deletion

pytorch/cuda/moe_test.py pytorch/cuda/moe_test.py +12 -1

No files found.
--- a/pytorch/cuda/moe_test.py
+++ b/pytorch/cuda/moe_test.py
@@ -15,19 +15,30 @@ def perf():

    moe = MOELayer(num_expert, in_feat, out_feat).cuda()

+    o = moe(inp, gate)
+    o = moe(inp, gate)
+    o = moe(inp, gate)
+    o = moe(inp, gate)
+    o = moe(inp, gate)
    o = moe(inp, gate)

    n_runs = 16
    tott = 0.
+    maxt = 0.
+    sqtot = 0.
    for i in range(n_runs):
        gate = torch.randint(low=0, high=num_expert, size=(batch_size, ), requires_grad=False).int().cuda()
        ts = time.time()
        o = moe(inp, gate)
        te = time.time()
        tott += te - ts
+        sqtot += (te - ts)**2
+        maxt = max(maxt, te - ts)

    gflops = 2e-9 * n_runs * in_feat * out_feat * batch_size / tott
-    print('Mean time {:.3f} ms, {:.3f} GFLOPs'.format(tott * 1e3 / n_runs, gflops))
+    print('Time mean/max/stdev {:.3f} {:.3f} {:.3f} ms, {:.3f} GFLOPs'.format(
+        tott * 1e3 / n_runs, maxt * 1e3, 
+        (sqtot / n_runs - (tott / n_runs)**2) * 1e3 / n_runs, gflops))


 if __name__ == '__main__':