Unverified Commit 188bc2bf authored by Xin Yao's avatar Xin Yao Committed by GitHub
Browse files

[Fix][Unittest] Fix test stream (#4635)

* fix test stream

* init cusparse handle
parent d78a3a4b
...@@ -107,8 +107,12 @@ def test_record_stream_graph_positive(): ...@@ -107,8 +107,12 @@ def test_record_stream_graph_positive():
cycles_per_ms = _get_cycles_per_ms() cycles_per_ms = _get_cycles_per_ms()
g = rand_graph(10, 20, device=F.cpu()) g = rand_graph(10, 20, device=F.cpu())
x = torch.ones(g.num_nodes(), 10) g.create_formats_()
result = OPS.copy_u_sum(g, x).to(F.ctx()) x = torch.ones(g.num_nodes(), 10).to(F.ctx())
g1 = g.to(F.ctx())
# this is necessary to initialize the cusparse handle
result = OPS.copy_u_sum(g1, x)
torch.cuda.current_stream().synchronize()
stream = torch.cuda.Stream() stream = torch.cuda.Stream()
results2 = torch.zeros_like(result) results2 = torch.zeros_like(result)
...@@ -121,11 +125,11 @@ def test_record_stream_graph_positive(): ...@@ -121,11 +125,11 @@ def test_record_stream_graph_positive():
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the computing torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the computing
results2.copy_(OPS.copy_u_sum(g2, x)) results2.copy_(OPS.copy_u_sum(g2, x))
x = x.to(F.ctx())
perform_computing() perform_computing()
with torch.cuda.stream(stream): with torch.cuda.stream(stream):
# since we have called record stream for g2, g3 won't reuse its memory # since we have called record stream for g2, g3 won't reuse its memory
g3 = rand_graph(10, 20, device=F.ctx()) g3 = rand_graph(10, 20, device=F.ctx())
g3.create_formats_()
torch.cuda.current_stream().synchronize() torch.cuda.current_stream().synchronize()
assert torch.equal(result, results2) assert torch.equal(result, results2)
...@@ -134,8 +138,12 @@ def test_record_stream_graph_negative(): ...@@ -134,8 +138,12 @@ def test_record_stream_graph_negative():
cycles_per_ms = _get_cycles_per_ms() cycles_per_ms = _get_cycles_per_ms()
g = rand_graph(10, 20, device=F.cpu()) g = rand_graph(10, 20, device=F.cpu())
x = torch.ones(g.num_nodes(), 10) g.create_formats_()
result = OPS.copy_u_sum(g, x).to(F.ctx()) x = torch.ones(g.num_nodes(), 10).to(F.ctx())
g1 = g.to(F.ctx())
# this is necessary to initialize the cusparse handle
result = OPS.copy_u_sum(g1, x)
torch.cuda.current_stream().synchronize()
stream = torch.cuda.Stream() stream = torch.cuda.Stream()
results2 = torch.zeros_like(result) results2 = torch.zeros_like(result)
...@@ -149,11 +157,11 @@ def test_record_stream_graph_negative(): ...@@ -149,11 +157,11 @@ def test_record_stream_graph_negative():
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the computing torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the computing
results2.copy_(OPS.copy_u_sum(g2, x)) results2.copy_(OPS.copy_u_sum(g2, x))
x = x.to(F.ctx())
perform_computing() perform_computing()
with torch.cuda.stream(stream): with torch.cuda.stream(stream):
# g3 will reuse g2's memory block, resulting a wrong result # g3 will reuse g2's memory block, resulting a wrong result
g3 = rand_graph(10, 20, device=F.ctx()) g3 = rand_graph(10, 20, device=F.ctx())
g3.create_formats_()
torch.cuda.current_stream().synchronize() torch.cuda.current_stream().synchronize()
assert not torch.equal(result, results2) assert not torch.equal(result, results2)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment