Bug fix

05dd9c69 · Thor Johnsen · a5d51c01 · 05dd9c69
Commit 05dd9c69 authored Apr 01, 2022 by Thor Johnsen
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 0 deletions

apex/contrib/bottleneck/bottleneck.py +1 -0

No files found.
--- a/apex/contrib/bottleneck/bottleneck.py
+++ b/apex/contrib/bottleneck/bottleneck.py
@@ -330,6 +330,7 @@ class SpatialBottleneckFunction(torch.autograd.Function):
                # the first kernel of _forward_rest can launch.
                # At least we can overlap the two halo correction kernels.
                if spatial_group_rank < spatial_group_size-1:
+                    stream2.wait_stream(stream1) # wait for halo transfers to finish
                    stream2.wait_stream(torch.cuda.current_stream()) # wait for *_out2_mask to finish
                    with torch.cuda.stream(stream2):
                        w1by3 = args[2][:,2:3,:,:].clone()