Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
TransformerEngine
Commits
f0311a18
Unverified
Commit
f0311a18
authored
May 20, 2024
by
Pavel Shamis (Pasha)
Committed by
GitHub
May 20, 2024
Browse files
[UB] Fixing consistency of error messages. (#840)
parent
115a27ef
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
25 deletions
+25
-25
transformer_engine/pytorch/csrc/userbuffers/userbuffers.cu
transformer_engine/pytorch/csrc/userbuffers/userbuffers.cu
+25
-25
No files found.
transformer_engine/pytorch/csrc/userbuffers/userbuffers.cu
View file @
f0311a18
...
...
@@ -153,7 +153,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Allreduce Gather: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Allreduce Gather: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -188,8 +188,8 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d ]Allreduce reduce-scatter:SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
UB_PRINT
(
"[%d ]Allreduce reduce-scatter:SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
}
...
...
@@ -237,7 +237,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Allreduce gather: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Allreduce gather: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -300,7 +300,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -376,7 +376,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -450,7 +450,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
clock64
()
-
s
>
TIMEOUT
)
{
UB_PRINT
(
"Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
blockIdx
.
x
,
threadIdx
.
x
,
UB_PRINT
(
"Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -518,7 +518,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
clock64
()
-
s
>
2ull
*
TIMEOUT
)
{
UB_PRINT
(
"Allgather: SM %d [%d]:expecting %d got %d"
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
UB_PRINT
(
"Allgather: SM %d [%d]:
expecting %d got %d"
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -554,7 +554,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -640,7 +640,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -771,8 +771,8 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Allgather: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
UB_PRINT
(
"[%d] Allgather: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
}
...
...
@@ -837,7 +837,7 @@ __global__ void __launch_bounds__(MAX_THREADS) userbuffers_fp16_sum_inplace_gpu_
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -926,7 +926,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -1014,7 +1014,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -1109,7 +1109,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -1205,7 +1205,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
UB_PRINT
(
"[%d] Reduce-scatter: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -1324,7 +1324,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Allgather: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
UB_PRINT
(
"[%d] Allgather: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -1422,7 +1422,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
reduce_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"[%d] Allgather: SM %d [%d]:expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
UB_PRINT
(
"[%d] Allgather: SM %d [%d]:
expecting %d got %d"
,
myrank
,
blockIdx
.
x
,
threadIdx
.
x
,
reduce_id
,
*
flag
);
break
;
}
...
...
@@ -2004,7 +2004,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
signal_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"pullrecv [grank dst:%d global src:%d][nvrank(GPU) dst: %d src: %d]: expect
ed
%d,"
UB_PRINT
(
"pullrecv [grank dst:%d global src:%d][nvrank(GPU) dst: %d src: %d]: expect
ing
%d,"
" observed %d"
,
myrank
,
peer
,
nvrank
,
nvpeer
,
signal_id
,
*
flag
);
break
;
}
...
...
@@ -2078,8 +2078,8 @@ __global__ void kuserbuffers_pushrecv(int myrank, int peer, int nvrank, int nvpe
clock_t
s
=
clock64
();
while
(
CHECK_IDS
(
*
flag
,
signal_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"pushrecv [grank dst:%d global src:%d][nvrank(GPU) dst: %d src: %d]
: "
"expect
ed
%d, observed %d"
,
myrank
,
peer
,
nvrank
,
nvpeer
,
signal_id
,
*
flag
);
UB_PRINT
(
"pushrecv [grank dst:%d global src:%d][nvrank(GPU) dst: %d src: %d]: "
"expect
ing
%d, observed %d"
,
myrank
,
peer
,
nvrank
,
nvpeer
,
signal_id
,
*
flag
);
if
(
CHECK_CE
(
ce_start_ptr
,
ce_end_ptr
))
UB_PRINT
(
"pushrecv: CE deadlock DETECTED: %d (ce_start) != %d (ce_end)
\n
"
,
*
ce_start_ptr
,
*
ce_end_ptr
);
...
...
@@ -2135,7 +2135,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
while
(
CHECK_IDS
(
*
flag
,
signal_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"pushsendrecv [sending peer:%d receiving peer:%d][nvrank(GPU) sending peer: %d"
" receiving peer: %d]: expect
ed
%d, observed %d"
,
" receiving peer: %d]: expect
ing
%d, observed %d"
,
send_peer
,
recv_peer
,
nv_send
,
nv_recv
,
signal_id
,
*
flag
);
if
(
CHECK_CE
(
ce_start_ptr
,
ce_end_ptr
))
UB_PRINT
(
"pushrecv: CE deadlock DETECTED: %d (ce_start) != %d (ce_end)
\n
"
,
...
...
@@ -2192,7 +2192,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
while
(
CHECK_IDS
(
*
flag
,
signal_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"pushsendrecv atomic [sending peer:%d receiving peer:%d][nvrank(GPU) sending peer:"
" %d receiving peer: %d]: expect
ed
%d, observed %d"
,
" %d receiving peer: %d]: expect
ing
%d, observed %d"
,
send_peer
,
recv_peer
,
nv_send
,
nv_recv
,
signal_id
,
*
flag
);
/*return;*/
if
(
CHECK_CE
(
ce_start_ptr
,
ce_end_ptr
))
UB_PRINT
(
"pushsendrecv atomic: CE deadlock DETECTED: %d (ce_start) != %d (ce_end)
\n
"
,
...
...
@@ -2263,7 +2263,7 @@ __global__ void __launch_bounds__(MAX_THREADS)
while
(
CHECK_IDS
(
*
flag
,
signal_id
))
{
if
(
CHECK_TIMEOUT
(
s
,
ub_timeout
))
{
UB_PRINT
(
"pushsendrecv multiatomic [sending peer:%d receiving peer:%d][nvrank(GPU)"
" sending peer: %d receiving peer: %d]: expect
ed
%d, observed %d"
,
" sending peer: %d receiving peer: %d]: expect
ing
%d, observed %d"
,
send_peer
,
recv_peer
,
nv_send
,
nv_recv
,
signal_id
,
*
flag
);
/*return;*/
// CE mode is not supported for multi-atomic, so there is no need to check for a deadlock
return
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment