Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
tilelang
Commits
3a408158
"vscode:/vscode.git/clone" did not exist on "ad88e3a32f04be0abce6cf75899e0c062381f0d8"
Unverified
Commit
3a408158
authored
Jul 20, 2025
by
Lei Wang
Committed by
GitHub
Jul 20, 2025
Browse files
[Bugfix] Added missing thread offsets and other information to reduce. (#646)
parent
b060c9f7
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
4 deletions
+5
-4
src/tl_templates/cuda/reduce.h
src/tl_templates/cuda/reduce.h
+5
-4
No files found.
src/tl_templates/cuda/reduce.h
View file @
3a408158
...
@@ -42,7 +42,8 @@ struct AllReduce {
...
@@ -42,7 +42,8 @@ struct AllReduce {
if
constexpr
(
offset
==
scale
)
{
if
constexpr
(
offset
==
scale
)
{
return
x
;
return
x
;
}
else
{
}
else
{
return
AllReduce
<
Reducer
,
offset
,
scale
>::
run
(
x
,
red_buf
);
return
AllReduce
<
Reducer
,
offset
,
scale
,
thread_offset
,
all_threads
>::
run
(
x
,
red_buf
);
}
}
}
}
...
@@ -51,7 +52,7 @@ struct AllReduce {
...
@@ -51,7 +52,7 @@ struct AllReduce {
constexpr
int
offset
=
threads
/
2
;
constexpr
int
offset
=
threads
/
2
;
if
constexpr
(
offset
>=
32
)
{
if
constexpr
(
offset
>=
32
)
{
asm
volatile
(
"bar.sync %0, %1;"
:
:
"r"
(
1
),
"r"
(
all_threads
));
asm
volatile
(
"bar.sync %0, %1;"
:
:
"r"
(
1
),
"r"
(
all_threads
));
red_buf
[
threadIdx
.
x
]
=
x
;
red_buf
[
threadIdx
.
x
-
thread_offset
]
=
x
;
// TODO(lei): maybe we can merge the two bar.sync into one?
// TODO(lei): maybe we can merge the two bar.sync into one?
asm
volatile
(
"bar.sync %0, %1;"
:
:
"r"
(
2
),
"r"
(
all_threads
));
asm
volatile
(
"bar.sync %0, %1;"
:
:
"r"
(
2
),
"r"
(
all_threads
));
x
=
Reducer
()(
x
,
red_buf
[(
threadIdx
.
x
-
thread_offset
)
^
offset
]);
x
=
Reducer
()(
x
,
red_buf
[(
threadIdx
.
x
-
thread_offset
)
^
offset
]);
...
@@ -61,8 +62,8 @@ struct AllReduce {
...
@@ -61,8 +62,8 @@ struct AllReduce {
if
constexpr
(
offset
==
scale
)
{
if
constexpr
(
offset
==
scale
)
{
return
x
;
return
x
;
}
else
{
}
else
{
return
AllReduce
<
Reducer
,
offset
,
scale
,
all_
thread
s
>::
run_hopper
(
return
AllReduce
<
Reducer
,
offset
,
scale
,
thread
_offset
,
x
,
red_buf
);
all_threads
>::
run_hopper
(
x
,
red_buf
);
}
}
}
}
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment