Commit 97929735, authored Jun 19, 2025 by dongcl

reorder pipeline

Parent: 1d497357
Showing 1 changed file with 4 additions and 16 deletions.
dcu_megatron/core/models/gpt/fine_grained_schedule.py  (+4, -16)
...
@@ -632,6 +632,10 @@ def schedule_layer_1f1b(
    b_grad = pre_backward()
    del pre_backward
    if f_layer is not None:
        with f_context:
            f_input = f_layer.attn.forward(f_input)
    if b_layer is not None:
        with b_context:
            routed_expert_output_grad, shared_expert_output_grad = b_layer.combine.backward(b_grad)
...
@@ -640,10 +644,6 @@ def schedule_layer_1f1b(
    pre_backward_dw()
    del pre_backward_dw
    if f_layer is not None:
        with f_context:
            f_input = f_layer.attn.forward(f_input)
    f_dispatch_b_mlp_sync_event = None
    if f_layer is not None and b_layer is not None:
        f_dispatch_b_mlp_sync_event = F_DISPATCH_B_MLP_SYNC_EVENT
...
@@ -653,13 +653,8 @@ def schedule_layer_1f1b(
            shared_expert_output = f_layer.shared_expert.forward()
            f_input = f_layer.dispatch.forward(f_input, stream_record_event=f_dispatch_b_mlp_sync_event)
    # if f_layer is not None:
    #     with f_context:
    #         f_input = f_layer.dispatch.forward(f_input, stream_record_event=f_dispatch_b_mlp_sync_event)
    if b_layer is not None:
        with b_context:
            # routed_expert_output_grad, shared_expert_output_grad = b_grad
            b_grad = b_layer.routed_expert.backward(routed_expert_output_grad, stream_wait_event=f_dispatch_b_mlp_sync_event)
            b_layer.shared_expert.backward(shared_expert_output_grad)
            b_grad = b_layer.dispatch.backward(b_grad)
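The stream_record_event / stream_wait_event arguments above suggest an event handshake between streams: the forward dispatch records an event (F_DISPATCH_B_MLP_SYNC_EVENT) that the backward routed-expert pass waits on before it starts. Below is a minimal sketch of that pattern with torch.cuda.Event; the streams, tensors, and names are hypothetical stand-ins, not dcu_megatron's actual implementation.

```python
import torch

# Hypothetical illustration of the record/wait handshake implied by
# stream_record_event / stream_wait_event above; not the repository's code.
if torch.cuda.is_available():
    f_stream = torch.cuda.Stream()          # stream running the forward dispatch
    b_stream = torch.cuda.Stream()          # stream running the backward MLP work
    f_dispatch_b_mlp_sync_event = torch.cuda.Event()

    x = torch.randn(1024, 1024, device="cuda")

    with torch.cuda.stream(f_stream):
        y = x @ x                                      # stand-in for dispatch.forward
        f_dispatch_b_mlp_sync_event.record(f_stream)   # "stream_record_event"

    with torch.cuda.stream(b_stream):
        # "stream_wait_event": do not start until the forward dispatch has finished
        b_stream.wait_event(f_dispatch_b_mlp_sync_event)
        z = y * 2.0                                    # stand-in for routed_expert.backward

    torch.cuda.synchronize()
```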
...
@@ -669,13 +664,6 @@ def schedule_layer_1f1b(
        with f_context:
            f_input = f_layer.routed_expert.forward(f_input)
    # if b_layer is not None:
    #     with b_context:
    #         # b_grad = b_layer.dispatch.backward(b_grad)
    #         b_layer.shared_expert.backward(shared_expert_output_grad)
    #         b_layer.routed_expert.dw()
    def next_iter_pre_forward():
        if f_layer is not None:
            with f_context:
...
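Read together, the hunks indicate that the commit moves the forward micro-batch's attn.forward earlier in the per-layer 1F1B step (right after pre_backward(), overlapping the backward micro-batch's combine.backward) and folds shared_expert.backward and dispatch.backward into the block that runs routed_expert.backward. The sketch below is one plausible reading of that interleaving using toy stand-in stages; it is not the repository's schedule_layer_1f1b, and the event/stream plumbing is omitted.

```python
# Toy sketch of the reordered per-layer 1F1B step, inferred only from the hunks
# above. _Stage and _ToyLayer are hypothetical stand-ins used to print the
# execution order; the real logic lives in fine_grained_schedule.py.

class _Stage:
    def __init__(self, layer_tag, name, trace):
        self.layer_tag, self.name, self.trace = layer_tag, name, trace

    def forward(self, x=None, **_):
        self.trace.append(f"{self.layer_tag}.{self.name}.forward")
        return x

    def backward(self, grad=None, **_):
        self.trace.append(f"{self.layer_tag}.{self.name}.backward")
        return grad


class _ToyLayer:
    def __init__(self, tag, trace):
        for name in ("attn", "dispatch", "routed_expert", "shared_expert", "combine"):
            setattr(self, name, _Stage(tag, name, trace))


def toy_layer_1f1b_step(f_layer, b_layer, f_input, b_grad):
    """One plausible reading of the reordered step: the forward attn now runs
    before the backward combine instead of after pre_backward_dw()."""
    if f_layer is not None:
        f_input = f_layer.attn.forward(f_input)           # moved earlier by this commit
    if b_layer is not None:
        routed_grad = shared_grad = b_layer.combine.backward(b_grad)
    if f_layer is not None:
        f_layer.shared_expert.forward()
        f_input = f_layer.dispatch.forward(f_input)       # would record the sync event
    if b_layer is not None:
        b_grad = b_layer.routed_expert.backward(routed_grad)  # would wait on the event
        b_layer.shared_expert.backward(shared_grad)
        b_grad = b_layer.dispatch.backward(b_grad)
    if f_layer is not None:
        f_input = f_layer.routed_expert.forward(f_input)
    return f_input, b_grad


if __name__ == "__main__":
    trace = []
    f_layer, b_layer = _ToyLayer("fwd", trace), _ToyLayer("bwd", trace)
    toy_layer_1f1b_step(f_layer, b_layer, f_input=None, b_grad=None)
    print("\n".join(trace))
```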