wuxk1 / Megatron-LM
Commit 8fc5e323 ("more cleanup"), authored Jan 13, 2022 by Lawrence McAfee
Parent: c1e4526b

Showing 1 changed file with 9 additions and 60 deletions:
megatron/schedules.py (+9, -60)
```diff
@@ -28,10 +28,6 @@ from megatron.model import DistributedDataParallel as LocalDDP
 from megatron.model import Float16Module
 from megatron.model import ModelType
-# >>>
-from lutil import pax, tp, KEY_RANK
-# <<<
-

 def get_forward_backward_func():
     args = get_args()
     if mpu.get_pipeline_model_parallel_world_size() > 1:
```
```diff
@@ -46,36 +42,6 @@ def get_forward_backward_func():
         forward_backward_func = forward_backward_no_pipelining
     return forward_backward_func
 
-# >>>
-# def free_output_tensor(output_tensors, deallocate_pipeline_outputs):
-#     '''Pseudo-free (i.e., set to scalar) the output tensor's '.data' field.
-#
-#     This method should be called right after the output tensor has been
-#     sent to the next pipeline stage. At this point, the output tensor is
-#     only useful for its '.grad_fn' field, and not its '.data'.
-#     '''
-#     # >>>
-#     # raise Exception("hi.")
-#     # <<<
-#     if not deallocate_pipeline_outputs or output_tensors is None:
-#         return
-#     if isinstance(output_tensors, torch.Tensor):
-#         output_tensors = [output_tensors]
-#     for output_tensor in output_tensors:
-#         # >>>
-#         # if output_tensor.nelement() < 10:
-#         #     # raise Exception("interesting.")
-#         #     continue
-#         # <<<
-#         # >>>
-#         # output_tensor.data = torch.cuda.FloatTensor([0])
-#         output_tensor.data = torch.empty(
-#             (1,),
-#             device = torch.cuda.current_device(),
-#             dtype = output_tensor.dtype,
-#         )
-#         # <<<
-# <<<
 def deallocate_output_tensor(out):
     '''Pseudo-deallocate (i.e., set to scalar) the output tensor's '.data' field.
```
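The commented-out `free_output_tensor()` removed above was an earlier draft of `deallocate_output_tensor()`, whose definition begins at the end of this hunk. A small self-contained sketch of the idea, pieced together from the removed comments and the visible docstring (the assert is an assumption; the removed draft allocated its placeholder on `torch.cuda.current_device()`, while this sketch uses `out.device` so it also runs on CPU):

```python
import torch

def deallocate_output_tensor_sketch(out):
    '''Pseudo-deallocate (i.e., set to scalar) the output tensor's '.data' field.

    Called right after 'out' has been sent to the next pipeline stage:
    from that point on only '.grad_fn' is needed, so the large activation
    buffer behind '.data' can be swapped for a one-element placeholder.
    '''
    if out is None:
        return
    assert isinstance(out, torch.Tensor)   # assumption; mirrors the old isinstance check
    out.data = torch.empty(
        (1,),
        device=out.device,   # the removed draft used torch.cuda.current_device()
        dtype=out.dtype,
    )
```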
```diff
@@ -118,21 +84,15 @@ def custom_backward(output, grad_output):
     )
 
     # Call c++ engine [ see torch/csrc/autograd/python_engine.cpp ]
-    # >>>
-    try:
-        Variable._execution_engine.run_backward(
-            tensors = (output,),
-            grad_tensors = (grad_output,),
-            keep_graph = False,
-            create_graph = False,
-            inputs = tuple(),
-            allow_unreachable=True,
-            accumulate_grad=True,
-        )
-    except Exception as e:
-        print(">>>> rank = %d. <<<<" % torch.distributed.get_rank())
-        raise e
-    # <<<
+    Variable._execution_engine.run_backward(
+        tensors = (output,),
+        grad_tensors = (grad_output,),
+        keep_graph = False,
+        create_graph = False,
+        inputs = tuple(),
+        allow_unreachable=True,
+        accumulate_grad=True,
+    )
 
 
 def forward_step(forward_step_func, data_iterator, model, input_tensor, losses_reduced):
```
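The net change here removes the debug try/except (which printed the failing rank via `torch.distributed.get_rank()` before re-raising) and keeps the bare call into the C++ autograd engine. The direct engine call is what makes the pseudo-deallocation above workable: `torch.autograd.backward()` would reject a `grad_output` whose shape no longer matches the shrunken `output`, whereas `Variable._execution_engine.run_backward` validates gradients against the graph metadata recorded at forward time. A self-contained illustration of that interplay, assuming the engine call behaves as shown in the diff:

```python
import torch
from torch.autograd import Variable

# A stand-in for one pipeline stage's output and the gradient that would
# arrive from the next stage.
x = torch.randn(4, 8, requires_grad=True)
w = torch.randn(8, 16, requires_grad=True)
output = x @ w
grad_output = torch.ones_like(output)   # captured before pseudo-freeing

# Pseudo-deallocate: keep '.grad_fn', replace the activation storage.
output.data = torch.empty((1,), device=output.device, dtype=output.dtype)

# torch.autograd.backward(output, grad_output) would now fail its shape
# check (1-element 'output' vs. 4x16 'grad_output'), so the engine is
# invoked directly, exactly as custom_backward() does above.
Variable._execution_engine.run_backward(
    tensors=(output,),
    grad_tensors=(grad_output,),
    keep_graph=False,
    create_graph=False,
    inputs=tuple(),
    allow_unreachable=True,
    accumulate_grad=True,
)

print(x.grad.shape, w.grad.shape)   # gradients still reach the leaves
```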
```diff
@@ -163,14 +123,6 @@ def forward_step(forward_step_func, data_iterator, model, input_tensor, losses_r
         losses_reduced.append(loss_reduced)
     timers('forward-compute').stop()
 
-    # >>>
-    # if torch.distributed.get_rank() == 4:
-    #     pax(4, {
-    #         "output_tensor" : tp(output_tensor),
-    #         "input_tensor[-1]" : tp(input_tensor[-1]),
-    #     })
-    # <<<
-
     # If T5 model (or other model with encoder and decoder)
     # and in decoder stack, then send encoder_hidden_state
     # downstream as well.
```
```diff
@@ -425,9 +377,6 @@ def forward_backward_pipelining_with_interleaving(forward_step_func, data_iterat
                     tensor_shape=tensor_shape,
                     timers=timers)
         input_tensors[next_forward_model_chunk_id].append(input_tensor)
-        # >>>
-        pax({"output_tensor": output_tensor})
-        # <<<
         deallocate_output_tensor(output_tensor)
 
     # Run 1F1B in steady state.
```
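The `pax(...)` call deleted in this last hunk was a leftover breakpoint sitting right before `deallocate_output_tensor(output_tensor)` in the interleaved schedule's warm-up loop. To make the intent of that deallocation point concrete, here is a toy, self-contained sketch (shapes, the two-layer stand-in "stage", and the microbatch count are arbitrary choices for this sketch, not values from Megatron-LM):

```python
import torch

# With several microbatch outputs in flight at once, each pseudo-freed output
# keeps only its autograd hook ('.grad_fn'); the backward passes are later
# driven through custom_backward(), as sketched earlier.
stage = torch.nn.Sequential(torch.nn.Linear(1024, 4096), torch.nn.Linear(4096, 1024))

outputs = []
for _ in range(4):                                  # four microbatches "in flight"
    out = stage(torch.randn(32, 1024))
    # ... 'out' would be sent to the next pipeline stage here ...
    out.data = torch.empty((1,), dtype=out.dtype)   # i.e., deallocate_output_tensor(out)
    outputs.append(out)

# Each retained output now holds a single element instead of 32 * 1024.
print(sum(o.data.nelement() for o in outputs))      # prints 4
```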