OpenDAS / Megatron-LM
Commit b219ff00, authored Nov 16, 2020 by Jared Casper
Parent: b4b0d739

Update code used for finetuning to latest API.
Showing 3 changed files with 5 additions and 7 deletions (+5, -7):

megatron/model/classification.py   (+1, -2)
megatron/model/multiple_choice.py  (+1, -2)
tasks/finetune_utils.py            (+3, -3)
megatron/model/classification.py
@@ -52,8 +52,7 @@ class Classification(MegatronModule):

     def forward(self, input_ids, attention_mask, tokentype_ids):

-        extended_attention_mask = bert_extended_attention_mask(
-            attention_mask, next(self.language_model.parameters()).dtype)
+        extended_attention_mask = bert_extended_attention_mask(attention_mask)
         position_ids = bert_position_ids(input_ids)

         _, pooled_output = self.language_model(input_ids,
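The old call passed next(self.language_model.parameters()).dtype, presumably so the helper could cast the mask to match the model's parameters; the updated API takes only the padding mask and handles the rest internally. As a rough sketch of what a single-argument bert_extended_attention_mask can look like (an assumed implementation for illustration, not code taken from this commit; PyTorch tensors assumed):

def bert_extended_attention_mask(attention_mask):
    # attention_mask: [batch, seq_len] padding mask.
    attention_mask_b1s = attention_mask.unsqueeze(1)   # [b, 1, s]
    attention_mask_bs1 = attention_mask.unsqueeze(2)   # [b, s, 1]
    # Pairwise mask [b, s, s], then a head dimension -> [b, 1, s, s].
    attention_mask_bss = attention_mask_b1s * attention_mask_bs1
    extended_attention_mask = attention_mask_bss.unsqueeze(1)
    # Return a boolean mask (True = masked position), so the caller no
    # longer has to supply a dtype.
    return extended_attention_mask < 0.5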
megatron/model/multiple_choice.py
@@ -64,8 +64,7 @@ class MultipleChoice(MegatronModule):

         attention_mask = attention_mask.view(-1, attention_mask.size(-1))
         tokentype_ids = tokentype_ids.view(-1, tokentype_ids.size(-1))

-        extended_attention_mask = bert_extended_attention_mask(
-            attention_mask, next(self.language_model.parameters()).dtype)
+        extended_attention_mask = bert_extended_attention_mask(attention_mask)
         position_ids = bert_position_ids(input_ids)

         _, pooled_output = self.language_model(input_ids,
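The context lines above also show the multiple-choice reshaping: inputs arrive with a choice dimension, roughly [batch, num_choices, seq_len], and are flattened so every choice passes through the shared language model as its own row. A quick shape check of the .view(-1, size(-1)) pattern (the concrete sizes below are illustrative):

import torch

batch, num_choices, seq_len = 4, 2, 128
attention_mask = torch.ones(batch, num_choices, seq_len)
# Collapse the batch and choice dimensions into one leading dimension.
flat = attention_mask.view(-1, attention_mask.size(-1))
assert flat.shape == (batch * num_choices, seq_len)   # (8, 128)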
tasks/finetune_utils.py
@@ -161,7 +161,7 @@ def _train(model, optimizer, lr_scheduler, forward_step,

             start_iteration = 0

             # Train for one step.
-            losses_dict, _ = train_step(forward_step, batch, model,
+            losses_dict, skipped_iter = train_step(forward_step, batch, model,
                                         optimizer, lr_scheduler)
             iteration += 1

@@ -169,7 +169,7 @@ def _train(model, optimizer, lr_scheduler, forward_step,

             report_memory_flag = training_log(losses_dict, losses_dict_sum,
                                               optimizer.param_groups[0]['lr'],
                                               iteration, optimizer.loss_scale,
-                                              report_memory_flag)
+                                              report_memory_flag, skipped_iter)

             # Autoresume
             if args.adlr_autoresume and \
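The finetuning loop changes track the updated train_step contract: instead of discarding the second return value, the loop keeps it as skipped_iter and forwards it to training_log as an extra argument. A minimal sketch of the shape of that contract, under the assumption that skipped_iter flags iterations where a mixed-precision overflow caused the optimizer step to be skipped (this is not the Megatron-LM implementation; the overflow attribute and forward_step return convention below are assumptions):

def train_step(forward_step, batch, model, optimizer, lr_scheduler):
    # One forward/backward pass; forward_step is assumed to return
    # (loss, losses_dict) for the given batch.
    optimizer.zero_grad()
    loss, losses_dict = forward_step(batch, model)
    loss.backward()

    # With fp16 training the loss scaler may detect an overflow, in which
    # case the update is skipped; report that so training_log can account
    # for the skipped iteration.
    skipped_iter = 0
    if getattr(optimizer, 'overflow', False):   # assumed fp16-optimizer attribute
        skipped_iter = 1
    else:
        optimizer.step()
        lr_scheduler.step()

    return losses_dict, skipped_iter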