chenpangpang/transformers · Commits · 5444687f
"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "e34e45536fb8ae7f4d966e3604e3ca5f77e2b486"
Unverified commit 5444687f, authored Feb 21, 2022 by Ivan Agarský, committed by GitHub on Feb 21, 2022

Fix minor comment typos (#15740)

parent a63bd367
Showing 1 changed file with 3 additions and 3 deletions.
examples/research_projects/distillation/train.py (+3, -3)
@@ -133,7 +133,7 @@ def main():
         "--alpha_mlm",
         default=0.0,
         type=float,
-        help="Linear weight for the MLM loss. Must be >=0. Should be used in coonjunction with `mlm` flag.",
+        help="Linear weight for the MLM loss. Must be >=0. Should be used in conjunction with `mlm` flag.",
     )
     parser.add_argument("--alpha_clm", default=0.5, type=float, help="Linear weight for the CLM loss. Must be >=0.")
     parser.add_argument("--alpha_mse", default=0.0, type=float, help="Linear weight of the MSE loss. Must be >=0.")
@@ -164,7 +164,7 @@ def main():
     parser.add_argument(
         "--restrict_ce_to_mask",
         action="store_true",
-        help="If true, compute the distilation loss only the [MLM] prediction distribution.",
+        help="If true, compute the distillation loss only the [MLM] prediction distribution.",
     )
     parser.add_argument(
         "--freeze_pos_embs",
@@ -192,7 +192,7 @@ def main():
         help="Gradient accumulation for larger training batches.",
     )
     parser.add_argument("--warmup_prop", default=0.05, type=float, help="Linear warmup proportion.")
-    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.")
+    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
     parser.add_argument("--learning_rate", default=5e-4, type=float, help="The initial learning rate for Adam.")
     parser.add_argument("--adam_epsilon", default=1e-6, type=float, help="Epsilon for Adam optimizer.")
     parser.add_argument("--max_grad_norm", default=5.0, type=float, help="Max gradient norm.")
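For context, below is a minimal runnable sketch of how the help strings corrected by this commit sit in the script's argparse setup, and how the alpha flags, which the help text describes as linear weights for the per-objective losses, would plug into a combined loss. The placeholder loss values and the combination line are illustrative assumptions, not the repository's actual Distiller code.

import argparse

# Sketch of the argparse flags touched by this commit (help strings as fixed).
parser = argparse.ArgumentParser(description="distillation train.py flags (sketch)")
parser.add_argument(
    "--alpha_mlm",
    default=0.0,
    type=float,
    help="Linear weight for the MLM loss. Must be >=0. Should be used in conjunction with `mlm` flag.",
)
parser.add_argument("--alpha_clm", default=0.5, type=float, help="Linear weight for the CLM loss. Must be >=0.")
parser.add_argument("--alpha_mse", default=0.0, type=float, help="Linear weight of the MSE loss. Must be >=0.")
parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")

args = parser.parse_args(["--alpha_mlm", "0.5", "--alpha_clm", "0.0"])

# The help strings call the alphas linear weights, so the combined training
# loss would take this shape (placeholder values, assumed structure):
loss_mlm, loss_clm, loss_mse = 2.31, 0.0, 0.04
total_loss = args.alpha_mlm * loss_mlm + args.alpha_clm * loss_clm + args.alpha_mse * loss_mse
print(f"total loss: {total_loss:.3f}")

Since the typos live only in help text, the fix changes what `python train.py --help` prints and nothing about training behavior.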