Fix minor comment typos (#15740)

5444687f · Ivan Agarský · GitHub · a63bd367 · 5444687f
Unverified commit 5444687f, authored Feb 21, 2022 by Ivan Agarský; committed by GitHub Feb 21, 2022.
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

examples/research_projects/distillation/train.py +3 -3

No files found.
--- a/examples/research_projects/distillation/train.py
+++ b/examples/research_projects/distillation/train.py
@@ -133,7 +133,7 @@ def main():
        "--alpha_mlm",
        default=0.0,
        type=float,
-        help="Linear weight for the MLM loss. Must be >=0. Should be used in coonjunction with `mlm` flag.",
+        help="Linear weight for the MLM loss. Must be >=0. Should be used in conjunction with `mlm` flag.",
    )
    parser.add_argument("--alpha_clm", default=0.5, type=float, help="Linear weight for the CLM loss. Must be >=0.")
    parser.add_argument("--alpha_mse", default=0.0, type=float, help="Linear weight of the MSE loss. Must be >=0.")
@@ -164,7 +164,7 @@ def main():
    parser.add_argument(
        "--restrict_ce_to_mask",
        action="store_true",
-        help="If true, compute the distilation loss only the [MLM] prediction distribution.",
+        help="If true, compute the distillation loss only the [MLM] prediction distribution.",
    )
    parser.add_argument(
        "--freeze_pos_embs",
@@ -192,7 +192,7 @@ def main():
        help="Gradient accumulation for larger training batches.",
    )
    parser.add_argument("--warmup_prop", default=0.05, type=float, help="Linear warmup proportion.")
-    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.")
+    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
    parser.add_argument("--learning_rate", default=5e-4, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--adam_epsilon", default=1e-6, type=float, help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=5.0, type=float, help="Max gradient norm.")