Commit 290633b8 authored by VictorSanh's avatar VictorSanh
Browse files

Fix `args.gradient_accumulation_steps` used before assignment.

parent 649e9774
......@@ -404,6 +404,10 @@ def main():
type=int,
default=42,
help="random seed for initialization")
parser.add_argument('--gradient_accumulation_steps',
type=int,
default=1,
help="Number of update steps to accumulate before performing a backward/update pass.")
args = parser.parse_args()
processors = {
......@@ -469,7 +473,7 @@ def main():
model = BertForSequenceClassification(bert_config, len(label_list))
if args.init_checkpoint is not None:
model.load_state_dict(torch.load(args.init_checkpoint, map_location='cpu'))
model.bert.load_state_dict(torch.load(args.init_checkpoint, map_location='cpu'))
model.to(device)
if args.local_rank != -1:
......
......@@ -739,7 +739,11 @@ def main():
type=int,
default=42,
help="random seed for initialization")
parser.add_argument('--gradient_accumulation_steps',
type=int,
default=1,
help="Number of update steps to accumulate before performing a backward/update pass.")
args = parser.parse_args()
if args.local_rank == -1 or args.no_cuda:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment