"configs/datasets/vscode:/vscode.git/clone" did not exist on "c903e7f66200a93e291626b3ee2fe0d60f579e53"
Commit 139873f6 authored by thomwolf's avatar thomwolf
Browse files
parents 04287a4d a1af5247
...@@ -427,7 +427,10 @@ def main(): ...@@ -427,7 +427,10 @@ def main():
type=int, type=int,
default=-1, default=-1,
help="local_rank for distributed training on gpus") help="local_rank for distributed training on gpus")
parser.add_argument('--seed',
type=int,
default=42,
help="random seed for initialization")
args = parser.parse_args() args = parser.parse_args()
processors = { processors = {
...@@ -445,6 +448,11 @@ def main(): ...@@ -445,6 +448,11 @@ def main():
# print("Initializing the distributed backend: NCCL") # print("Initializing the distributed backend: NCCL")
print("device", device, "n_gpu", n_gpu) print("device", device, "n_gpu", n_gpu)
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if n_gpu>0: torch.cuda.manual_seed_all(args.seed)
if not args.do_train and not args.do_eval: if not args.do_train and not args.do_eval:
raise ValueError("At least one of `do_train` or `do_eval` must be True.") raise ValueError("At least one of `do_train` or `do_eval` must be True.")
...@@ -529,10 +537,10 @@ def main(): ...@@ -529,10 +537,10 @@ def main():
label_ids = label_ids.to(device) label_ids = label_ids.to(device)
loss, _ = model(input_ids, segment_ids, input_mask, label_ids) loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
total_tr_loss += loss.item() total_tr_loss += loss.sum().item() # sum() is to account for multi-gpu support.
nb_tr_examples += input_ids.size(0) nb_tr_examples += input_ids.size(0)
model.zero_grad() model.zero_grad()
loss.backward() loss.sum().backward() # sum() is to account for multi-gpu support.
optimizer.step() optimizer.step()
global_step += 1 global_step += 1
...@@ -573,7 +581,7 @@ def main(): ...@@ -573,7 +581,7 @@ def main():
label_ids = label_ids.to('cpu').numpy() label_ids = label_ids.to('cpu').numpy()
tmp_eval_accuracy = accuracy(logits, label_ids) tmp_eval_accuracy = accuracy(logits, label_ids)
eval_loss += tmp_eval_loss.item() eval_loss += tmp_eval_loss.sum().item()
eval_accuracy += tmp_eval_accuracy eval_accuracy += tmp_eval_accuracy
nb_eval_examples += input_ids.size(0) nb_eval_examples += input_ids.size(0)
......
...@@ -745,6 +745,10 @@ def main(): ...@@ -745,6 +745,10 @@ def main():
type=int, type=int,
default=-1, default=-1,
help="local_rank for distributed training on gpus") help="local_rank for distributed training on gpus")
parser.add_argument('--seed',
type=int,
default=42,
help="random seed for initialization")
args = parser.parse_args() args = parser.parse_args()
...@@ -757,6 +761,11 @@ def main(): ...@@ -757,6 +761,11 @@ def main():
# print("Initializing the distributed backend: NCCL") # print("Initializing the distributed backend: NCCL")
print("device", device, "n_gpu", n_gpu) print("device", device, "n_gpu", n_gpu)
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if n_gpu>0: torch.cuda.manual_seed_all(args.seed)
if not args.do_train and not args.do_predict: if not args.do_train and not args.do_predict:
raise ValueError("At least one of `do_train` or `do_predict` must be True.") raise ValueError("At least one of `do_train` or `do_predict` must be True.")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment