Better explain lr and batch size in references/detection/train.py (#1233)

* explain lr and batch size in references/detection/train.py
* fix typo

Better explain lr and batch size in references/detection/train.py (#1233)
* explain lr and batch size in references/detection/train.py
* fix typo
19315e31 · Gu Wang · Francisco Massa · 8635be94 · 19315e31
Commit 19315e31 authored Aug 13, 2019 by Gu Wang Committed by Francisco Massa Aug 12, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 2 deletions

references/detection/train.py references/detection/train.py +8 -2

No files found.
--- a/references/detection/train.py
+++ b/references/detection/train.py
@@ -5,6 +5,9 @@ To run in a multi-gpu environment, use the distributed launcher::
    python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
        train.py ... --world-size $NGPU
+The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
+    --lr 0.02 --batch-size 2 --world-size 8
+If you use a different number of gpus, the learning rate should be changed to 0.02/8*$NGPU.
 """
 import datetime
 import os
@@ -140,12 +143,15 @@ if __name__ == "__main__":
    parser.add_argument('--dataset', default='coco', help='dataset')
    parser.add_argument('--model', default='maskrcnn_resnet50_fpn', help='model')
    parser.add_argument('--device', default='cuda', help='device')
-    parser.add_argument('-b', '--batch-size', default=2, type=int)
+    parser.add_argument('-b', '--batch-size', default=2, type=int,
+                        help='images per gpu, the total batch size is $NGPU x batch_size')
    parser.add_argument('--epochs', default=13, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 16)')
-    parser.add_argument('--lr', default=0.02, type=float, help='initial learning rate')
+    parser.add_argument('--lr', default=0.02, type=float,
+                        help='initial learning rate, 0.02 is the default value for training '
+                        'on 8 gpus and 2 images_per_gpu')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,