fix a little bug about resume (#1628)

* fix a little bug about resume: when resuming, training needs to continue from the epoch after the last saved one, not restart from epoch 0.
* the second way for resuming

fix a little bug about resume (#1628)
* fix a little bug about resume: when resuming, training needs to continue from the epoch after the last saved one, not restart from epoch 0.
* the second way for resuming
f4a82243 · MultiK · Francisco Massa · 10f34160 · f4a82243
Commit f4a82243 authored Dec 20, 2019 by MultiK Committed by Francisco Massa Dec 19, 2019
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 4 deletions

references/detection/train.py references/detection/train.py +7 -4

No files found.
--- a/references/detection/train.py
+++ b/references/detection/train.py
@@ -114,6 +114,7 @@ def main(args):
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
+        args.start_epoch = checkpoint['epoch'] + 1
    if args.test_only:
        evaluate(model, data_loader_test, device=device)
@@ -121,7 +122,7 @@ def main(args):
    print("Start training")
    start_time = time.time()
-    for epoch in range(args.epochs):
+    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
@@ -131,7 +132,8 @@ def main(args):
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
-                'args': args},
+                'args': args,
+                'epoch': epoch},
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
        # evaluate after every epoch
@@ -171,6 +173,7 @@ if __name__ == "__main__":
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    parser.add_argument('--output-dir', default='.', help='path where to save')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
+    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)
    parser.add_argument(
        "--test-only",