Allow manually specifying checkpoint filename (#95)

8f1cb0c7 · Gao, Xiang · GitHub · 615f8144 · 8f1cb0c7 · 8f1cb0c7
Unverified commit 8f1cb0c7, authored Sep 11, 2018 by Gao, Xiang; committed by GitHub on Sep 11, 2018
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 5 deletions

torchani/neurochem/__init__.py torchani/neurochem/__init__.py +8 -4

torchani/neurochem/trainer.py torchani/neurochem/trainer.py +4 -1

No files found.
--- a/torchani/neurochem/__init__.py
+++ b/torchani/neurochem/__init__.py
@@ -314,16 +314,20 @@ class Trainer:
        filename (str): Input file name
        device (:class:`torch.device`): device to train the model
        tqdm (bool): whether to enable tqdm
-        tensorboard (str): Directory to store tensorboard log file, set to\
+        tensorboard (str): Directory to store tensorboard log file, set to
            ``None`` to disable tensorboardX.
        aev_caching (bool): Whether to use AEV caching.
+        checkpoint_name (str): Name of the checkpoint file, checkpoints will be
+            stored in the network directory with this file name.
    """
-    def __init__(self, filename, device=torch.device('cuda'),
+    def __init__(self, filename, device=torch.device('cuda'), tqdm=False,
-                 tqdm=False, tensorboard=None, aev_caching=False):
+                 tensorboard=None, aev_caching=False,
+                 checkpoint_name='model.pt'):
        self.filename = filename
        self.device = device
        self.aev_caching = aev_caching
+        self.checkpoint_name = checkpoint_name
        if tqdm:
            import tqdm
            self.tqdm = tqdm.tqdm
@@ -475,7 +479,7 @@ class Trainer:
        network_dir = os.path.join(dir, params['ntwkStoreDir'])
        if not os.path.exists(network_dir):
            os.makedirs(network_dir)
-        self.model_checkpoint = os.path.join(network_dir, 'model.pt')
+        self.model_checkpoint = os.path.join(network_dir, self.checkpoint_name)
        del params['ntwkStoreDir']
        self.max_nonimprove = params['tolr']
        del params['tolr']

--- a/torchani/neurochem/trainer.py
+++ b/torchani/neurochem/trainer.py
@@ -28,10 +28,13 @@ if __name__ == '__main__':
                        default=None)
    parser.add_argument('--cache-aev', dest='cache_aev', action='store_true',
                        help='Whether to cache AEV', default=None)
+    parser.add_argument('--checkpoint_name',
+                        help='Name of checkpoint file',
+                        default='model.pt')
    parser = parser.parse_args()
    d = torch.device(parser.device)
    trainer = Trainer(parser.config_path, d, parser.tqdm, parser.tensorboard,
-                      parser.cache_aev)
+                      parser.cache_aev, parser.checkpoint_name)
    trainer.load_data(parser.training_path, parser.validation_path)
    trainer.run()