"docs/source/api/python/nodeflow.rst" did not exist on "5549c70d337168414879bcd95f32daf817d72576"
Unverified commit 100d9328, authored by Mufei Li, committed by GitHub

Update (#2062)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-1-5.us-west-2.compute.internal>
parent a260a6e6
@@ -4,7 +4,7 @@ This is an implementation of [Learning Deep Generative Models of Graphs](https:/
 Yujia Li, Oriol Vinyals, Chris Dyer, Razvan Pascanu, Peter Battaglia.
 For molecule generation, see
-[our model zoo for Chemistry](https://github.com/dmlc/dgl/tree/master/examples/pytorch/model_zoo/chem/generative_models/dgmg).
+[DGL-LifeSci](https://github.com/awslabs/dgl-lifesci/tree/master/examples/generative_models/dgmg).
 ## Dependencies
 - Python 3.5.2
@@ -13,8 +13,7 @@ For molecule generation, see
 ## Usage
-- Train with batch size 1: `python3 main.py`
-- Train with batch size larger than 1: `python3 main_batch.py`.
+`python3 main.py`
 ## Performance
@@ -22,8 +21,7 @@ For molecule generation, see
 ## Speed
-On AWS p3.2x instance (w/ V100), one epoch takes ~526s for batch size 1 and takes
-~238s for batch size 10.
+On AWS p3.2x instance (w/ V100), one epoch takes ~526s.
 ## Acknowledgement
"""
Learning Deep Generative Models of Graphs
Paper: https://arxiv.org/pdf/1803.03324.pdf
This implementation works with minibatches of size larger than 1 for training and size 1 for inference.
"""
import argparse
import datetime
import time
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch.nn.utils import clip_grad_norm_
from model_batch import DGMG


def main(opts):
    t1 = time.time()

    # Setup dataset and data loader
    if opts['dataset'] == 'cycles':
        from cycles import CycleDataset, CycleModelEvaluation, CyclePrinting

        dataset = CycleDataset(fname=opts['path_to_dataset'])
        evaluator = CycleModelEvaluation(v_min=opts['min_size'],
                                         v_max=opts['max_size'],
                                         dir=opts['log_dir'])
        printer = CyclePrinting(num_epochs=opts['nepochs'],
                                num_batches=len(dataset) // opts['batch_size'])
    else:
        raise ValueError('Unsupported dataset: {}'.format(opts['dataset']))

    data_loader = DataLoader(dataset, batch_size=opts['batch_size'], shuffle=True, num_workers=0,
                             collate_fn=dataset.collate_batch)
    # Initialize model
    model = DGMG(v_max=opts['max_size'],
                 node_hidden_size=opts['node_hidden_size'],
                 num_prop_rounds=opts['num_propagation_rounds'])

    # Initialize optimizer
    if opts['optimizer'] == 'Adam':
        optimizer = Adam(model.parameters(), lr=opts['lr'])
    else:
        raise ValueError('Unsupported argument for the optimizer')
    t2 = time.time()

    # Training
    model.train()
    for epoch in range(opts['nepochs']):
        for batch, data in enumerate(data_loader):
            # log_prob is the joint log-likelihood of the action sequences
            # that generate the graphs in this batch.
            log_prob = model(batch_size=opts['batch_size'], actions=data)
            loss = -log_prob / opts['batch_size']
            batch_avg_prob = (log_prob / opts['batch_size']).detach().exp()
            batch_avg_loss = loss.item()

            optimizer.zero_grad()
            loss.backward()
            if opts['clip_grad']:
                clip_grad_norm_(model.parameters(), opts['clip_bound'])
            optimizer.step()

            printer.update(epoch + 1, {'averaged loss': batch_avg_loss,
                                       'averaged prob': batch_avg_prob})
    t3 = time.time()

    # Evaluation
    model.eval()
    evaluator.rollout_and_examine(model, opts['num_generated_samples'])
    evaluator.write_summary()

    t4 = time.time()

    print('It took {} to setup.'.format(datetime.timedelta(seconds=t2 - t1)))
    print('It took {} to finish training.'.format(datetime.timedelta(seconds=t3 - t2)))
    print('It took {} to finish evaluation.'.format(datetime.timedelta(seconds=t4 - t3)))
    print('--------------------------------------------------------------------------')
    print('On average, an epoch takes {}.'.format(datetime.timedelta(
        seconds=(t3 - t2) / opts['nepochs'])))

    # Drop the graphs cached on the model so they are not pickled with it.
    del model.g_list
    torch.save(model, './model_batched.pth')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='batched DGMG')

    # configure
    parser.add_argument('--seed', type=int, default=9284, help='random seed')

    # dataset
    parser.add_argument('--dataset', choices=['cycles'], default='cycles',
                        help='dataset to use')
    parser.add_argument('--path-to-dataset', type=str, default='cycles.p',
                        help='load the dataset if it exists, '
                             'generate it and save to the path otherwise')

    # log
    parser.add_argument('--log-dir', default='./results',
                        help='folder to save info like experiment configuration '
                             'or model evaluation results')

    # optimization
    parser.add_argument('--batch-size', type=int, default=10,
                        help='batch size to use for training')
    parser.add_argument('--clip-grad', action='store_true', default=True,
                        help='gradient clipping is required to prevent gradient explosion')
    parser.add_argument('--clip-bound', type=float, default=0.25,
                        help='constraint of gradient norm for gradient clipping')

    args = parser.parse_args()

    from utils import setup
    opts = setup(args)

    main(opts)
@@ -765,77 +765,3 @@ print('Among 100 graphs generated, {}% are valid.'.format(num_valid))
# For the complete implementation, see the `DGL DGMG example
# <https://github.com/dmlc/dgl/tree/master/examples/pytorch/dgmg>`__.
#
# Batched graph generation
# ---------------------------
#
# Speeding up DGMG is hard because each graph can be generated with a
# unique sequence of actions. One way to explore parallelism is to adopt
# asynchronous gradient descent with multiple processes. Each of them
# works on one graph at a time and the processes are loosely coordinated
# by a parameter server.
#
# DGL explores parallelism in the message-passing framework, on top of
# framework-provided tensor operations. The earlier tutorial already
# does this in the message propagation and graph embedding phases, but
# only within one graph. For a batch of graphs, a for loop is then needed:
#
# ::
#
#     for g in g_list:
#         self.graph_prop(g)
#
# Modify the code to work on a batch of graphs at once by replacing
# these lines with the following. On a macOS CPU, this immediately
# yields a six- to seven-fold speedup in the graph propagation part.
#
# ::
#
#     bg = dgl.batch(g_list)
#     self.graph_prop(bg)
#     g_list = dgl.unbatch(bg)
#
# You have already used this trick of calling ``dgl.batch`` in the
# `Tree-LSTM tutorial
# <http://docs.dgl.ai/tutorials/models/3_tree-lstm.html#sphx-glr-tutorials-models-3-tree-lstm-py>`__,
# and it is worth explaining one more time why it helps.
#
# By batching many small graphs, DGL parallelizes message passing across the
# individual graphs of a batch.
#
# With ``dgl.batch``, you merge ``g_{1}, ..., g_{N}`` into a single giant
# graph consisting of :math:`N` isolated small graphs. For example, if you
# have two graphs with adjacency matrices
#
# ::
#
#     [0, 1]
#     [1, 0]
#
#     [0, 1, 0]
#     [1, 0, 0]
#     [0, 1, 0]
#
# ``dgl.batch`` simply gives a graph whose adjacency matrix is
#
# ::
#
#     [0, 1, 0, 0, 0]
#     [1, 0, 0, 0, 0]
#     [0, 0, 0, 1, 0]
#     [0, 0, 1, 0, 0]
#     [0, 0, 0, 1, 0]
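#
# As a rough, purely illustrative sketch (it assumes the classic
# ``DGLGraph``/``add_nodes``/``add_edges`` construction API; only ``dgl.batch``
# itself is essential here), the two graphs above can be built and merged like this:
#
# ::
#
#     import dgl
#
#     g1 = dgl.DGLGraph()
#     g1.add_nodes(2)
#     g1.add_edges([0, 1], [1, 0])        # adjacency [[0, 1], [1, 0]]
#
#     g2 = dgl.DGLGraph()
#     g2.add_nodes(3)
#     g2.add_edges([0, 1, 2], [1, 0, 1])  # adjacency of the 3-node graph above
#
#     bg = dgl.batch([g1, g2])
#     print(bg.number_of_nodes())         # 5 nodes forming two isolated components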
#
# In DGL, the message function is defined on the edges, so the cost of the edge
# user-defined functions (UDFs) simply scales linearly with the total number of
# edges in the batch.
#
# The reduce UDFs, such as ``dgmg_reduce``, work on nodes, and each node may
# have a different number of incoming edges. Using degree bucketing, DGL
# internally groups nodes with the same in-degree and calls the reduce UDF once
# for each group. Batching therefore also reduces the number of calls to these UDFs.
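#
# Continuing the sketch above (the feature name ``'h'`` and the reduce function
# are made up for illustration; ``update_all``, ``dgl.function.copy_src``, and
# ``nodes.mailbox`` are the standard DGL pieces being assumed), a node UDF on
# the batched graph could look like:
#
# ::
#
#     import torch
#     import dgl.function as fn
#
#     def toy_reduce(nodes):
#         # nodes.mailbox['m'] has shape (nodes_in_bucket, in_degree, feat_size);
#         # DGL invokes this once per in-degree bucket of the batched graph.
#         return {'h': nodes.mailbox['m'].sum(dim=1)}
#
#     bg.ndata['h'] = torch.zeros(bg.number_of_nodes(), 4)
#     bg.update_all(message_func=fn.copy_src('h', 'm'), reduce_func=toy_reduce)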
#
# Modifying the node/edge features of the batched graph object does not
# affect the features of the original small graphs, so you need to replace
# the old graph list with the new one via ``g_list = dgl.unbatch(bg)``.
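#
# For instance (again purely illustrative, reusing ``bg``, ``g1``, and ``g2``
# from the sketches above):
#
# ::
#
#     bg.ndata['h'] = bg.ndata['h'] + 1.  # modify features on the batched graph
#     g1, g2 = dgl.unbatch(bg)            # the returned graphs carry the updated features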
#
# The complete code for the batched version can also be found in the example.
# On a testbed, it runs roughly twice as fast as the previous implementation.