Unverified commit f5d8fa84, authored by Chao Ma, committed by GitHub
Browse files

[Distributed] Fix all arguments to the format of xx_xxx (#2005)

* update

* update
parent 5e34ca8b
......@@ -50,7 +50,7 @@ python3 ~/dgl/tools/launch.py \
--num_servers 1 \
--part_config ogb-product/ogb-product.json \
--ip_config ip_config.txt \
"python3 train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-servers 1 --num-epochs 30 --batch-size 1000 --num-workers 4"
"python3 train_dist.py --graph_name ogb-product --ip_config ip_config.txt --num_servers 1 --num_epochs 30 --batch_size 1000 --num_workers 4"
```
To run unsupervised training:
......@@ -62,7 +62,7 @@ python3 ~/dgl/tools/launch.py \
--num_servers 1 \
--part_config ogb-product/ogb-product.json \
--ip_config ip_config.txt \
"python3 ~/dgl/examples/pytorch/graphsage/experimental/train_dist_unsupervised.py --graph-name ogb-product --ip_config ip_config.txt --num-servers 1 --num-epochs 3 --batch-size 1000"
"python3 ~/dgl/examples/pytorch/graphsage/experimental/train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_servers 1 --num_epochs 3 --batch_size 1000"
```
## Distributed code runs in the standalone mode
......@@ -81,13 +81,13 @@ python3 partition_graph.py --dataset ogb-product --num_parts 1
To run supervised training:
```bash
python3 train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 3 --batch-size 1000 --part_config data/ogb-product.json --standalone
python3 train_dist.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --part_config data/ogb-product.json --standalone
```
To run unsupervised training:
```bash
python3 train_dist_unsupervised.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 3 --batch-size 1000 --part_config data/ogb-product.json --standalone
python3 train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --part_config data/ogb-product.json --standalone
```
Note: please ensure that all environment variables shown above are unset if they were set for testing distributed training.
......@@ -289,26 +289,26 @@ def main(args):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument('--graph-name', type=str, help='graph name')
parser.add_argument('--graph_name', type=str, help='graph name')
parser.add_argument('--id', type=int, help='the partition id')
parser.add_argument('--ip_config', type=str, help='The file for IP configuration')
parser.add_argument('--part_config', type=str, help='The path to the partition config file')
parser.add_argument('--num-client', type=int, help='The number of clients')
parser.add_argument('--num-servers', type=int, default=1, help='The number of servers')
parser.add_argument('--n-classes', type=int, help='the number of classes')
parser.add_argument('--num_clients', type=int, help='The number of clients')
parser.add_argument('--num_servers', type=int, default=1, help='The number of servers')
parser.add_argument('--n_classes', type=int, help='the number of classes')
parser.add_argument('--gpu', type=int, default=0,
help="GPU device ID. Use -1 for CPU training")
parser.add_argument('--num-epochs', type=int, default=20)
parser.add_argument('--num-hidden', type=int, default=16)
parser.add_argument('--num-layers', type=int, default=2)
parser.add_argument('--fan-out', type=str, default='10,25')
parser.add_argument('--batch-size', type=int, default=1000)
parser.add_argument('--batch-size-eval', type=int, default=100000)
parser.add_argument('--log-every', type=int, default=20)
parser.add_argument('--eval-every', type=int, default=5)
parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--num_hidden', type=int, default=16)
parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--fan_out', type=str, default='10,25')
parser.add_argument('--batch_size', type=int, default=1000)
parser.add_argument('--batch_size_eval', type=int, default=100000)
parser.add_argument('--log_every', type=int, default=20)
parser.add_argument('--eval_every', type=int, default=5)
parser.add_argument('--lr', type=float, default=0.003)
parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--num-workers', type=int, default=4,
parser.add_argument('--num_workers', type=int, default=4,
help="Number of sampling processes. Use 0 for no extra process.")
parser.add_argument('--local_rank', type=int, help='get rank of the process')
parser.add_argument('--standalone', action='store_true', help='run in the standalone mode')
......
......@@ -448,32 +448,32 @@ def main(args):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument('--graph-name', type=str, help='graph name')
parser.add_argument('--graph_name', type=str, help='graph name')
parser.add_argument('--id', type=int, help='the partition id')
parser.add_argument('--ip_config', type=str, help='The file for IP configuration')
parser.add_argument('--part_config', type=str, help='The path to the partition config file')
parser.add_argument('--num-servers', type=int, default=1, help='Server count on each machine.')
parser.add_argument('--n-classes', type=int, help='the number of classes')
parser.add_argument('--num_servers', type=int, default=1, help='Server count on each machine.')
parser.add_argument('--n_classes', type=int, help='the number of classes')
parser.add_argument('--gpu', type=int, default=0,
help="GPU device ID. Use -1 for CPU training")
parser.add_argument('--num-epochs', type=int, default=20)
parser.add_argument('--num-hidden', type=int, default=16)
parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--num_hidden', type=int, default=16)
parser.add_argument('--num-layers', type=int, default=2)
parser.add_argument('--fan-out', type=str, default='10,25')
parser.add_argument('--batch-size', type=int, default=1000)
parser.add_argument('--batch-size-eval', type=int, default=100000)
parser.add_argument('--log-every', type=int, default=20)
parser.add_argument('--eval-every', type=int, default=5)
parser.add_argument('--fan_out', type=str, default='10,25')
parser.add_argument('--batch_size', type=int, default=1000)
parser.add_argument('--batch_size_eval', type=int, default=100000)
parser.add_argument('--log_every', type=int, default=20)
parser.add_argument('--eval_every', type=int, default=5)
parser.add_argument('--lr', type=float, default=0.003)
parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--num-workers', type=int, default=0,
parser.add_argument('--num_workers', type=int, default=0,
help="Number of sampling processes. Use 0 for no extra process.")
parser.add_argument('--local_rank', type=int, help='get rank of the process')
parser.add_argument('--standalone', action='store_true', help='run in the standalone mode')
parser.add_argument('--num-negs', type=int, default=1)
parser.add_argument('--neg-share', default=False, action='store_true',
parser.add_argument('--num_negs', type=int, default=1)
parser.add_argument('--neg_share', default=False, action='store_true',
help="sharing neg nodes for positive nodes")
parser.add_argument('--remove-edge', default=False, action='store_true',
parser.add_argument('--remove_edge', default=False, action='store_true',
help="whether to remove edges during sampling")
args = parser.parse_args()
......
......@@ -113,12 +113,12 @@ def main():
help='The number of trainer processes per machine')
parser.add_argument('--num_samplers', type=int, default=0,
help='The number of sampler processes per trainer process')
parser.add_argument('--num_servers', type=int,
help='The number of server processes per machine')
parser.add_argument('--part_config', type=str,
help='The file (in workspace) of the partition config')
parser.add_argument('--ip_config', type=str,
help='The file (in workspace) of IP configuration for server processes')
parser.add_argument('--num_servers', type=int,
help='Server count on each machine.')
parser.add_argument('--num_server_threads', type=int, default=1,
help='The number of OMP threads in the server process. \
It should be small if server processes and trainer processes run on \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment