"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "23d50522e7427ff74bc103af956fc114e4fc1969"
Unverified Commit f5d8fa84 authored by Chao Ma, committed by GitHub
Browse files

[Distributed] Fix all arguments to the format of xx_xxx (#2005)

* update

* update
parent 5e34ca8b
...@@ -50,7 +50,7 @@ python3 ~/dgl/tools/launch.py \ ...@@ -50,7 +50,7 @@ python3 ~/dgl/tools/launch.py \
--num_servers 1 \ --num_servers 1 \
--part_config ogb-product/ogb-product.json \ --part_config ogb-product/ogb-product.json \
--ip_config ip_config.txt \ --ip_config ip_config.txt \
"python3 train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-servers 1 --num-epochs 30 --batch-size 1000 --num-workers 4" "python3 train_dist.py --graph_name ogb-product --ip_config ip_config.txt --num_servers 1 --num_epochs 30 --batch_size 1000 --num_workers 4"
``` ```
To run unsupervised training: To run unsupervised training:
...@@ -62,7 +62,7 @@ python3 ~/dgl/tools/launch.py \ ...@@ -62,7 +62,7 @@ python3 ~/dgl/tools/launch.py \
--num_servers 1 \ --num_servers 1 \
--part_config ogb-product/ogb-product.json \ --part_config ogb-product/ogb-product.json \
--ip_config ip_config.txt \ --ip_config ip_config.txt \
"python3 ~/dgl/examples/pytorch/graphsage/experimental/train_dist_unsupervised.py --graph-name ogb-product --ip_config ip_config.txt --num-servers 1 --num-epochs 3 --batch-size 1000" "python3 ~/dgl/examples/pytorch/graphsage/experimental/train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_servers 1 --num_epochs 3 --batch_size 1000"
``` ```
## Distributed code runs in the standalone mode ## Distributed code runs in the standalone mode
...@@ -81,13 +81,13 @@ python3 partition_graph.py --dataset ogb-product --num_parts 1 ...@@ -81,13 +81,13 @@ python3 partition_graph.py --dataset ogb-product --num_parts 1
To run supervised training: To run supervised training:
```bash ```bash
python3 train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 3 --batch-size 1000 --part_config data/ogb-product.json --standalone python3 train_dist.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --part_config data/ogb-product.json --standalone
``` ```
To run unsupervised training: To run unsupervised training:
```bash ```bash
python3 train_dist_unsupervised.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 3 --batch-size 1000 --part_config data/ogb-product.json --standalone python3 train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --part_config data/ogb-product.json --standalone
``` ```
Note: please ensure that all environment variables shown above are unset if they were set for testing distributed training. Note: please ensure that all environment variables shown above are unset if they were set for testing distributed training.
...@@ -289,26 +289,26 @@ def main(args): ...@@ -289,26 +289,26 @@ def main(args):
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN') parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser) register_data_args(parser)
parser.add_argument('--graph-name', type=str, help='graph name') parser.add_argument('--graph_name', type=str, help='graph name')
parser.add_argument('--id', type=int, help='the partition id') parser.add_argument('--id', type=int, help='the partition id')
parser.add_argument('--ip_config', type=str, help='The file for IP configuration') parser.add_argument('--ip_config', type=str, help='The file for IP configuration')
parser.add_argument('--part_config', type=str, help='The path to the partition config file') parser.add_argument('--part_config', type=str, help='The path to the partition config file')
parser.add_argument('--num-client', type=int, help='The number of clients') parser.add_argument('--num_clients', type=int, help='The number of clients')
parser.add_argument('--num-servers', type=int, default=1, help='The number of servers') parser.add_argument('--num_servers', type=int, default=1, help='The number of servers')
parser.add_argument('--n-classes', type=int, help='the number of classes') parser.add_argument('--n_classes', type=int, help='the number of classes')
parser.add_argument('--gpu', type=int, default=0, parser.add_argument('--gpu', type=int, default=0,
help="GPU device ID. Use -1 for CPU training") help="GPU device ID. Use -1 for CPU training")
parser.add_argument('--num-epochs', type=int, default=20) parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--num-hidden', type=int, default=16) parser.add_argument('--num_hidden', type=int, default=16)
parser.add_argument('--num-layers', type=int, default=2) parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--fan-out', type=str, default='10,25') parser.add_argument('--fan_out', type=str, default='10,25')
parser.add_argument('--batch-size', type=int, default=1000) parser.add_argument('--batch_size', type=int, default=1000)
parser.add_argument('--batch-size-eval', type=int, default=100000) parser.add_argument('--batch_size_eval', type=int, default=100000)
parser.add_argument('--log-every', type=int, default=20) parser.add_argument('--log_every', type=int, default=20)
parser.add_argument('--eval-every', type=int, default=5) parser.add_argument('--eval_every', type=int, default=5)
parser.add_argument('--lr', type=float, default=0.003) parser.add_argument('--lr', type=float, default=0.003)
parser.add_argument('--dropout', type=float, default=0.5) parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--num-workers', type=int, default=4, parser.add_argument('--num_workers', type=int, default=4,
help="Number of sampling processes. Use 0 for no extra process.") help="Number of sampling processes. Use 0 for no extra process.")
parser.add_argument('--local_rank', type=int, help='get rank of the process') parser.add_argument('--local_rank', type=int, help='get rank of the process')
parser.add_argument('--standalone', action='store_true', help='run in the standalone mode') parser.add_argument('--standalone', action='store_true', help='run in the standalone mode')
......
...@@ -448,32 +448,32 @@ def main(args): ...@@ -448,32 +448,32 @@ def main(args):
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN') parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser) register_data_args(parser)
parser.add_argument('--graph-name', type=str, help='graph name') parser.add_argument('--graph_name', type=str, help='graph name')
parser.add_argument('--id', type=int, help='the partition id') parser.add_argument('--id', type=int, help='the partition id')
parser.add_argument('--ip_config', type=str, help='The file for IP configuration') parser.add_argument('--ip_config', type=str, help='The file for IP configuration')
parser.add_argument('--part_config', type=str, help='The path to the partition config file') parser.add_argument('--part_config', type=str, help='The path to the partition config file')
parser.add_argument('--num-servers', type=int, default=1, help='Server count on each machine.') parser.add_argument('--num_servers', type=int, default=1, help='Server count on each machine.')
parser.add_argument('--n-classes', type=int, help='the number of classes') parser.add_argument('--n_classes', type=int, help='the number of classes')
parser.add_argument('--gpu', type=int, default=0, parser.add_argument('--gpu', type=int, default=0,
help="GPU device ID. Use -1 for CPU training") help="GPU device ID. Use -1 for CPU training")
parser.add_argument('--num-epochs', type=int, default=20) parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--num-hidden', type=int, default=16) parser.add_argument('--num_hidden', type=int, default=16)
parser.add_argument('--num-layers', type=int, default=2) parser.add_argument('--num-layers', type=int, default=2)
parser.add_argument('--fan-out', type=str, default='10,25') parser.add_argument('--fan_out', type=str, default='10,25')
parser.add_argument('--batch-size', type=int, default=1000) parser.add_argument('--batch_size', type=int, default=1000)
parser.add_argument('--batch-size-eval', type=int, default=100000) parser.add_argument('--batch_size_eval', type=int, default=100000)
parser.add_argument('--log-every', type=int, default=20) parser.add_argument('--log_every', type=int, default=20)
parser.add_argument('--eval-every', type=int, default=5) parser.add_argument('--eval_every', type=int, default=5)
parser.add_argument('--lr', type=float, default=0.003) parser.add_argument('--lr', type=float, default=0.003)
parser.add_argument('--dropout', type=float, default=0.5) parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--num-workers', type=int, default=0, parser.add_argument('--num_workers', type=int, default=0,
help="Number of sampling processes. Use 0 for no extra process.") help="Number of sampling processes. Use 0 for no extra process.")
parser.add_argument('--local_rank', type=int, help='get rank of the process') parser.add_argument('--local_rank', type=int, help='get rank of the process')
parser.add_argument('--standalone', action='store_true', help='run in the standalone mode') parser.add_argument('--standalone', action='store_true', help='run in the standalone mode')
parser.add_argument('--num-negs', type=int, default=1) parser.add_argument('--num_negs', type=int, default=1)
parser.add_argument('--neg-share', default=False, action='store_true', parser.add_argument('--neg_share', default=False, action='store_true',
help="sharing neg nodes for positive nodes") help="sharing neg nodes for positive nodes")
parser.add_argument('--remove-edge', default=False, action='store_true', parser.add_argument('--remove_edge', default=False, action='store_true',
help="whether to remove edges during sampling") help="whether to remove edges during sampling")
args = parser.parse_args() args = parser.parse_args()
......
...@@ -113,12 +113,12 @@ def main(): ...@@ -113,12 +113,12 @@ def main():
help='The number of trainer processes per machine') help='The number of trainer processes per machine')
parser.add_argument('--num_samplers', type=int, default=0, parser.add_argument('--num_samplers', type=int, default=0,
help='The number of sampler processes per trainer process') help='The number of sampler processes per trainer process')
parser.add_argument('--num_servers', type=int,
help='The number of server processes per machine')
parser.add_argument('--part_config', type=str, parser.add_argument('--part_config', type=str,
help='The file (in workspace) of the partition config') help='The file (in workspace) of the partition config')
parser.add_argument('--ip_config', type=str, parser.add_argument('--ip_config', type=str,
help='The file (in workspace) of IP configuration for server processes') help='The file (in workspace) of IP configuration for server processes')
parser.add_argument('--num_servers', type=int,
help='Server count on each machine.')
parser.add_argument('--num_server_threads', type=int, default=1, parser.add_argument('--num_server_threads', type=int, default=1,
help='The number of OMP threads in the server process. \ help='The number of OMP threads in the server process. \
It should be small if server processes and trainer processes run on \ It should be small if server processes and trainer processes run on \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment