Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
f5d8fa84
Unverified
Commit
f5d8fa84
authored
Aug 12, 2020
by
Chao Ma
Committed by
GitHub
Aug 12, 2020
Browse files
[Distributed] Fix all arguments to the format of xx_xxx (#2005)
* update * update
parent
5e34ca8b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
33 additions
and
33 deletions
+33
-33
examples/pytorch/graphsage/experimental/README.md
examples/pytorch/graphsage/experimental/README.md
+4
-4
examples/pytorch/graphsage/experimental/train_dist.py
examples/pytorch/graphsage/experimental/train_dist.py
+13
-13
examples/pytorch/graphsage/experimental/train_dist_unsupervised.py
...pytorch/graphsage/experimental/train_dist_unsupervised.py
+14
-14
tools/launch.py
tools/launch.py
+2
-2
No files found.
examples/pytorch/graphsage/experimental/README.md
View file @
f5d8fa84
...
...
@@ -50,7 +50,7 @@ python3 ~/dgl/tools/launch.py \
--num_servers 1 \
--part_config ogb-product/ogb-product.json \
--ip_config ip_config.txt \
"python3 train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-servers 1 --num-epochs 30 --batch-size 1000 --num-workers 4"
"python3 train_dist.py --graph_name ogb-product --ip_config ip_config.txt --num_servers 1 --num_epochs 30 --batch_size 1000 --num_workers 4"
```
To run unsupervised training:
...
...
@@ -62,7 +62,7 @@ python3 ~/dgl/tools/launch.py \
--num_servers 1 \
--part_config ogb-product/ogb-product.json \
--ip_config ip_config.txt \
"python3 ~/dgl/examples/pytorch/graphsage/experimental/train_dist_unsupervised.py --graph-name ogb-product --ip_config ip_config.txt --num-servers 1 --num-epochs 3 --batch-size 1000"
"python3 ~/dgl/examples/pytorch/graphsage/experimental/train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_servers 1 --num_epochs 3 --batch_size 1000"
```
## Distributed code runs in the standalone mode
...
...
@@ -81,13 +81,13 @@ python3 partition_graph.py --dataset ogb-product --num_parts 1
To run supervised training:
```bash
python3 train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 3 --batch-size 1000 --part_config data/ogb-product.json --standalone
python3 train_dist.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --part_config data/ogb-product.json --standalone
```
To run unsupervised training:
```bash
python3 train_dist_unsupervised.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 3 --batch-size 1000 --part_config data/ogb-product.json --standalone
python3 train_dist_unsupervised.py --graph_name ogb-product --ip_config ip_config.txt --num_epochs 3 --batch_size 1000 --part_config data/ogb-product.json --standalone
```
Note: please ensure that all environment variables shown above are unset if they were set for testing distributed training.
examples/pytorch/graphsage/experimental/train_dist.py
View file @
f5d8fa84
...
...
@@ -289,26 +289,26 @@ def main(args):
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
(
description
=
'GCN'
)
register_data_args
(
parser
)
parser
.
add_argument
(
'--graph
-
name'
,
type
=
str
,
help
=
'graph name'
)
parser
.
add_argument
(
'--graph
_
name'
,
type
=
str
,
help
=
'graph name'
)
parser
.
add_argument
(
'--id'
,
type
=
int
,
help
=
'the partition id'
)
parser
.
add_argument
(
'--ip_config'
,
type
=
str
,
help
=
'The file for IP configuration'
)
parser
.
add_argument
(
'--part_config'
,
type
=
str
,
help
=
'The path to the partition config file'
)
parser
.
add_argument
(
'--num
-
client'
,
type
=
int
,
help
=
'The number of clients'
)
parser
.
add_argument
(
'--num
-
servers'
,
type
=
int
,
default
=
1
,
help
=
'The number of servers'
)
parser
.
add_argument
(
'--n
-
classes'
,
type
=
int
,
help
=
'the number of classes'
)
parser
.
add_argument
(
'--num
_
client
s
'
,
type
=
int
,
help
=
'The number of clients'
)
parser
.
add_argument
(
'--num
_
servers'
,
type
=
int
,
default
=
1
,
help
=
'The number of servers'
)
parser
.
add_argument
(
'--n
_
classes'
,
type
=
int
,
help
=
'the number of classes'
)
parser
.
add_argument
(
'--gpu'
,
type
=
int
,
default
=
0
,
help
=
"GPU device ID. Use -1 for CPU training"
)
parser
.
add_argument
(
'--num
-
epochs'
,
type
=
int
,
default
=
20
)
parser
.
add_argument
(
'--num
-
hidden'
,
type
=
int
,
default
=
16
)
parser
.
add_argument
(
'--num
-
layers'
,
type
=
int
,
default
=
2
)
parser
.
add_argument
(
'--fan
-
out'
,
type
=
str
,
default
=
'10,25'
)
parser
.
add_argument
(
'--batch
-
size'
,
type
=
int
,
default
=
1000
)
parser
.
add_argument
(
'--batch
-
size
-
eval'
,
type
=
int
,
default
=
100000
)
parser
.
add_argument
(
'--log
-
every'
,
type
=
int
,
default
=
20
)
parser
.
add_argument
(
'--eval
-
every'
,
type
=
int
,
default
=
5
)
parser
.
add_argument
(
'--num
_
epochs'
,
type
=
int
,
default
=
20
)
parser
.
add_argument
(
'--num
_
hidden'
,
type
=
int
,
default
=
16
)
parser
.
add_argument
(
'--num
_
layers'
,
type
=
int
,
default
=
2
)
parser
.
add_argument
(
'--fan
_
out'
,
type
=
str
,
default
=
'10,25'
)
parser
.
add_argument
(
'--batch
_
size'
,
type
=
int
,
default
=
1000
)
parser
.
add_argument
(
'--batch
_
size
_
eval'
,
type
=
int
,
default
=
100000
)
parser
.
add_argument
(
'--log
_
every'
,
type
=
int
,
default
=
20
)
parser
.
add_argument
(
'--eval
_
every'
,
type
=
int
,
default
=
5
)
parser
.
add_argument
(
'--lr'
,
type
=
float
,
default
=
0.003
)
parser
.
add_argument
(
'--dropout'
,
type
=
float
,
default
=
0.5
)
parser
.
add_argument
(
'--num
-
workers'
,
type
=
int
,
default
=
4
,
parser
.
add_argument
(
'--num
_
workers'
,
type
=
int
,
default
=
4
,
help
=
"Number of sampling processes. Use 0 for no extra process."
)
parser
.
add_argument
(
'--local_rank'
,
type
=
int
,
help
=
'get rank of the process'
)
parser
.
add_argument
(
'--standalone'
,
action
=
'store_true'
,
help
=
'run in the standalone mode'
)
...
...
examples/pytorch/graphsage/experimental/train_dist_unsupervised.py
View file @
f5d8fa84
...
...
@@ -448,32 +448,32 @@ def main(args):
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
(
description
=
'GCN'
)
register_data_args
(
parser
)
parser
.
add_argument
(
'--graph
-
name'
,
type
=
str
,
help
=
'graph name'
)
parser
.
add_argument
(
'--graph
_
name'
,
type
=
str
,
help
=
'graph name'
)
parser
.
add_argument
(
'--id'
,
type
=
int
,
help
=
'the partition id'
)
parser
.
add_argument
(
'--ip_config'
,
type
=
str
,
help
=
'The file for IP configuration'
)
parser
.
add_argument
(
'--part_config'
,
type
=
str
,
help
=
'The path to the partition config file'
)
parser
.
add_argument
(
'--num
-
servers'
,
type
=
int
,
default
=
1
,
help
=
'Server count on each machine.'
)
parser
.
add_argument
(
'--n
-
classes'
,
type
=
int
,
help
=
'the number of classes'
)
parser
.
add_argument
(
'--num
_
servers'
,
type
=
int
,
default
=
1
,
help
=
'Server count on each machine.'
)
parser
.
add_argument
(
'--n
_
classes'
,
type
=
int
,
help
=
'the number of classes'
)
parser
.
add_argument
(
'--gpu'
,
type
=
int
,
default
=
0
,
help
=
"GPU device ID. Use -1 for CPU training"
)
parser
.
add_argument
(
'--num
-
epochs'
,
type
=
int
,
default
=
20
)
parser
.
add_argument
(
'--num
-
hidden'
,
type
=
int
,
default
=
16
)
parser
.
add_argument
(
'--num
_
epochs'
,
type
=
int
,
default
=
20
)
parser
.
add_argument
(
'--num
_
hidden'
,
type
=
int
,
default
=
16
)
parser
.
add_argument
(
'--num-layers'
,
type
=
int
,
default
=
2
)
parser
.
add_argument
(
'--fan
-
out'
,
type
=
str
,
default
=
'10,25'
)
parser
.
add_argument
(
'--batch
-
size'
,
type
=
int
,
default
=
1000
)
parser
.
add_argument
(
'--batch
-
size
-
eval'
,
type
=
int
,
default
=
100000
)
parser
.
add_argument
(
'--log
-
every'
,
type
=
int
,
default
=
20
)
parser
.
add_argument
(
'--eval
-
every'
,
type
=
int
,
default
=
5
)
parser
.
add_argument
(
'--fan
_
out'
,
type
=
str
,
default
=
'10,25'
)
parser
.
add_argument
(
'--batch
_
size'
,
type
=
int
,
default
=
1000
)
parser
.
add_argument
(
'--batch
_
size
_
eval'
,
type
=
int
,
default
=
100000
)
parser
.
add_argument
(
'--log
_
every'
,
type
=
int
,
default
=
20
)
parser
.
add_argument
(
'--eval
_
every'
,
type
=
int
,
default
=
5
)
parser
.
add_argument
(
'--lr'
,
type
=
float
,
default
=
0.003
)
parser
.
add_argument
(
'--dropout'
,
type
=
float
,
default
=
0.5
)
parser
.
add_argument
(
'--num
-
workers'
,
type
=
int
,
default
=
0
,
parser
.
add_argument
(
'--num
_
workers'
,
type
=
int
,
default
=
0
,
help
=
"Number of sampling processes. Use 0 for no extra process."
)
parser
.
add_argument
(
'--local_rank'
,
type
=
int
,
help
=
'get rank of the process'
)
parser
.
add_argument
(
'--standalone'
,
action
=
'store_true'
,
help
=
'run in the standalone mode'
)
parser
.
add_argument
(
'--num
-
negs'
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
'--neg
-
share'
,
default
=
False
,
action
=
'store_true'
,
parser
.
add_argument
(
'--num
_
negs'
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
'--neg
_
share'
,
default
=
False
,
action
=
'store_true'
,
help
=
"sharing neg nodes for positive nodes"
)
parser
.
add_argument
(
'--remove
-
edge'
,
default
=
False
,
action
=
'store_true'
,
parser
.
add_argument
(
'--remove
_
edge'
,
default
=
False
,
action
=
'store_true'
,
help
=
"whether to remove edges during sampling"
)
args
=
parser
.
parse_args
()
...
...
tools/launch.py
View file @
f5d8fa84
...
...
@@ -113,12 +113,12 @@ def main():
help
=
'The number of trainer processes per machine'
)
parser
.
add_argument
(
'--num_samplers'
,
type
=
int
,
default
=
0
,
help
=
'The number of sampler processes per trainer process'
)
parser
.
add_argument
(
'--num_servers'
,
type
=
int
,
help
=
'The number of server processes per machine'
)
parser
.
add_argument
(
'--part_config'
,
type
=
str
,
help
=
'The file (in workspace) of the partition config'
)
parser
.
add_argument
(
'--ip_config'
,
type
=
str
,
help
=
'The file (in workspace) of IP configuration for server processes'
)
parser
.
add_argument
(
'--num_servers'
,
type
=
int
,
help
=
'Server count on each machine.'
)
parser
.
add_argument
(
'--num_server_threads'
,
type
=
int
,
default
=
1
,
help
=
'The number of OMP threads in the server process.
\
It should be small if server processes and trainer processes run on
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment