Unverified Commit d57ff78d authored by Chao Ma's avatar Chao Ma Committed by GitHub
Browse files

[Small Fix] Change default message queue size of communicator to 2GB (#1140)

* API change of kvstore

* add demo for kvstore

* update

* remove duplicated log

* change queue size

* update

* update

* update

* update

* update

* update

* update

* update

* update

* fix lint

* change name

* update

* fix lint

* update

* update

* update

* update

* change message queue size to a python argument

* change default queue size to 2GB

* OMP_NUM_THREADS=1
parent 17aab812
......@@ -39,7 +39,7 @@ DGLBACKEND=mxnet python3 train.py --model gcn_ns --dataset cora --self-loop --nu
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model gcn_ns --dataset cora --self-loop --num-neighbors 2 --batch-size 1000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model gcn_ns --dataset cora --self-loop --num-neighbors 2 --batch-size 1000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### citeseer
......@@ -53,7 +53,7 @@ DGLBACKEND=mxnet python3 train.py --model gcn_ns --dataset citeseer --self-loop
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model gcn_ns --dataset citeseer --self-loop --num-neighbors 2 --batch-size 1000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model gcn_ns --dataset citeseer --self-loop --num-neighbors 2 --batch-size 1000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### pubmed
......@@ -67,7 +67,7 @@ DGLBACKEND=mxnet python3 train.py --model gcn_ns --dataset pubmed --self-loop --
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model gcn_ns --dataset pubmed --self-loop --num-neighbors 3 --batch-size 1000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model gcn_ns --dataset pubmed --self-loop --num-neighbors 3 --batch-size 1000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### reddit
......@@ -81,7 +81,7 @@ DGLBACKEND=mxnet python3 train.py --model gcn_ns --dataset reddit-self-loop --nu
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model gcn_ns --dataset reddit-self-loop --num-neighbors 2 --batch-size 1000 --ip 127.0.0.1:2049 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model gcn_ns --dataset reddit-self-loop --num-neighbors 2 --batch-size 1000 --ip 127.0.0.1:2049 --num-sampler 1
```
### Control Variate & Skip Connection
......@@ -97,7 +97,7 @@ DGLBACKEND=mxnet python3 train.py --model gcn_cv --dataset cora --self-loop --nu
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model gcn_cv --dataset cora --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model gcn_cv --dataset cora --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### citeseer
......@@ -111,7 +111,7 @@ DGLBACKEND=mxnet python3 train.py --model gcn_cv --dataset citeseer --self-loop
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model gcn_cv --dataset citeseer --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model gcn_cv --dataset citeseer --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### pubmed
......@@ -123,7 +123,7 @@ DGLBACKEND=mxnet python3 train.py --model gcn_cv --dataset pubmed --self-loop --
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model gcn_cv --dataset pubmed --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model gcn_cv --dataset pubmed --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### reddit
......@@ -137,7 +137,7 @@ DGLBACKEND=mxnet python3 train.py --model gcn_cv --dataset reddit-self-loop --nu
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model gcn_cv --dataset reddit-self-loop --num-neighbors 1 --batch-size 10000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model gcn_cv --dataset reddit-self-loop --num-neighbors 1 --batch-size 10000 --ip 127.0.0.1:50051 --num-sampler 1
```
### Control Variate & GraphSAGE-mean
......@@ -155,5 +155,5 @@ DGLBACKEND=mxnet python3 train.py --model graphsage_cv --batch-size 1000 --test-
Sampler side:
```
DGLBACKEND=mxnet python3 sampler.py --model graphsage_cv --batch-size 1000 --dataset reddit --num-neighbors 1 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=mxnet python3 sampler.py --model graphsage_cv --batch-size 1000 --dataset reddit --num-neighbors 1 --ip 127.0.0.1:50051 --num-sampler 1
```
......@@ -39,7 +39,7 @@ DGLBACKEND=pytorch python3 gcn_ns_sc_train.py --dataset cora --self-loop --num-n
Sampler side:
```
DGLBACKEND=pytorch python3 sampler.py --model gcn_ns --dataset cora --self-loop --num-neighbors 2 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=pytorch python3 sampler.py --model gcn_ns --dataset cora --self-loop --num-neighbors 2 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### citeseer
......@@ -53,7 +53,7 @@ DGLBACKEND=pytorch python3 gcn_ns_sc_train.py --dataset citeseer --self-loop --n
Sampler side:
```
DGLBACKEND=pytorch python3 sampler.py --model gcn_ns --dataset citeseer --self-loop --num-neighbors 2 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=pytorch python3 sampler.py --model gcn_ns --dataset citeseer --self-loop --num-neighbors 2 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### pubmed
......@@ -67,7 +67,7 @@ DGLBACKEND=pytorch python3 gcn_ns_sc_train.py --dataset pubmed --self-loop --num
Sampler side:
```
DGLBACKEND=pytorch python3 sampler.py --model gcn_ns --dataset pubmed --self-loop --num-neighbors 3 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=pytorch python3 sampler.py --model gcn_ns --dataset pubmed --self-loop --num-neighbors 3 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
### Control Variate & Skip Connection
......@@ -83,7 +83,7 @@ DGLBACKEND=pytorch python3 gcn_cv_sc_train.py --dataset cora --self-loop --num-n
Sampler side:
```
DGLBACKEND=pytorch python3 sampler.py --model gcn_cv --dataset cora --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=pytorch python3 sampler.py --model gcn_cv --dataset cora --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### citeseer
......@@ -97,7 +97,7 @@ DGLBACKEND=pytorch python3 gcn_cv_sc_train.py --dataset citeseer --self-loop --n
Sampler side:
```
DGLBACKEND=pytorch python3 sampler.py --model gcn_cv --dataset citeseer --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=pytorch python3 sampler.py --model gcn_cv --dataset citeseer --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
#### pubmed
......@@ -111,5 +111,5 @@ DGLBACKEND=pytorch python3 gcn_cv_sc_train.py --dataset pubmed --self-loop --num
Sampler side:
```
DGLBACKEND=pytorch python3 sampler.py --model gcn_cv --dataset pubmed --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
OMP_NUM_THREADS=1 DGLBACKEND=pytorch python3 sampler.py --model gcn_cv --dataset pubmed --self-loop --num-neighbors 1 --batch-size 1000000 --ip 127.0.0.1:50051 --num-sampler 1
```
......@@ -76,7 +76,7 @@ def start_server(server_id, ip_config, num_client, ndata, edata, ndata_g2l=None,
edata_g2l : dict of tensor (mx.ndarray or torch.tensor)
global2local mapping of edge data
msg_queue_size : int
Size of message queue
Size of message queue (2GB by default)
"""
assert server_id >= 0, 'server_id (%d) cannot be a negative number.' % server_id
assert len(ip_config) > 0, 'ip_config cannot be empty.'
......@@ -123,7 +123,7 @@ def start_client(ip_config, ndata_partition_book, edata_partition_book, close_sh
close_shared_mem : bool
Close local shared-memory tensor access.
msg_queue_size : int
Size of message queue
Size of message queue (2GB by default)
Returns
-------
......@@ -171,7 +171,7 @@ class KVServer(object):
num_client : int
Total number of clients connecting to server.
msg_queue_size : int
Size of message queue
Size of message queue (2GB by default)
net_type : str
networking type, e.g., 'socket' (default) or 'mpi' (not supported yet).
"""
......@@ -478,7 +478,7 @@ class KVClient(object):
close_shared_mem : bool
DO NOT use shared-memory access on local machine.
msg_queue_size : int
Size of message queue.
Size of message queue (2GB by default).
net_type : str
networking type, e.g., 'socket' (default) or 'mpi'.
"""
......
......@@ -31,7 +31,7 @@ def _network_wait():
"""
time.sleep(_WAIT_TIME_SEC)
def _create_sender(net_type, msg_queue_size=2000*1024*1024*1024):
def _create_sender(net_type, msg_queue_size=2*1024*1024*1024):
"""Create a Sender communicator via C api
Parameters
......@@ -39,12 +39,12 @@ def _create_sender(net_type, msg_queue_size=2000*1024*1024*1024):
net_type : str
'socket' or 'mpi'
msg_queue_size : int
message queue size
message queue size (2GB by default)
"""
assert net_type in ('socket', 'mpi'), 'Unknown network type.'
return _CAPI_DGLSenderCreate(net_type, msg_queue_size)
def _create_receiver(net_type, msg_queue_size=2000*1024*1024*1024):
def _create_receiver(net_type, msg_queue_size=2*1024*1024*1024):
"""Create a Receiver communicator via C api
Parameters
......@@ -52,7 +52,7 @@ def _create_receiver(net_type, msg_queue_size=2000*1024*1024*1024):
net_type : str
'socket' or 'mpi'
msg_queue_size : int
message queue size
message queue size (2GB by default)
"""
assert net_type in ('socket', 'mpi'), 'Unknown network type.'
return _CAPI_DGLReceiverCreate(net_type, msg_queue_size)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment