Unverified Commit 4f02bb75 authored by Chao Ma's avatar Chao Ma Committed by GitHub
Browse files

[KVStore] Made kvstore can support multiple NICs (#1150)

* API change of kvstore

* add demo for kvstore

* update

* remove duplicated log

* change queue size

* update

* update

* update

* update

* update

* update

* update

* update

* update

* fix lint

* change name

* update

* fix lint

* update

* update

* update

* update

* change message queue size to a python argument

* change default queue size to 2GB

* OMP_NUM_THREADS=1

* add multiple NICs support for kvstore

* test

* fix lint

* update

* update

* update

* update

* update

* update

* update

* fix lint

* fix lint

* update

* update

* update

* update
parent f8184153
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import dgl import dgl
import argparse import argparse
import mxnet as mx import mxnet as mx
import time
ID = [] ID = []
ID.append(mx.nd.array([0,1], dtype='int64')) ID.append(mx.nd.array([0,1], dtype='int64'))
...@@ -9,17 +10,34 @@ ID.append(mx.nd.array([2,3], dtype='int64')) ...@@ -9,17 +10,34 @@ ID.append(mx.nd.array([2,3], dtype='int64'))
ID.append(mx.nd.array([4,5], dtype='int64')) ID.append(mx.nd.array([4,5], dtype='int64'))
ID.append(mx.nd.array([6,7], dtype='int64')) ID.append(mx.nd.array([6,7], dtype='int64'))
DATA = []
DATA.append(mx.nd.array([[1.,1.,1.,],[1.,1.,1.,]]))
DATA.append(mx.nd.array([[2.,2.,2.,],[2.,2.,2.,]]))
DATA.append(mx.nd.array([[3.,3.,3.,],[3.,3.,3.,]]))
DATA.append(mx.nd.array([[4.,4.,4.,],[4.,4.,4.,]]))
edata_partition_book = {'edata':mx.nd.array([0,0,1,1,2,2,3,3], dtype='int64')} edata_partition_book = {'edata':mx.nd.array([0,0,1,1,2,2,3,3], dtype='int64')}
ndata_partition_book = {'ndata':mx.nd.array([0,0,1,1,2,2,3,3], dtype='int64')} ndata_partition_book = {'ndata':mx.nd.array([0,0,1,1,2,2,3,3], dtype='int64')}
def start_client(): def start_client():
time.sleep(3)
client = dgl.contrib.start_client(ip_config='ip_config.txt', client = dgl.contrib.start_client(ip_config='ip_config.txt',
ndata_partition_book=ndata_partition_book, ndata_partition_book=ndata_partition_book,
edata_partition_book=edata_partition_book) edata_partition_book=edata_partition_book)
client.push(name='edata', id_tensor=ID[client.get_id()], data_tensor=mx.nd.array([[1.,1.,1.],[1.,1.,1.]]))
client.push(name='ndata', id_tensor=ID[client.get_id()], data_tensor=mx.nd.array([[2.,2.,2.],[2.,2.,2.]])) tensor_edata = client.pull(name='edata', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
tensor_ndata = client.pull(name='ndata', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
print(tensor_edata)
client.barrier()
print(tensor_ndata)
client.barrier()
client.push(name='edata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
client.push(name='ndata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
client.barrier() client.barrier()
...@@ -27,11 +45,9 @@ def start_client(): ...@@ -27,11 +45,9 @@ def start_client():
tensor_ndata = client.pull(name='ndata', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64')) tensor_ndata = client.pull(name='ndata', id_tensor=mx.nd.array([0,1,2,3,4,5,6,7], dtype='int64'))
print(tensor_edata) print(tensor_edata)
client.barrier() client.barrier()
print(tensor_ndata) print(tensor_ndata)
client.barrier() client.barrier()
if client.get_id() == 0: if client.get_id() == 0:
......
...@@ -16,14 +16,20 @@ edata_g2l.append({'edata':mx.nd.array([0,0,0,1,0,0,0,0], dtype='int64')}) ...@@ -16,14 +16,20 @@ edata_g2l.append({'edata':mx.nd.array([0,0,0,1,0,0,0,0], dtype='int64')})
edata_g2l.append({'edata':mx.nd.array([0,0,0,0,0,1,0,0], dtype='int64')}) edata_g2l.append({'edata':mx.nd.array([0,0,0,0,0,1,0,0], dtype='int64')})
edata_g2l.append({'edata':mx.nd.array([0,0,0,0,0,0,0,1], dtype='int64')}) edata_g2l.append({'edata':mx.nd.array([0,0,0,0,0,0,0,1], dtype='int64')})
DATA = []
DATA.append(mx.nd.array([[4.,4.,4.,],[4.,4.,4.,]]))
DATA.append(mx.nd.array([[3.,3.,3.,],[3.,3.,3.,]]))
DATA.append(mx.nd.array([[2.,2.,2.,],[2.,2.,2.,]]))
DATA.append(mx.nd.array([[1.,1.,1.,],[1.,1.,1.,]]))
def start_server(args): def start_server(args):
dgl.contrib.start_server( dgl.contrib.start_server(
server_id=args.id, server_id=args.id,
ip_config='ip_config.txt', ip_config='ip_config.txt',
num_client=4, num_client=4,
ndata={'ndata':mx.nd.array([[0.,0.,0.],[0.,0.,0.]])}, ndata={'ndata':DATA[args.id]},
edata={'edata':mx.nd.array([[0.,0.,0.],[0.,0.,0.]])}, edata={'edata':DATA[args.id]},
ndata_g2l=ndata_g2l[args.id], ndata_g2l=ndata_g2l[args.id],
edata_g2l=edata_g2l[args.id]) edata_g2l=edata_g2l[args.id])
......
...@@ -10,6 +10,12 @@ ID.append(th.tensor([2,3])) ...@@ -10,6 +10,12 @@ ID.append(th.tensor([2,3]))
ID.append(th.tensor([4,5])) ID.append(th.tensor([4,5]))
ID.append(th.tensor([6,7])) ID.append(th.tensor([6,7]))
DATA = []
DATA.append(th.tensor([[1.,1.,1.,],[1.,1.,1.,]]))
DATA.append(th.tensor([[2.,2.,2.,],[2.,2.,2.,]]))
DATA.append(th.tensor([[3.,3.,3.,],[3.,3.,3.,]]))
DATA.append(th.tensor([[4.,4.,4.,],[4.,4.,4.,]]))
edata_partition_book = {'edata':th.tensor([0,0,1,1,2,2,3,3])} edata_partition_book = {'edata':th.tensor([0,0,1,1,2,2,3,3])}
ndata_partition_book = {'ndata':th.tensor([0,0,1,1,2,2,3,3])} ndata_partition_book = {'ndata':th.tensor([0,0,1,1,2,2,3,3])}
...@@ -20,8 +26,18 @@ def start_client(): ...@@ -20,8 +26,18 @@ def start_client():
ndata_partition_book=ndata_partition_book, ndata_partition_book=ndata_partition_book,
edata_partition_book=edata_partition_book) edata_partition_book=edata_partition_book)
client.push(name='edata', id_tensor=ID[client.get_id()], data_tensor=th.tensor([[1.,1.,1.],[1.,1.,1.]]))
client.push(name='ndata', id_tensor=ID[client.get_id()], data_tensor=th.tensor([[2.,2.,2.],[2.,2.,2.]])) tensor_edata = client.pull(name='edata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
tensor_ndata = client.pull(name='ndata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
print(tensor_edata)
client.barrier()
print(tensor_ndata)
client.barrier()
client.push(name='edata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
client.push(name='ndata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
client.barrier() client.barrier()
...@@ -29,11 +45,9 @@ def start_client(): ...@@ -29,11 +45,9 @@ def start_client():
tensor_ndata = client.pull(name='ndata', id_tensor=th.tensor([0,1,2,3,4,5,6,7])) tensor_ndata = client.pull(name='ndata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
print(tensor_edata) print(tensor_edata)
client.barrier() client.barrier()
print(tensor_ndata) print(tensor_ndata)
client.barrier() client.barrier()
if client.get_id() == 0: if client.get_id() == 0:
......
0 127.0.0.1 40050 0 127.0.0.1 50050
1 127.0.0.1 40051 1 127.0.0.1 50051
2 127.0.0.1 40052 2 127.0.0.1 50052
3 127.0.0.1 40053 3 127.0.0.1 50053
\ No newline at end of file \ No newline at end of file
...@@ -16,14 +16,20 @@ edata_g2l.append({'edata':th.tensor([0,0,0,1,0,0,0,0])}) ...@@ -16,14 +16,20 @@ edata_g2l.append({'edata':th.tensor([0,0,0,1,0,0,0,0])})
edata_g2l.append({'edata':th.tensor([0,0,0,0,0,1,0,0])}) edata_g2l.append({'edata':th.tensor([0,0,0,0,0,1,0,0])})
edata_g2l.append({'edata':th.tensor([0,0,0,0,0,0,0,1])}) edata_g2l.append({'edata':th.tensor([0,0,0,0,0,0,0,1])})
DATA = []
DATA.append(th.tensor([[4.,4.,4.,],[4.,4.,4.,]]))
DATA.append(th.tensor([[3.,3.,3.,],[3.,3.,3.,]]))
DATA.append(th.tensor([[2.,2.,2.,],[2.,2.,2.,]]))
DATA.append(th.tensor([[1.,1.,1.,],[1.,1.,1.,]]))
def start_server(args): def start_server(args):
dgl.contrib.start_server( dgl.contrib.start_server(
server_id=args.id, server_id=args.id,
ip_config='ip_config.txt', ip_config='ip_config.txt',
num_client=4, num_client=4,
ndata={'ndata':th.tensor([[0.,0.,0.],[0.,0.,0.]])}, ndata={'ndata':DATA[args.id]},
edata={'edata':th.tensor([[0.,0.,0.],[0.,0.,0.]])}, edata={'edata':DATA[args.id]},
ndata_g2l=ndata_g2l[args.id], ndata_g2l=ndata_g2l[args.id],
edata_g2l=edata_g2l[args.id]) edata_g2l=edata_g2l[args.id])
......
...@@ -6,11 +6,15 @@ from ..network import _receiver_wait, _sender_connect ...@@ -6,11 +6,15 @@ from ..network import _receiver_wait, _sender_connect
from ..network import _send_kv_msg, _recv_kv_msg from ..network import _send_kv_msg, _recv_kv_msg
from ..network import KVMsgType, KVStoreMsg from ..network import KVMsgType, KVStoreMsg
from .. import backend as F
from .._ffi.ndarray import empty_shared_mem from .._ffi.ndarray import empty_shared_mem
import os
import numpy as np import numpy as np
import dgl.backend as F
import socket import socket
if os.name != 'nt':
import fcntl
import struct
def read_ip_config(filename): def read_ip_config(filename):
"""Read networking configuration from file. """Read networking configuration from file.
...@@ -551,7 +555,7 @@ class KVClient(object): ...@@ -551,7 +555,7 @@ class KVClient(object):
# find local server nodes # find local server nodes
for ID, addr in self._server_namebook.items(): for ID, addr in self._server_namebook.items():
server_ip, server_port = addr.split(':') server_ip, server_port = addr.split(':')
if client_ip == server_ip or server_ip == '127.0.0.1': if server_ip in self._ip4_addr_list():
self._local_server_id.add(ID) self._local_server_id.add(ID)
# send addr to server nodes # send addr to server nodes
...@@ -583,8 +587,8 @@ class KVClient(object): ...@@ -583,8 +587,8 @@ class KVClient(object):
for server_id in self._local_server_id: for server_id in self._local_server_id:
shared_data = empty_shared_mem(tensor_name+str(server_id), False, shape, dtype) shared_data = empty_shared_mem(tensor_name+str(server_id), False, shape, dtype)
dlpack = shared_data.to_dlpack() dlpack = shared_data.to_dlpack()
self._data_store[tensor_name] = F.zerocopy_from_dlpack(dlpack) self._data_store[tensor_name+str(server_id)] = F.zerocopy_from_dlpack(dlpack)
self._has_data.add(tensor_name) self._has_data.add(tensor_name+str(server_id))
def push(self, name, id_tensor, data_tensor): def push(self, name, id_tensor, data_tensor):
...@@ -622,11 +626,11 @@ class KVClient(object): ...@@ -622,11 +626,11 @@ class KVClient(object):
partial_data = data_tensor[start:end] partial_data = data_tensor[start:end]
if server[idx] in self._local_server_id and self._close_shared_mem == False: if server[idx] in self._local_server_id and self._close_shared_mem == False:
if (name+'-g2l-' in self._has_data) == True: if (name+'-g2l-'+str(server[idx]) in self._has_data) == True:
local_id = self._data_store[name+'-g2l-'][partial_id] local_id = self._data_store[name+'-g2l-'+str(server[idx])][partial_id]
else: else:
local_id = partial_id local_id = partial_id
self._push_handler(name+'-data-', local_id, data_tensor, self._data_store) self._push_handler(name+'-data-'+str(server[idx]), local_id, data_tensor, self._data_store)
else: else:
msg = KVStoreMsg( msg = KVStoreMsg(
type=KVMsgType.PUSH, type=KVMsgType.PUSH,
...@@ -642,7 +646,7 @@ class KVClient(object): ...@@ -642,7 +646,7 @@ class KVClient(object):
def pull(self, name, id_tensor): def pull(self, name, id_tensor):
"""Pull message from KVServer. """Pull message from KVServer.
Parameters Parameters
---------- ----------
name : str name : str
data name data name
...@@ -676,11 +680,11 @@ class KVClient(object): ...@@ -676,11 +680,11 @@ class KVClient(object):
partial_id = id_tensor[start:end] partial_id = id_tensor[start:end]
if server[idx] in self._local_server_id and self._close_shared_mem == False: if server[idx] in self._local_server_id and self._close_shared_mem == False:
if (name+'-g2l-' in self._has_data) == True: if (name+'-g2l-'+str(server[idx]) in self._has_data) == True:
local_id = self._data_store[name+'-g2l-'][partial_id] local_id = self._data_store[name+'-g2l-'+str(server[idx])][partial_id]
else: else:
local_id = partial_id local_id = partial_id
local_data[server[idx]] = self._pull_handler(name+'-data-', local_id, self._data_store) local_data[server[idx]] = self._pull_handler(name+'-data-'+str(server[idx]), local_id, self._data_store)
else: else:
msg = KVStoreMsg( msg = KVStoreMsg(
type=KVMsgType.PULL, type=KVMsgType.PULL,
...@@ -798,6 +802,31 @@ class KVClient(object): ...@@ -798,6 +802,31 @@ class KVClient(object):
return IP + ':' + str(port) return IP + ':' + str(port)
def _get_ip_address(self, NICname):
    """Return the IPv4 address assigned to a network interface.

    Parameters
    ----------
    NICname : str
        Name of the network interface (e.g. 'eth0'). Only the first
        15 characters are passed to the kernel.

    Returns
    -------
    str
        IPv4 address in dotted-quad notation.

    Raises
    ------
    OSError
        If the ioctl request fails for the given interface.

    Notes
    -----
    Relies on ``fcntl``, which this module only imports when
    ``os.name != 'nt'``.
    """
    request = struct.pack('256s', NICname[:15].encode("UTF-8"))
    # The socket is only a handle for the ioctl call; the context manager
    # closes it so repeated lookups do not leak file descriptors.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        reply = fcntl.ioctl(
            sock.fileno(),
            0x8915,  # SIOCGIFADDR
            request)
    # Bytes 20..23 of the reply are the packed IPv4 address.
    return socket.inet_ntoa(reply[20:24])
def _ip4_addr_list(self):
    """Return the IPv4 addresses of the local network interfaces.

    Every interface reported by ``socket.if_nameindex()`` is queried via
    ``self._get_ip_address``. Interfaces for which the query fails are
    skipped instead of aborting the whole enumeration.

    Returns
    -------
    set of str
        IPv4 addresses in dotted-quad notation.
    """
    addresses = set()
    for _, nic_name in socket.if_nameindex():
        try:
            addresses.add(self._get_ip_address(nic_name))
        except OSError:
            # The address query raises OSError for an interface that has
            # no IPv4 address assigned; one such interface must not hide
            # the addresses of all the others.
            continue
    return addresses
def _takeId(self, elem): def _takeId(self, elem):
"""Used by sort """Used by sort
""" """
......
0 127.0.0.1 50050
1 127.0.0.1 50051
2 127.0.0.1 50052
3 127.0.0.1 50053
\ No newline at end of file
import dgl
import argparse
import mxnet as mx
import time
import backend as F
from multiprocessing import Process
# Global ids used by each client: entry p holds ids 2*p and 2*p+1.
ID = [mx.nd.array([2 * part, 2 * part + 1], dtype='int64') for part in range(4)]

# Payload pushed by each client: entry p is a 2x3 block filled with p+1.
DATA = [mx.nd.array([[fill] * 3] * 2) for fill in (1., 2., 3., 4.)]

# Partition id for each of the eight global ids (two consecutive ids per partition).
edata_partition_book = {'edata': mx.nd.array([0, 0, 1, 1, 2, 2, 3, 3], dtype='int64')}
ndata_partition_book = {'ndata': mx.nd.array([0, 0, 1, 1, 2, 2, 3, 3], dtype='int64')}

# Per-server lookup tables of length 8: entry p is 1 at index 2*p+1 and 0
# elsewhere. Presumably a global-id -> local-row mapping; confirm against
# dgl.contrib.start_server.
ndata_g2l = [
    {'ndata': mx.nd.array([int(col == 2 * part + 1) for col in range(8)], dtype='int64')}
    for part in range(4)
]
edata_g2l = [
    {'edata': mx.nd.array([int(col == 2 * part + 1) for col in range(8)], dtype='int64')}
    for part in range(4)
]
def start_client(flag):
    """Run one kvstore client: push its slice, pull everything, verify.

    Parameters
    ----------
    flag : bool
        Forwarded to ``dgl.contrib.start_client`` as ``close_shared_mem``.
    """
    # Presumably gives the server processes time to come up; confirm.
    time.sleep(3)

    client = dgl.contrib.start_client(
        ip_config='ip_config.txt',
        ndata_partition_book=ndata_partition_book,
        edata_partition_book=edata_partition_book,
        close_shared_mem=flag)

    # Each client writes only the rows selected by its own id.
    for data_name in ('edata', 'ndata'):
        client.push(name=data_name,
                    id_tensor=ID[client.get_id()],
                    data_tensor=DATA[client.get_id()])

    client.barrier()

    # Read back all eight rows of both tensors.
    pulled = {}
    for data_name in ('edata', 'ndata'):
        pulled[data_name] = client.pull(
            name=data_name,
            id_tensor=mx.nd.array([0, 1, 2, 3, 4, 5, 6, 7], dtype='int64'))

    # Rows 2*p and 2*p+1 are expected to hold the value p+1.
    target_tensor = mx.nd.array(
        [[value] * 3 for value in (1., 1., 2., 2., 3., 3., 4., 4.)])

    for data_name in ('edata', 'ndata'):
        assert F.array_equal(pulled[data_name], target_tensor)

    client.barrier()

    # Only the client with id 0 issues the shut-down request.
    if client.get_id() == 0:
        client.shut_down()
def start_server(server_id, num_client):
    """Launch one kvstore server holding zero-initialised 2x3 tensors.

    Parameters
    ----------
    server_id : int
        Server id; also indexes ``ndata_g2l`` and ``edata_g2l``.
    num_client : int
        Forwarded to ``dgl.contrib.start_server`` as ``num_client``.
    """
    # Two distinct arrays, so 'ndata' and 'edata' never share storage.
    node_init = mx.nd.array([[0.] * 3] * 2)
    edge_init = mx.nd.array([[0.] * 3] * 2)

    dgl.contrib.start_server(
        server_id=server_id,
        ip_config='ip_config.txt',
        num_client=num_client,
        ndata={'ndata': node_init},
        edata={'edata': edge_init},
        ndata_g2l=ndata_g2l[server_id],
        edata_g2l=edata_g2l[server_id])
if __name__ == '__main__':
    # Four server processes (ids 0..3), each expecting four clients.
    servers = [Process(target=start_server, args=(server_id, 4))
               for server_id in range(4)]
    # Four client processes: two with close_shared_mem=True, two with False.
    clients = [Process(target=start_client, args=(flag,))
               for flag in (True, True, False, False)]

    # Servers are started before clients, and joined in the same order.
    procs = servers + clients
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    # A failed assert inside a child only sets that child's exit code, so
    # the parent has to check it or the script reports success regardless.
    failed = [proc.name for proc in procs if proc.exitcode != 0]
    if failed:
        raise SystemExit('kvstore test failed in: ' + ', '.join(failed))
\ No newline at end of file
0 127.0.0.1 40050
1 127.0.0.1 40051
2 127.0.0.1 40052
3 127.0.0.1 40053
\ No newline at end of file
import backend as F
import numpy as np
import scipy as sp
import dgl import dgl
import argparse
import torch as th import torch as th
from dgl import utils
import os
import time import time
import backend as F
client_namebook = { 0:'127.0.0.1:50061' } from multiprocessing import Process
server_namebook = { 0:'127.0.0.1:50062' } ID = []
ID.append(th.tensor([0,1]))
ID.append(th.tensor([2,3]))
ID.append(th.tensor([4,5]))
ID.append(th.tensor([6,7]))
def start_server(server_embed): DATA = []
server = dgl.contrib.KVServer( DATA.append(th.tensor([[1.,1.,1.,],[1.,1.,1.,]]))
server_id=0, DATA.append(th.tensor([[2.,2.,2.,],[2.,2.,2.,]]))
client_namebook=client_namebook, DATA.append(th.tensor([[3.,3.,3.,],[3.,3.,3.,]]))
server_addr=server_namebook[0]) DATA.append(th.tensor([[4.,4.,4.,],[4.,4.,4.,]]))
server.init_data(name='server_embed', data_tensor=server_embed) edata_partition_book = {'edata':th.tensor([0,0,1,1,2,2,3,3])}
ndata_partition_book = {'ndata':th.tensor([0,0,1,1,2,2,3,3])}
server.start() ndata_g2l = []
edata_g2l = []
def start_client(server_embed): ndata_g2l.append({'ndata':th.tensor([0,1,0,0,0,0,0,0])})
client = dgl.contrib.KVClient( ndata_g2l.append({'ndata':th.tensor([0,0,0,1,0,0,0,0])})
client_id=0, ndata_g2l.append({'ndata':th.tensor([0,0,0,0,0,1,0,0])})
server_namebook=server_namebook, ndata_g2l.append({'ndata':th.tensor([0,0,0,0,0,0,0,1])})
client_addr=client_namebook[0])
client.connect() edata_g2l.append({'edata':th.tensor([0,1,0,0,0,0,0,0])})
edata_g2l.append({'edata':th.tensor([0,0,0,1,0,0,0,0])})
edata_g2l.append({'edata':th.tensor([0,0,0,0,0,1,0,0])})
edata_g2l.append({'edata':th.tensor([0,0,0,0,0,0,0,1])})
# Initialize data on server def start_client(flag):
client.init_data(name='embed_0', server_id=0, shape=[5, 3], init_type='zero') time.sleep(3)
client.init_data(name='embed_1', server_id=0, shape=[5], init_type='uniform', low=0.0, high=0.0)
data_0 = th.tensor([[0., 0., 0., ], [1., 1., 1.], [2., 2., 2.]]) client = dgl.contrib.start_client(ip_config='ip_config.txt',
data_1 = th.tensor([0., 1., 2.]) ndata_partition_book=ndata_partition_book,
edata_partition_book=edata_partition_book,
close_shared_mem=flag)
for i in range(5): client.push(name='edata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
client.push(name='embed_0', server_id=0, id_tensor=th.tensor([0, 2, 4]), data_tensor=data_0) client.push(name='ndata', id_tensor=ID[client.get_id()], data_tensor=DATA[client.get_id()])
client.push(name='embed_1', server_id=0, id_tensor=th.tensor([0, 2, 4]), data_tensor=data_1)
client.push(name='server_embed', server_id=0, id_tensor=th.tensor([0, 2, 4]), data_tensor=data_1)
client.barrier() client.barrier()
client.pull(name='embed_0', server_id=0, id_tensor=th.tensor([0, 1, 2, 3, 4])) tensor_edata = client.pull(name='edata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
msg = client.pull_wait() tensor_ndata = client.pull(name='ndata', id_tensor=th.tensor([0,1,2,3,4,5,6,7]))
assert msg.rank == 0
target_tensor_0 = th.tensor(
[[ 0., 0., 0.],
[ 0., 0., 0.],
[ 5., 5., 5.],
[ 0., 0., 0.],
[10., 10., 10.]])
assert th.equal(msg.data, target_tensor_0) == True
client.pull(name='embed_1', server_id=0, id_tensor=th.tensor([0, 1, 2, 3, 4]))
msg = client.pull_wait()
target_tensor_1 = th.tensor([ 0., 0., 5., 0., 10.])
assert th.equal(msg.data, target_tensor_1) == True target_tensor = th.tensor([[1., 1., 1.],
[1., 1., 1.],
[2., 2., 2.],
[2., 2., 2.],
[3., 3., 3.],
[3., 3., 3.],
[4., 4., 4.],
[4., 4., 4.]])
client.pull(name='embed_0', server_id=0, id_tensor=th.tensor([0, 1, 2, 3, 4])) assert F.array_equal(tensor_edata, target_tensor)
client.pull(name='embed_1', server_id=0, id_tensor=th.tensor([0, 1, 2, 3, 4]))
client.pull(name='embed_0', server_id=0, id_tensor=th.tensor([0, 1, 2, 3, 4]))
client.pull(name='embed_1', server_id=0, id_tensor=th.tensor([0, 1, 2, 3, 4]))
client.pull(name='server_embed', server_id=0, id_tensor=th.tensor([0, 1, 2, 3, 4]))
msg_0 = client.pull_wait() assert F.array_equal(tensor_ndata, target_tensor)
msg_1 = client.pull_wait()
msg_2 = client.pull_wait()
msg_3 = client.pull_wait()
msg_4 = client.pull_wait()
target_tensor_2 = th.tensor([ 2., 2., 7., 2., 12.]) client.barrier()
assert th.equal(msg_0.data, target_tensor_0) == True
assert th.equal(msg_1.data, target_tensor_1) == True
assert th.equal(msg_2.data, target_tensor_0) == True
assert th.equal(msg_3.data, target_tensor_1) == True
assert th.equal(msg_4.data, target_tensor_2) == True
server_embed += target_tensor_2
client.pull(name='server_embed', server_id=0, id_tensor=th.tensor([0, 1, 2, 3, 4]))
msg_5 = client.pull_wait()
assert th.equal(msg_5.data, target_tensor_2 * 2) == True if client.get_id() == 0:
client.shut_down()
client.shut_down() def start_server(server_id, num_client):
dgl.contrib.start_server(
server_id=server_id,
ip_config='ip_config.txt',
num_client=num_client,
ndata={'ndata':th.tensor([[0.,0.,0.],[0.,0.,0.]])},
edata={'edata':th.tensor([[0.,0.,0.],[0.,0.,0.]])},
ndata_g2l=ndata_g2l[server_id],
edata_g2l=edata_g2l[server_id])
if __name__ == '__main__': if __name__ == '__main__':
server_embed = th.tensor([2., 2., 2., 2., 2.])
# use pytorch shared memory # server process
server_embed.share_memory_() p0 = Process(target=start_server, args=(0, 4))
p1 = Process(target=start_server, args=(1, 4))
pid = os.fork() p2 = Process(target=start_server, args=(2, 4))
if pid == 0: p3 = Process(target=start_server, args=(3, 4))
start_server(server_embed)
else: # client process
time.sleep(2) # wait server start p4 = Process(target=start_client, args=(True,))
start_client(server_embed) p5 = Process(target=start_client, args=(True,))
p6 = Process(target=start_client, args=(False,))
assert th.equal(server_embed, th.tensor([ 4., 4., 14., 4., 24.])) == True p7 = Process(target=start_client, args=(False,))
\ No newline at end of file
# start server process
p0.start()
p1.start()
p2.start()
p3.start()
# start client process
p4.start()
p5.start()
p6.start()
p7.start()
p0.join()
p1.join()
p2.join()
p3.join()
p4.join()
p5.join()
p6.join()
p7.join()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment