"...en/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "ba06124e4ac1516bba595113163f259a7dd89a7a"
Unverified Commit 4e5780e3 authored by Rhett Ying's avatar Rhett Ying Committed by GitHub
Browse files

[DistDGL] remove unused rpc related files (#5878)

parent 66c04855
"""DGL Distributed Training Infrastructure."""
from __future__ import absolute_import
import time
from collections import namedtuple
from enum import Enum
import dgl.backend as F
from ._ffi.function import _init_api
_init_api("dgl.network")
################################ Common Network Components ##################################
_WAIT_TIME_SEC = 3 # 3 seconds
def _network_wait():
"""Sleep for a few seconds"""
time.sleep(_WAIT_TIME_SEC)
def _create_sender(net_type, msg_queue_size=2 * 1024 * 1024 * 1024):
    """Construct a C-side Sender communicator and return its handle.

    Parameters
    ----------
    net_type : str
        Transport backend, either 'socket' or 'mpi'.
    msg_queue_size : int
        Capacity of the internal message queue in bytes (2GB by default).

    Returns
    -------
    ctypes.c_void_p
        Handle to the underlying C Sender object.
    """
    supported = ("socket", "mpi")
    assert net_type in supported, "Unknown network type."
    return _CAPI_DGLSenderCreate(net_type, msg_queue_size)
def _create_receiver(net_type, msg_queue_size=2 * 1024 * 1024 * 1024):
    """Construct a C-side Receiver communicator and return its handle.

    Parameters
    ----------
    net_type : str
        Transport backend, either 'socket' or 'mpi'.
    msg_queue_size : int
        Capacity of the internal message queue in bytes (2GB by default).

    Returns
    -------
    ctypes.c_void_p
        Handle to the underlying C Receiver object.
    """
    supported = ("socket", "mpi")
    assert net_type in supported, "Unknown network type."
    return _CAPI_DGLReceiverCreate(net_type, msg_queue_size)
def _finalize_sender(sender):
    """Release the C-side Sender communicator.

    Parameters
    ----------
    sender : ctypes.c_void_p
        C Sender handle, as returned by ``_create_sender``.
    """
    _CAPI_DGLFinalizeSender(sender)
def _finalize_receiver(receiver):
    """Release the C-side Receiver communicator.

    Parameters
    ----------
    receiver : ctypes.c_void_p
        C Receiver handle, as returned by ``_create_receiver``.
    """
    _CAPI_DGLFinalizeReceiver(receiver)
def _add_receiver_addr(sender, ip_addr, port, recv_id):
    """Register a Receiver's network address in the Sender's namebook.

    Parameters
    ----------
    sender : ctypes.c_void_p
        C Sender handle
    ip_addr : str
        IP address of the Receiver
    port : int
        Listening port of the Receiver
    recv_id : int
        Receiver ID; must be non-negative.
    """
    assert recv_id >= 0, "recv_id cannot be a negative number."
    _CAPI_DGLSenderAddReceiver(sender, ip_addr, int(port), int(recv_id))
def _sender_connect(sender):
    """Connect the Sender to every Receiver registered in its namebook.

    Parameters
    ----------
    sender : ctypes.c_void_p
        C Sender handle
    """
    _CAPI_DGLSenderConnect(sender)
def _receiver_wait(receiver, ip_addr, port, num_sender):
    """Block until every expected Sender has connected to this Receiver.

    Parameters
    ----------
    receiver : ctypes.c_void_p
        C Receiver handle
    ip_addr : str
        IP address this Receiver listens on
    port : int
        Port this Receiver listens on
    num_sender : int
        Total number of Senders to wait for; must be non-negative.
    """
    assert num_sender >= 0, "num_sender cannot be a negative number."
    _CAPI_DGLReceiverWait(receiver, ip_addr, int(port), int(num_sender))
################################ Distributed Sampler Components ################################
def _send_sampler_end_signal(sender, recv_id):
    """Notify a remote Receiver that the sampler finished its epoch.

    Parameters
    ----------
    sender : ctypes.c_void_p
        C sender handle
    recv_id : int
        Target Receiver ID; must be non-negative.
    """
    assert recv_id >= 0, "recv_id cannot be a negative number."
    _CAPI_SenderSendSamplerEndSignal(sender, int(recv_id))
################################ Distributed KVStore Components ################################
class KVMsgType(Enum):
    """Type of kvstore message"""
    # NOTE: these integer values match the C++ MessageType enum in
    # graph/network.h (kFinalMsg=1 ... kGetShapeBackMsg=9); the raw
    # .value is what _send_kv_msg passes across the wire.
    FINAL = 1  # end signal; message carries only the sender's rank
    INIT = 2  # carries a shape tensor (see _send_kv_msg)
    PUSH = 3  # carries both an ID tensor and a data matrix
    PULL = 4  # carries the ID tensor of rows being requested
    PULL_BACK = 5  # response to PULL; carries IDs and data
    BARRIER = 6  # synchronization message; carries only the rank
    IP_ID = 7  # carries only the name field
    GET_SHAPE = 8  # shape query; carries only the name field
    GET_SHAPE_BACK = 9  # shape response; carries a shape tensor
KVStoreMsg = namedtuple("KVStoreMsg", "type rank name id data shape c_ptr")
"""Message of DGL kvstore

Data Field
----------
type : KVMsgType
    Type of DGL kvstore message
rank : int
    sender's ID
name : str
    data name
id : tensor (mx.ndarray or torch.tensor)
    data vector storing the global IDs
data : tensor (mx.ndarray or torch.tensor)
    data matrix with the same row size of id
shape : tensor (mx.ndarray or torch.tensor)
    shape of the target data tensor (used by INIT/GET_SHAPE_BACK messages)
c_ptr : void*
    c pointer of message
"""
def _send_kv_msg(sender, msg, recv_id):
    """Send a kvstore message through a Sender.

    The payload handed to the C API depends on the message type:
    PULL adds the ID tensor; INIT/GET_SHAPE_BACK add the shape tensor;
    IP_ID/GET_SHAPE add only the name; FINAL/BARRIER send just the rank;
    every other type (PUSH/PULL_BACK) adds both IDs and the data matrix.

    Parameters
    ----------
    sender : ctypes.c_void_p
        C sender handle
    msg : KVStoreMsg
        kvstore message
    recv_id : int
        receiver's ID
    """
    capi_args = [sender, int(recv_id), msg.type.value, msg.rank]
    if msg.type == KVMsgType.PULL:
        capi_args.append(msg.name)
        capi_args.append(F.zerocopy_to_dgl_ndarray(msg.id))
    elif msg.type in (KVMsgType.INIT, KVMsgType.GET_SHAPE_BACK):
        capi_args.append(msg.name)
        capi_args.append(F.zerocopy_to_dgl_ndarray(msg.shape))
    elif msg.type in (KVMsgType.IP_ID, KVMsgType.GET_SHAPE):
        capi_args.append(msg.name)
    elif msg.type in (KVMsgType.FINAL, KVMsgType.BARRIER):
        pass  # rank-only message: no extra payload
    else:
        capi_args.append(msg.name)
        capi_args.append(F.zerocopy_to_dgl_ndarray(msg.id))
        capi_args.append(F.zerocopy_to_dgl_ndarray(msg.data))
    _CAPI_SenderSendKVMsg(*capi_args)
def _recv_kv_msg(receiver):
    """Receive a kvstore message from a Receiver.

    Blocks until a message arrives, then decodes only the fields the
    message type carries (mirroring ``_send_kv_msg``); all other fields
    of the returned ``KVStoreMsg`` are None.

    Parameters
    ----------
    receiver : ctypes.c_void_p
        C Receiver handle

    Return
    ------
    KVStoreMsg
        kvstore message
    """
    # BUG FIX: the original called ``CAPI_ReceiverRecvKVMsg`` (missing
    # the leading underscore) — a NameError at runtime; every other C
    # handle in this module is the underscore-prefixed ``_CAPI_*`` form.
    msg_ptr = _CAPI_ReceiverRecvKVMsg(receiver)
    msg_type = KVMsgType(_CAPI_ReceiverGetKVMsgType(msg_ptr))
    rank = _CAPI_ReceiverGetKVMsgRank(msg_ptr)
    name = None
    tensor_id = None
    data = None
    shape = None
    if msg_type == KVMsgType.PULL:
        name = _CAPI_ReceiverGetKVMsgName(msg_ptr)
        tensor_id = F.zerocopy_from_dgl_ndarray(
            _CAPI_ReceiverGetKVMsgID(msg_ptr)
        )
    elif msg_type in (KVMsgType.INIT, KVMsgType.GET_SHAPE_BACK):
        name = _CAPI_ReceiverGetKVMsgName(msg_ptr)
        shape = F.zerocopy_from_dgl_ndarray(
            _CAPI_ReceiverGetKVMsgShape(msg_ptr)
        )
    elif msg_type in (KVMsgType.IP_ID, KVMsgType.GET_SHAPE):
        name = _CAPI_ReceiverGetKVMsgName(msg_ptr)
    elif msg_type in (KVMsgType.FINAL, KVMsgType.BARRIER):
        pass  # rank-only message: nothing more to decode
    else:
        # PUSH / PULL_BACK: both the ID vector and the data matrix.
        name = _CAPI_ReceiverGetKVMsgName(msg_ptr)
        tensor_id = F.zerocopy_from_dgl_ndarray(
            _CAPI_ReceiverGetKVMsgID(msg_ptr)
        )
        data = F.zerocopy_from_dgl_ndarray(_CAPI_ReceiverGetKVMsgData(msg_ptr))
    # NOTE: the original ended with an unreachable ``raise RuntimeError``
    # after an if/elif/else chain in which every branch returned; the
    # single return below replaces both.
    return KVStoreMsg(
        type=msg_type,
        rank=rank,
        name=name,
        id=tensor_id,
        data=data,
        shape=shape,
        c_ptr=msg_ptr,
    )
def _clear_kv_msg(msg):
    """Free the C-side buffer behind a received kvstore message.

    ``F.sync()`` is called first so any in-flight backend work touching
    the message's tensors completes before the C message is deleted.
    """
    F.sync()
    c_ptr = msg.c_ptr
    if c_ptr is None:
        return
    _CAPI_DeleteKVMsg(c_ptr)
def _fast_pull(
    name,
    id_tensor,
    machine_count,
    group_count,
    machine_id,
    client_id,
    partition_book,
    g2l,
    local_data,
    sender,
    receiver,
):
    """Pull rows of a distributed tensor via the fast C path.

    Parameters
    ----------
    name : str
        data name string
    id_tensor : tensor
        tensor of ID
    machine_count : int
        count of total machine
    group_count : int
        count of server group
    machine_id : int
        current machine id
    client_id : int
        current client ID
    partition_book : tensor
        tensor of partition book
    g2l : tensor or None
        tensor of global2local; when None, no global-to-local mapping
        is forwarded to the C API.
    local_data : tensor
        tensor of local shared data
    sender : ctypes.c_void_p
        C Sender handle
    receiver : ctypes.c_void_p
        C Receiver handle

    Return
    ------
    tensor
        target tensor
    """
    capi_args = [
        name,
        machine_id,
        machine_count,
        group_count,
        client_id,
        F.zerocopy_to_dgl_ndarray(id_tensor),
        F.zerocopy_to_dgl_ndarray(partition_book),
        F.zerocopy_to_dgl_ndarray(local_data),
        sender,
        receiver,
    ]
    if g2l is None:
        capi_args.append("no_g2l")
    else:
        capi_args.append("has_g2l")
        capi_args.append(F.zerocopy_to_dgl_ndarray(g2l))
    res_tensor = _CAPI_FastPull(*capi_args)
    return F.zerocopy_from_dgl_ndarray(res_tensor)
This diff is collapsed.
/**
* Copyright (c) 2018 by Contributors
* @file graph/network.h
* @brief DGL networking related APIs
*/
#ifndef DGL_GRAPH_NETWORK_H_
#define DGL_GRAPH_NETWORK_H_
#include <dgl/runtime/ndarray.h>
#include <dmlc/logging.h>
#include <string.h>
#include <string>
#include <vector>
#include "../c_api_common.h"
#include "../rpc/network/msg_queue.h"
using dgl::runtime::NDArray;
namespace dgl {
namespace network {
/**
 * @brief Create NDArray from raw data
 * @param shape shape of the array, one entry per dimension
 * @param dtype element data type of the array
 * @param ctx device context the array lives on
 * @param raw pointer to the raw data buffer
 *            (NOTE(review): ownership/lifetime of `raw` is not visible
 *            here — check the definition before assuming a copy is made)
 * @return the constructed NDArray
 */
NDArray CreateNDArrayFromRaw(
    std::vector<int64_t> shape, DGLDataType dtype, DGLContext ctx, void* raw);
/**
 * @brief Message type for DGL distributed training
 *
 * NOTE: values 1-9 mirror the Python-side KVMsgType enum
 * (FINAL=1 ... GET_SHAPE_BACK=9), so the raw integer can be exchanged
 * between the two runtimes. Keep the two enums in sync.
 */
enum MessageType {
  /**
   * @brief Message for send/recv NodeFlow
   */
  kNodeFlowMsg = 0,
  /**
   * @brief Message for end-signal
   */
  kFinalMsg = 1,
  /**
   * @brief Initialize KVStore
   */
  kInitMsg = 2,
  /**
   * @brief Push msg to KVStore
   */
  kPushMsg = 3,
  /**
   * @brief Pull msg from KVStore
   */
  kPullMsg = 4,
  /**
   * @brief PullBack msg from KVStore
   */
  kPullBackMsg = 5,
  /**
   * @brief Barrier msg for KVStore
   */
  kBarrierMsg = 6,
  /**
   * @brief IP and ID msg for KVStore
   */
  kIPIDMsg = 7,
  /**
   * @brief Get data shape msg for KVStore
   */
  kGetShapeMsg = 8,
  /**
   * @brief Get data shape back msg for KVStore
   */
  kGetShapeBackMsg = 9
};
/**
 * @brief Meta data for NDArray message
 *
 * Collects the dtype and shape of each NDArray attached to a message so
 * the receiving side can reconstruct the arrays from a raw buffer.
 * All members are public; method bodies live in the corresponding .cc file.
 */
class ArrayMeta {
 public:
  /**
   * @brief ArrayMeta constructor.
   * @param msg_type type of message (a MessageType value)
   */
  explicit ArrayMeta(int msg_type) : msg_type_(msg_type), ndarray_count_(0) {}
  /**
   * @brief Construct ArrayMeta from binary data buffer.
   * @param buffer data buffer (must not be null)
   * @param size data size
   */
  ArrayMeta(char* buffer, int64_t size) {
    CHECK_NOTNULL(buffer);
    this->Deserialize(buffer, size);
  }
  /**
   * @return message type
   */
  inline int msg_type() const { return msg_type_; }
  /**
   * @return count of ndarray
   */
  inline int ndarray_count() const { return ndarray_count_; }
  /**
   * @brief Add NDArray meta data to ArrayMeta
   * @param array DGL NDArray
   */
  void AddArray(const NDArray& array);
  /**
   * @brief Serialize ArrayMeta to data buffer
   * @param size size of serialized message (output parameter)
   * @return pointer of data buffer
   *         (NOTE(review): allocation/ownership of the returned buffer
   *         is defined in the .cc file — confirm who frees it)
   */
  char* Serialize(int64_t* size);
  /**
   * @brief Deserialize ArrayMeta from data buffer
   * @param buffer data buffer
   * @param size size of data buffer
   */
  void Deserialize(char* buffer, int64_t size);
  /**
   * @brief type of message
   */
  int msg_type_;
  /**
   * @brief count of ndarray in MetaMsg
   */
  int ndarray_count_;
  /**
   * @brief DataType for each NDArray
   */
  std::vector<DGLDataType> data_type_;
  /**
   * @brief We first write the ndim to data_shape_
   * and then write the data shape.
   */
  std::vector<int64_t> data_shape_;
};
/**
 * @brief C structure for holding DGL KVServer message
 *
 * C++ counterpart of the Python-side KVStoreMsg namedtuple: depending on
 * msg_type, only a subset of the id/data/shape fields is populated.
 */
class KVStoreMsg {
 public:
  /**
   * @brief KVStoreMsg constructor.
   */
  KVStoreMsg() {}
  /**
   * @brief Construct KVStoreMsg from binary data buffer.
   * @param buffer data buffer (must not be null)
   * @param size data size
   */
  KVStoreMsg(char* buffer, int64_t size) {
    CHECK_NOTNULL(buffer);
    this->Deserialize(buffer, size);
  }
  /**
   * @brief Serialize KVStoreMsg to data buffer
   * Note that we don't serialize ID and data here.
   * @param size size of serialized message (output parameter)
   * @return pointer of data buffer
   *         (NOTE(review): buffer ownership is defined in the .cc file —
   *         confirm who frees it)
   */
  char* Serialize(int64_t* size);
  /**
   * @brief Deserialize KVStoreMsg from data buffer
   * @param buffer data buffer
   * @param size size of data buffer
   */
  void Deserialize(char* buffer, int64_t size);
  /**
   * @brief Message type of kvstore (a MessageType value)
   */
  int msg_type;
  /**
   * @brief Sender's ID
   */
  int rank;
  /**
   * @brief data name
   */
  std::string name;
  /**
   * @brief data ID
   */
  NDArray id;
  /**
   * @brief data matrix
   */
  NDArray data;
  /**
   * @brief data shape
   */
  NDArray shape;
};
} // namespace network
} // namespace dgl
#endif // DGL_GRAPH_NETWORK_H_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment