Unverified Commit 0114f4fd authored by Minjie Wang, committed by GitHub

Merge branch 'master' into fix_readme

parents 14ab462f 40b44a43
......@@ -208,6 +208,10 @@ Methods for getting or changing the device on which the graph is hosted.
DGLGraph.to
DGLGraph.device
DGLGraph.cpu
DGLGraph.pin_memory_
DGLGraph.unpin_memory_
DGLGraph.is_pinned
Misc
----
......
......@@ -129,6 +129,37 @@ struct COOMatrix {
aten::IsNullArray(data) ? data : data.CopyTo(ctx, stream),
row_sorted, col_sorted);
}
/*!
* \brief Pin the row, col and data (if not Null) of the matrix.
* \note This is an in-place method. Behavior depends on the current context,
* kDLCPU: will be pinned;
* kDLCPUPinned: directly return;
* kDLGPU: invalid, will throw an error.
* The context check is deferred to pinning the NDArray.
*/
inline void PinMemory_() {
row.PinMemory_();
col.PinMemory_();
if (!aten::IsNullArray(data)) {
data.PinMemory_();
}
}
/*!
* \brief Unpin the row, col and data (if not Null) of the matrix.
* \note This is an in-place method. Behavior depends on the current context,
* kDLCPUPinned: will be unpinned;
* others: directly return.
* The context check is deferred to unpinning the NDArray.
*/
inline void UnpinMemory_() {
row.UnpinMemory_();
col.UnpinMemory_();
if (!aten::IsNullArray(data)) {
data.UnpinMemory_();
}
}
};
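The same member-wise delegation appears in CSRMatrix below (indptr, indices and the optional data array). As a hedged illustration of the pattern, here is a minimal standalone sketch; Array and COOLike are stand-ins for NDArray and COOMatrix, with a plain flag in place of the real context re-tagging:

#include <vector>

struct Array {
  std::vector<int> buf;
  bool pinned = false;
  bool IsNull() const { return buf.empty(); }
  void PinMemory_()   { pinned = true;  }  // real code: cudaHostRegister
  void UnpinMemory_() { pinned = false; }  // real code: cudaHostUnregister
};

struct COOLike {
  Array row, col, data;   // data is optional and may be a null array
  void PinMemory_() {     // pin every constituent array in place
    row.PinMemory_();
    col.PinMemory_();
    if (!data.IsNull()) data.PinMemory_();
  }
  void UnpinMemory_() {   // unpin member-wise in the same way
    row.UnpinMemory_();
    col.UnpinMemory_();
    if (!data.IsNull()) data.UnpinMemory_();
  }
};

int main() {
  COOLike coo;
  coo.row.buf = {0, 1};
  coo.col.buf = {1, 2};
  coo.PinMemory_();    // data is null, so only row and col are pinned
  coo.UnpinMemory_();
}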
///////////////////////// COO routines //////////////////////////
......
......@@ -122,6 +122,37 @@ struct CSRMatrix {
aten::IsNullArray(data) ? data : data.CopyTo(ctx, stream),
sorted);
}
/*!
* \brief Pin the indptr, indices and data (if not Null) of the matrix.
* \note This is an in-place method. Behavior depends on the current context,
* kDLCPU: will be pinned;
* kDLCPUPinned: directly return;
* kDLGPU: invalid, will throw an error.
* The context check is deferred to pinning the NDArray.
*/
inline void PinMemory_() {
indptr.PinMemory_();
indices.PinMemory_();
if (!aten::IsNullArray(data)) {
data.PinMemory_();
}
}
/*!
* \brief Unpin the indptr, indices and data (if not Null) of the matrix.
* \note This is an in-place method. Behavior depends on the current context,
* kDLCPUPinned: will be unpinned;
* others: directly return.
* The context check is deferred to unpinning the NDArray.
*/
inline void UnpinMemory_() {
indptr.UnpinMemory_();
indices.UnpinMemory_();
if (!aten::IsNullArray(data)) {
data.UnpinMemory_();
}
}
};
///////////////////////// CSR routines //////////////////////////
......
......@@ -37,10 +37,13 @@
* // Now XPU is a placeholder for array->ctx.device_type
* DeviceSpecificImplementation<XPU>(...);
* });
*
* We treat pinned memory as normal host memory if we don't want
* to enable CUDA UVA access for this operator.
*/
#ifdef DGL_USE_CUDA
#define ATEN_XPU_SWITCH_CUDA(val, XPU, op, ...) do { \
if ((val) == kDLCPU) { \
if ((val) == kDLCPU || (val) == kDLCPUPinned) { \
constexpr auto XPU = kDLCPU; \
{__VA_ARGS__} \
} else if ((val) == kDLGPU) { \
......
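The macro change above is the dispatch crux: unless an operator opts in to CUDA UVA access, pinned memory is treated as ordinary host memory and takes the CPU code path. A minimal self-contained sketch of the idea, simplified from the DGL macro (the tag values and names here are illustrative):

#include <iostream>

// Illustrative device tags mirroring DLPack's kDLCPU/kDLGPU/kDLCPUPinned.
enum DeviceType { kDLCPU = 1, kDLGPU = 2, kDLCPUPinned = 3 };

// Simplified dispatch: kDLCPUPinned is routed to the CPU branch, exactly as
// in the ATEN_XPU_SWITCH_CUDA change above.
#define XPU_SWITCH(val, XPU, ...) do {              \
    if ((val) == kDLCPU || (val) == kDLCPUPinned) { \
      constexpr DeviceType XPU = kDLCPU;            \
      { __VA_ARGS__ }                               \
    } else if ((val) == kDLGPU) {                   \
      constexpr DeviceType XPU = kDLGPU;            \
      { __VA_ARGS__ }                               \
    }                                               \
  } while (0)

int main() {
  DeviceType ctx = kDLCPUPinned;
  XPU_SWITCH(ctx, XPU, {
    std::cout << "dispatched with XPU = " << XPU << "\n";  // prints 1 (kDLCPU)
  });
}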
......@@ -110,6 +110,11 @@ class BaseHeteroGraph : public runtime::Object {
*/
virtual DLContext Context() const = 0;
/*!
* \brief Check if this graph is pinned.
*/
virtual bool IsPinned() const = 0;
/*!
* \brief Get the number of integer bits used to store node/edge ids (32 or 64).
*/
......
......@@ -148,19 +148,17 @@ class DeviceAPI {
/*!
* \brief Pin host memory using cudaHostRegister().
*
* \param ctx The context of pinning and mapping.
* \param ptr The host memory pointer to be pinned.
* \param nbytes The size to be pinned.
*/
DGL_DLL virtual void PinData(DGLContext ctx, void* ptr, size_t nbytes);
DGL_DLL virtual void PinData(void* ptr, size_t nbytes);
/*!
* \brief Unpin host memory ussing cudaHostUnregister().
* \brief Unpin host memory using cudaHostUnregister().
*
* \param ctx The context to unmap and unpin.
* \param ptr The host memory pointer to be unpinned.
*/
DGL_DLL virtual void UnpinData(DGLContext ctx, void* ptr);
DGL_DLL virtual void UnpinData(void* ptr);
/*!
* \brief Allocate temporary workspace for backend execution.
......@@ -190,12 +188,21 @@ class DeviceAPI {
DGL_DLL virtual void FreeWorkspace(DGLContext ctx, void* ptr);
/*!
* \brief Get device API base don context.
* \brief Get device API based on context.
* \param ctx The context.
* \param allow_missing Whether to allow a missing device API.
* \return The corresponding device API.
*/
DGL_DLL static DeviceAPI* Get(DGLContext ctx, bool allow_missing = false);
/*!
* \brief Get device API based on context.
* \param dev_type The device type.
* \param allow_missing Whether to allow a missing device API.
* \return The corresponding device API.
*/
DGL_DLL static DeviceAPI* Get(DLDeviceType dev_type, bool allow_missing = false);
};
/*! \brief The device type bigger than this is RPC device */
......
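Presumably the motivation for the new Get(DLDeviceType) overload: pinning operates on host-resident pointers, so the buffer's own context is CPU while the work must be done by the CUDA device API, and a lookup by bare device type expresses that directly. A minimal sketch of the pattern (illustrative types, not the real DeviceAPIManager registry):

#include <cstddef>

enum DLDeviceType { kDLCPU = 1, kDLGPU = 2 };  // illustrative tags

struct DeviceAPI {
  virtual ~DeviceAPI() = default;
  virtual void PinData(void* ptr, std::size_t nbytes) = 0;
  static DeviceAPI* Get(DLDeviceType dev_type);  // lookup by type only
};

struct CUDADeviceAPI final : DeviceAPI {
  void PinData(void* /*ptr*/, std::size_t /*nbytes*/) override {
    // real code: CUDA_CALL(cudaHostRegister(ptr, nbytes, cudaHostRegisterDefault));
  }
};

DeviceAPI* DeviceAPI::Get(DLDeviceType dev_type) {
  static CUDADeviceAPI cuda;                     // real code: a per-type table
  return dev_type == kDLGPU ? &cuda : nullptr;
}

int main() {
  int host_buf[256] = {};
  // The buffer lives on the CPU, but the GPU runtime performs the pinning.
  DeviceAPI::Get(kDLGPU)->PinData(host_buf, sizeof(host_buf));
}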
......@@ -171,6 +171,27 @@ class NDArray {
* \brief Return a new array with a copy of the content.
*/
inline NDArray Clone(const DGLStreamHandle &stream = nullptr) const;
/*!
* \brief In-place method to pin the current array by calling PinData
* on the underlying DLTensor.
* \note This is an in-place method. Behavior depends on the current context,
* kDLCPU: will be pinned;
* kDLCPUPinned: directly return;
* kDLGPU: invalid, will throw an error.
*/
inline void PinMemory_();
/*!
* \brief In-place method to unpin the current array by calling UnpinData
* on the underlying DLTensor.
* \note This is an in-place method. Behavior depends on the current context,
* kDLCPUPinned: will be unpinned;
* others: directly return.
*/
inline void UnpinMemory_();
/*!
* \brief Check if the array is pinned.
*/
inline bool IsPinned() const;
/*!
* \brief Load NDArray from stream
* \param stream The input data stream
......@@ -272,6 +293,27 @@ class NDArray {
DGL_DLL static void CopyFromTo(
DLTensor* from, DLTensor* to, DGLStreamHandle stream = nullptr);
/*!
* \brief Function to pin the data of a DLTensor.
* \param tensor The array to be pinned.
* \note Data of the given array will be pinned inplace.
* Behavior depends on the current context,
* kDLCPU: will be pinned;
* kDLCPUPinned: directly return;
* kDLGPU: invalid, will throw an error.
*/
DGL_DLL static void PinData(DLTensor* tensor);
/*!
* \brief Function to unpin the data of a DLTensor.
* \param tensor The array to be unpinned.
* \note Data of the given array will be unpinned inplace.
* Behavior depends on the current context,
* kDLCPUPinned: will be unpinned;
* others: directly return.
*/
DGL_DLL static void UnpinData(DLTensor* tensor);
// internal namespace
struct Internal;
private:
......@@ -431,6 +473,21 @@ inline NDArray NDArray::Clone(const DGLStreamHandle &stream) const {
return this->CopyTo(dptr->ctx, stream);
}
inline void NDArray::PinMemory_() {
CHECK(data_ != nullptr);
PinData(&(data_->dl_tensor));
}
inline void NDArray::UnpinMemory_() {
CHECK(data_ != nullptr);
UnpinData(&(data_->dl_tensor));
}
inline bool NDArray::IsPinned() const {
CHECK(data_ != nullptr);
return data_->dl_tensor.ctx.device_type == kDLCPUPinned;
}
inline int NDArray::use_count() const {
if (data_ == nullptr) return 0;
return data_->ref_counter_.load(std::memory_order_relaxed);
......
......@@ -5458,6 +5458,74 @@ class DGLHeteroGraph(object):
"""
return self.to(F.cpu())
def pin_memory_(self):
"""Pin the graph structure to the page-locked memory.
This is an **inplace** method. The graph structure must be on CPU to be pinned.
If the graph struture is already pinned, the function directly returns it.
Materialization of new sparse formats for pinned graphs is not allowed.
To avoid implicit formats materialization during training,
you should create all the needed formats before pinnning.
But cloning and materialization is fine. See the examples below.
Returns
-------
DGLGraph
The pinned graph.
Examples
--------
The following example uses the PyTorch backend.
>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([1, 0]), torch.tensor([1, 2])))
>>> g.pin_memory_()
Materialization of new sparse formats is not allowed for pinned graphs.
>>> g.create_formats_() # This would raise an error! You should do this before pinning.
Cloning and materializing new formats is allowed. The returned graph is **not** pinned.
>>> g1 = g.formats(['csc'])
>>> assert not g1.is_pinned()
"""
if self._graph.is_pinned():
return self
if F.device_type(self.device) != 'cpu':
raise DGLError("The graph structure must be on CPU to be pinned.")
self._graph.pin_memory_()
return self
def unpin_memory_(self):
"""Unpin the graph structure from the page-locked memory.
This is an **inplace** method.If the graph struture is not pinned,
e.g., on CPU or GPU, the function directly returns it.
Returns
-------
DGLGraph
The unpinned graph.
"""
if not self._graph.is_pinned():
return self
self._graph.unpin_memory_()
return self
def is_pinned(self):
"""Check if the graph structure is pinned to the page-locked memory.
Returns
-------
bool
True if the graph structure is pinned.
"""
return self._graph.is_pinned()
def clone(self):
"""Return a heterograph object that is a clone of current graph.
......
......@@ -234,6 +234,44 @@ class HeteroGraphIndex(ObjectBase):
"""
return _CAPI_DGLHeteroCopyTo(self, ctx.device_type, ctx.device_id)
def pin_memory_(self):
"""Pin this graph to the page-locked memory.
NOTE: This is an inplace method.
The graph structure must be on CPU to be pinned.
If the graph struture is already pinned, the function directly returns it.
Returns
-------
HeteroGraphIndex
The pinned graph index.
"""
return _CAPI_DGLHeteroPinMemory_(self)
def unpin_memory_(self):
"""Unpin this graph from the page-locked memory.
NOTE: this is an inplace method.
If the graph struture is not pinned, e.g., on CPU or GPU,
the function directly returns it.
Returns
-------
HeteroGraphIndex
The unpinned graph index.
"""
return _CAPI_DGLHeteroUnpinMemory_(self)
def is_pinned(self):
"""Check if this graph is pinned to the page-locked memory.
Returns
-------
bool
True if the graph is pinned.
"""
return bool(_CAPI_DGLHeteroIsPinned(self))
def shared_memory(self, name, ntypes=None, etypes=None, formats=('coo', 'csr', 'csc')):
"""Return a copy of this graph in shared memory
......
......@@ -24,7 +24,7 @@ NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) {
int64_t num_feat = 1;
std::vector<int64_t> shape{len};
CHECK_EQ(array->ctx.device_type, kDLCPU);
CHECK_EQ(array->ctx.device_type, kDLCPUPinned);
CHECK_EQ(index->ctx.device_type, kDLGPU);
for (int d = 1; d < array->ndim; ++d) {
......
......@@ -15,10 +15,10 @@ namespace aten {
NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) {
#ifdef DGL_USE_CUDA
CHECK_EQ(array->ctx.device_type, kDLCPU) << "Only the CPU device type input "
<< "array supported";
CHECK_EQ(index->ctx.device_type, kDLGPU) << "Only the GPU device type input "
<< "index supported";
CHECK_EQ(array->ctx.device_type, kDLCPUPinned)
<< "Only the CPUPinned device type input array is supported";
CHECK_EQ(index->ctx.device_type, kDLGPU)
<< "Only the GPU device type input index is supported";
CHECK_GE(array->ndim, 1) << "Only support array with at least 1 dimension";
CHECK_EQ(index->ndim, 1) << "Index array must be a 1D array.";
......
......@@ -267,6 +267,16 @@ HeteroGraphPtr HeteroGraph::CopyTo(HeteroGraphPtr g, const DLContext &ctx,
hgindex->num_verts_per_type_));
}
void HeteroGraph::PinMemory_() {
for (auto g : relation_graphs_)
g->PinMemory_();
}
void HeteroGraph::UnpinMemory_() {
for (auto g : relation_graphs_)
g->UnpinMemory_();
}
std::string HeteroGraph::SharedMemName() const {
return shared_mem_ ? shared_mem_->GetName() : "";
}
......
......@@ -58,6 +58,10 @@ class HeteroGraph : public BaseHeteroGraph {
return relation_graphs_[0]->Context();
}
bool IsPinned() const override {
return relation_graphs_[0]->IsPinned();
}
uint8_t NumBits() const override {
return relation_graphs_[0]->NumBits();
}
......@@ -228,6 +232,25 @@ class HeteroGraph : public BaseHeteroGraph {
static HeteroGraphPtr CopyTo(HeteroGraphPtr g, const DLContext &ctx,
const DGLStreamHandle &stream = nullptr);
/*!
* \brief Pin all relation graphs of the current graph.
* \note The graph will be pinned inplace. Behavior depends on the current context,
* kDLCPU: will be pinned;
* kDLCPUPinned: directly return;
* kDLGPU: invalid, will throw an error.
* The context check is deferred to pinning the NDArray.
*/
void PinMemory_();
/*!
* \brief Unpin all relation graphs of the current graph.
* \note The graph will be unpinned inplace. Behavior depends on the current context,
* kDLCPUPinned: will be unpinned;
* others: directly return.
* The context check is deferred to unpinning the NDArray.
*/
void UnpinMemory_();
/*! \brief Copy the data to shared memory.
*
* Also save names of node types and edge types of the HeteroGraph object to shared memory
......
......@@ -173,7 +173,19 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroDataType")
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroContext")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0];
*rv = hg->Context();
// The Python side only recognizes the CPU and GPU device types.
// Use is_pinned() to check whether the object is
// in page-locked memory.
if (hg->Context().device_type == kDLCPUPinned)
*rv = DLContext{kDLCPU, 0};
else
*rv = hg->Context();
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroIsPinned")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0];
*rv = hg->IsPinned();
});
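A self-contained sketch of the translation performed above: pinned memory is still host memory, so it is reported to Python as a CPU context, and whether it is page-locked is answered by the separate is_pinned() query (the types below are illustrative):

#include <cassert>

enum DLDeviceType { kDLCPU = 1, kDLGPU = 2, kDLCPUPinned = 3 };
struct DLContext { DLDeviceType device_type; int device_id; };

DLContext ExportedContext(DLContext ctx) {
  // collapse the pinned tag to plain CPU for the two-valued Python model
  if (ctx.device_type == kDLCPUPinned) return DLContext{kDLCPU, 0};
  return ctx;
}

int main() {
  DLContext pinned{kDLCPUPinned, 0};
  assert(ExportedContext(pinned).device_type == kDLCPU);
}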
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroNumBits")
......@@ -473,6 +485,22 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCopyTo")
*rv = HeteroGraphRef(hg_new);
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPinMemory_")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0];
auto hgindex = std::dynamic_pointer_cast<HeteroGraph>(hg.sptr());
hgindex->PinMemory_();
*rv = hg;
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroUnpinMemory_")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0];
auto hgindex = std::dynamic_pointer_cast<HeteroGraph>(hg.sptr());
hgindex->UnpinMemory_();
*rv = hg;
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCopyToSharedMem")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0];
......
......@@ -133,6 +133,10 @@ class UnitGraph::COO : public BaseHeteroGraph {
return adj_.row->ctx;
}
bool IsPinned() const override {
return adj_.row.IsPinned();
}
uint8_t NumBits() const override {
return adj_.row->dtype.bits;
}
......@@ -156,6 +160,17 @@ class UnitGraph::COO : public BaseHeteroGraph {
return COO(meta_graph_, adj_.CopyTo(ctx, stream));
}
/*! \brief Pin the adj_ (COOMatrix) of the COO graph. */
void PinMemory_() {
adj_.PinMemory_();
}
/*! \brief Unpin the adj_ (COOMatrix) of the COO graph. */
void UnpinMemory_() {
adj_.UnpinMemory_();
}
bool IsMultigraph() const override {
return aten::COOHasDuplicate(adj_);
}
......@@ -520,6 +535,10 @@ class UnitGraph::CSR : public BaseHeteroGraph {
return adj_.indices->ctx;
}
bool IsPinned() const override {
return adj_.indices.IsPinned();
}
uint8_t NumBits() const override {
return adj_.indices->dtype.bits;
}
......@@ -547,6 +566,16 @@ class UnitGraph::CSR : public BaseHeteroGraph {
}
}
/*! \brief Pin the adj_ (CSRMatrix) of the CSR graph. */
void PinMemory_() {
adj_.PinMemory_();
}
/*! \brief Unpin the adj_ (CSRMatrix) of the CSR graph. */
void UnpinMemory_() {
adj_.UnpinMemory_();
}
bool IsMultigraph() const override {
return aten::CSRHasDuplicate(adj_);
}
......@@ -829,6 +858,10 @@ DLContext UnitGraph::Context() const {
return GetAny()->Context();
}
bool UnitGraph::IsPinned() const {
return GetAny()->IsPinned();
}
uint8_t UnitGraph::NumBits() const {
return GetAny()->NumBits();
}
......@@ -1263,6 +1296,24 @@ HeteroGraphPtr UnitGraph::CopyTo(HeteroGraphPtr g, const DLContext &ctx,
}
}
void UnitGraph::PinMemory_() {
if (this->in_csr_->defined())
this->in_csr_->PinMemory_();
if (this->out_csr_->defined())
this->out_csr_->PinMemory_();
if (this->coo_->defined())
this->coo_->PinMemory_();
}
void UnitGraph::UnpinMemory_() {
if (this->in_csr_->defined())
this->in_csr_->UnpinMemory_();
if (this->out_csr_->defined())
this->out_csr_->UnpinMemory_();
if (this->coo_->defined())
this->coo_->UnpinMemory_();
}
void UnitGraph::InvalidateCSR() {
this->out_csr_ = CSRPtr(new CSR());
}
......@@ -1334,6 +1385,10 @@ UnitGraph::CSRPtr UnitGraph::GetInCSR(bool inplace) const {
// Prefers converting from COO since it is parallelized.
// TODO(BarclayII): need benchmarking.
if (!in_csr_->defined()) {
// in-place materialization of new formats is not allowed for pinned graphs
if (inplace && IsPinned())
LOG(FATAL) << "Cannot create new formats for pinned graphs, " <<
"please create the CSC format before pinning.";
if (coo_->defined()) {
const auto& newadj = aten::COOToCSR(
aten::COOTranspose(coo_->adj()));
......@@ -1365,6 +1420,10 @@ UnitGraph::CSRPtr UnitGraph::GetOutCSR(bool inplace) const {
// Prefers converting from COO since it is parallelized.
// TODO(BarclayII): need benchmarking.
if (!out_csr_->defined()) {
// in-place materialization of new formats is not allowed for pinned graphs
if (inplace && IsPinned())
LOG(FATAL) << "Cannot create new formats for pinned graphs, " <<
"please create the CSR format before pinning.";
if (coo_->defined()) {
const auto& newadj = aten::COOToCSR(coo_->adj());
......@@ -1393,6 +1452,10 @@ UnitGraph::COOPtr UnitGraph::GetCOO(bool inplace) const {
CodeToStr(formats_) << ", cannot create COO matrix.";
COOPtr ret = coo_;
if (!coo_->defined()) {
// in-place materialization of new formats is not allowed for pinned graphs
if (inplace && IsPinned())
LOG(FATAL) << "Cannot create new formats for pinned graphs, " <<
"please create the COO format before pinning.";
if (in_csr_->defined()) {
const auto& newadj = aten::COOTranspose(aten::CSRToCOO(in_csr_->adj(), true));
......
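All three lazy getters (GetInCSR, GetOutCSR, GetCOO) apply the same guard. A hedged sketch of the rule, reduced to a single format flag: materializing a new format in place would allocate fresh, unpinned arrays inside a pinned graph, so the inplace path is rejected, while a non-inplace request can materialize into a copy that comes back unpinned.

#include <stdexcept>

struct UnitGraphLike {   // illustrative stand-in, not the DGL class
  bool csr_defined = false;
  bool pinned = false;
  void GetCSR(bool inplace) {
    if (!csr_defined) {
      if (inplace && pinned)
        throw std::runtime_error(
            "cannot create new formats for pinned graphs; "
            "create them before pinning");
      csr_defined = true;  // materialize from another existing format
    }
  }
};

int main() {
  UnitGraphLike g;
  g.GetCSR(/*inplace=*/true);  // fine: g is not pinned yet
}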
......@@ -83,6 +83,8 @@ class UnitGraph : public BaseHeteroGraph {
DLContext Context() const override;
bool IsPinned() const override;
uint8_t NumBits() const override;
bool IsMultigraph() const override;
......@@ -208,6 +210,25 @@ class UnitGraph : public BaseHeteroGraph {
static HeteroGraphPtr CopyTo(HeteroGraphPtr g, const DLContext &ctx,
const DGLStreamHandle &stream = nullptr);
/*!
* \brief Pin the in_csr_, out_csr_ and coo_ of the current graph.
* \note The graph will be pinned inplace. Behavior depends on the current context,
* kDLCPU: will be pinned;
* kDLCPUPinned: directly return;
* kDLGPU: invalid, will throw an error.
* The context check is deferred to pinning the NDArray.
*/
void PinMemory_();
/*!
* \brief Unpin the in_csr_, out_csr_ and coo_ of the current graph.
* \note The graph will be unpinned inplace. Behavior depends on the current context,
* kDLCPUPinned: will be unpinned;
* others: directly return.
* The context check is deferred to unpinning the NDArray.
*/
void UnpinMemory_();
/*!
* \brief Create in-edge CSR format of the unit graph.
* \param inplace if true and the in-edge CSR format does not exist, the created
......
......@@ -99,6 +99,10 @@ DeviceAPI* DeviceAPI::Get(DGLContext ctx, bool allow_missing) {
static_cast<int>(ctx.device_type), allow_missing);
}
DeviceAPI* DeviceAPI::Get(DLDeviceType dev_type, bool allow_missing) {
return DeviceAPIManager::Get(static_cast<int>(dev_type), allow_missing);
}
void* DeviceAPI::AllocWorkspace(DGLContext ctx,
size_t size,
DGLType type_hint) {
......@@ -124,11 +128,11 @@ void DeviceAPI::SyncStreamFromTo(DGLContext ctx,
LOG(FATAL) << "Device does not support stream api.";
}
void DeviceAPI::PinData(DGLContext ctx, void* ptr, size_t nbytes) {
void DeviceAPI::PinData(void* ptr, size_t nbytes) {
LOG(FATAL) << "Device does not support cudaHostRegister api.";
}
void DeviceAPI::UnpinData(DGLContext ctx, void* ptr) {
void DeviceAPI::UnpinData(void* ptr) {
LOG(FATAL) << "Device does not support cudaHostUnregister api.";
}
} // namespace runtime
......
......@@ -174,13 +174,16 @@ class CUDADeviceAPI final : public DeviceAPI {
return static_cast<DGLStreamHandle>(CUDAThreadEntry::ThreadLocal()->stream);
}
void PinData(DGLContext ctx, void* ptr, size_t nbytes) {
CUDA_CALL(cudaSetDevice(ctx.device_id));
/*! NOTE: cudaHostRegister can be called from an arbitrary GPU device,
* so we don't need to specify a ctx.
* The pinned memory can be seen by all CUDA contexts,
* not just the one that performed the allocation.
*/
void PinData(void* ptr, size_t nbytes) {
CUDA_CALL(cudaHostRegister(ptr, nbytes, cudaHostRegisterDefault));
}
void UnpinData(DGLContext ctx, void* ptr) {
CUDA_CALL(cudaSetDevice(ctx.device_id));
void UnpinData(void* ptr) {
CUDA_CALL(cudaHostUnregister(ptr));
}
......
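A standalone sketch of the CUDA runtime calls behind PinData and UnpinData (requires the CUDA toolkit; link against cudart). Neither call needs cudaSetDevice: the registration is visible to every CUDA context, which is exactly why the ctx parameter could be dropped.

#include <cuda_runtime.h>
#include <cstddef>
#include <cstdlib>

int main() {
  const std::size_t nbytes = 1 << 20;
  void* buf = std::malloc(nbytes);
  // page-lock an ordinary heap allocation so the GPU can access it directly
  cudaError_t err = cudaHostRegister(buf, nbytes, cudaHostRegisterDefault);
  if (err != cudaSuccess) return 1;
  // ... buf is now pinned and zero-copy accessible from any GPU ...
  err = cudaHostUnregister(buf);  // must precede free()
  std::free(buf);
  return err == cudaSuccess ? 0 : 1;
}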
......@@ -63,6 +63,10 @@ struct NDArray::Internal {
} else if (ptr->mem) {
ptr->mem = nullptr;
} else if (ptr->dl_tensor.data != nullptr) {
// if the array is still pinned before freeing, unpin it.
if (ptr->dl_tensor.ctx.device_type == kDLCPUPinned) {
UnpinData(&(ptr->dl_tensor));
}
dgl::runtime::DeviceAPI::Get(ptr->dl_tensor.ctx)->FreeDataSpace(
ptr->dl_tensor.ctx, ptr->dl_tensor.data);
}
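A minimal RAII sketch of the ordering the deleter enforces above: the registration must be released with the GPU runtime before the host allocation is freed, otherwise the driver retains a mapping to memory that no longer exists (PinnedBuffer is illustrative; real code calls cudaHostRegister and cudaHostUnregister).

#include <cstddef>
#include <cstdlib>

struct PinnedBuffer {
  void* data = nullptr;
  bool pinned = false;
  explicit PinnedBuffer(std::size_t nbytes) : data(std::malloc(nbytes)) {}
  void Pin()   { pinned = true;  }  // real code: cudaHostRegister(data, ...)
  void Unpin() { pinned = false; }  // real code: cudaHostUnregister(data)
  ~PinnedBuffer() {
    if (pinned) Unpin();            // unpin first ...
    std::free(data);                // ... then release the allocation
  }
};

int main() {
  PinnedBuffer buf(1 << 20);
  buf.Pin();
}  // destructor unpins, then frees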
......@@ -202,6 +206,19 @@ NDArray NDArray::EmptyShared(const std::string &name,
return ret;
}
inline DLContext GetDevice(DLContext ctx) {
switch (ctx.device_type) {
case kDLCPU:
case kDLGPU:
return ctx;
default:
// fall back to CPU, e.g., for kDLCPUPinned: pinned arrays are
// allocated as ordinary CPU memory and registered afterwards
return DLContext{kDLCPU, 0};
}
}
NDArray NDArray::Empty(std::vector<int64_t> shape,
DLDataType dtype,
DLContext ctx) {
......@@ -209,7 +226,7 @@ NDArray NDArray::Empty(std::vector<int64_t> shape,
if (td->IsAvailable())
return td->Empty(shape, dtype, ctx);
NDArray ret = Internal::Create(shape, dtype, ctx);
NDArray ret = Internal::Create(shape, dtype, GetDevice(ctx));
// setup memory content
size_t size = GetDataSize(ret.data_->dl_tensor);
size_t alignment = GetDataAlignment(ret.data_->dl_tensor);
......@@ -251,6 +268,22 @@ void NDArray::CopyFromTo(DLTensor* from,
from_size, from->ctx, to->ctx, from->dtype, stream);
}
void NDArray::PinData(DLTensor* tensor) {
// Only need to call PinData once, since the pinned memory can be seen
// by all CUDA contexts, not just the one that performed the allocation.
if (tensor->ctx.device_type == kDLCPUPinned) return;
CHECK_EQ(tensor->ctx.device_type, kDLCPU)
<< "Only NDArray on CPU can be pinned";
DeviceAPI::Get(kDLGPU)->PinData(tensor->data, GetDataSize(*tensor));
tensor->ctx = DLContext{kDLCPUPinned, 0};
}
void NDArray::UnpinData(DLTensor* tensor) {
if (tensor->ctx.device_type != kDLCPUPinned) return;
DeviceAPI::Get(kDLGPU)->UnpinData(tensor->data);
tensor->ctx = DLContext{kDLCPU, 0};
}
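The pin/unpin state machine above, condensed into a standalone sketch: pinnedness is recorded by re-tagging the tensor's context, repeated pins and unpins are no-ops, and pinning non-CPU memory is an error (TensorLike is an illustrative stand-in for DLTensor):

#include <stdexcept>

enum DLDeviceType { kDLCPU = 1, kDLGPU = 2, kDLCPUPinned = 3 };
struct TensorLike { DLDeviceType device_type = kDLCPU; };

void PinData(TensorLike* t) {
  if (t->device_type == kDLCPUPinned) return;  // already pinned: no-op
  if (t->device_type != kDLCPU)
    throw std::invalid_argument("only CPU tensors can be pinned");
  // real code: DeviceAPI::Get(kDLGPU)->PinData(data, nbytes);
  t->device_type = kDLCPUPinned;               // record the state in the ctx
}

void UnpinData(TensorLike* t) {
  if (t->device_type != kDLCPUPinned) return;  // not pinned: no-op
  // real code: DeviceAPI::Get(kDLGPU)->UnpinData(data);
  t->device_type = kDLCPU;
}

int main() {
  TensorLike t;
  PinData(&t);
  PinData(&t);    // idempotent
  UnpinData(&t);
  UnpinData(&t);  // idempotent
}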
template<typename T>
NDArray NDArray::FromVector(const std::vector<T>& vec, DLContext ctx) {
const DLDataType dtype = DLDataTypeTraits<T>::dtype;
......@@ -508,16 +541,13 @@ int DGLArrayCopyToBytes(DGLArrayHandle handle,
int DGLArrayPinData(DGLArrayHandle handle,
DLContext ctx) {
API_BEGIN();
CHECK_EQ(ctx.device_type, kDLGPU);
DeviceAPI::Get(ctx)->PinData(ctx, handle->data,
GetDataSize(*handle));
NDArray::PinData(handle);
API_END();
}
int DGLArrayUnpinData(DGLArrayHandle handle,
DLContext ctx) {
API_BEGIN();
CHECK_EQ(ctx.device_type, kDLGPU);
DeviceAPI::Get(ctx)->UnpinData(ctx, handle->data);
NDArray::UnpinData(handle);
API_END();
}
......@@ -952,6 +952,68 @@ def test_to_device2(g, idtype):
assert g1.etypes == g.etypes
assert g1.canonical_etypes == g.canonical_etypes
@unittest.skipIf(F._default_context_str == 'cpu', reason="Need gpu for this test")
@parametrize_dtype
def test_pin_memory_(idtype):
# TODO: rewrite this test case to accept different graphs so we
# can test reverse graph and batched graph
g = create_test_heterograph(idtype)
g.nodes['user'].data['h'] = F.ones((3, 5))
g.nodes['game'].data['i'] = F.ones((2, 5))
g.edges['plays'].data['e'] = F.ones((4, 4))
g = g.to(F.cpu())
assert not g.is_pinned()
if F.is_cuda_available():
# unpin an unpinned CPU graph, directly return
g.unpin_memory_()
assert not g.is_pinned()
assert g.device == F.cpu()
# pin a CPU graph
g.pin_memory_()
assert g.is_pinned()
assert g.device == F.cpu()
assert F.context(g.nodes['user'].data['h']) == F.cpu()
assert F.context(g.nodes['game'].data['i']) == F.cpu()
assert F.context(g.edges['plays'].data['e']) == F.cpu()
for ntype in g.ntypes:
assert F.context(g.batch_num_nodes(ntype)) == F.cpu()
for etype in g.canonical_etypes:
assert F.context(g.batch_num_edges(etype)) == F.cpu()
# not allowed to create new formats for the pinned graph
with pytest.raises(DGLError):
g.create_formats_()
# it's fine to clone with new formats, but new graphs are not pinned
# >>> g.formats()
# {'created': ['coo'], 'not created': ['csr', 'csc']}
assert not g.formats('csc').is_pinned()
assert not g.formats('csr').is_pinned()
# the 'coo' format is already created and thus not cloned
assert g.formats('coo').is_pinned()
# pin a pinned graph, directly return
g.pin_memory_()
assert g.is_pinned()
assert g.device == F.cpu()
# unpin a pinned graph
g.unpin_memory_()
assert not g.is_pinned()
assert g.device == F.cpu()
g1 = g.to(F.cuda())
# unpin an unpinned GPU graph, directly return
g1.unpin_memory_()
assert not g1.is_pinned()
assert g1.device == F.cuda()
# error pinning a GPU graph
with pytest.raises(DGLError):
g1.pin_memory_()
@parametrize_dtype
def test_convert_bound(idtype):
def _test_bipartite_bound(data, card):
......