Unverified commit cded5b80, authored by Xin Yao and committed by GitHub

[Feature] Bump DLPack to v0.7 and decouple DLPack from the core library (#4454)

* rename `DLContext` to `DGLContext`

* rename `kDLGPU` to `kDLCUDA`

* replace DLTensor with DGLArray

* fix linting

* Unify DGLType and DLDataType to DGLDataType

* Fix FFI

* rename DLDeviceType to DGLDeviceType

* decouple dlpack from the core library

* fix bug

* fix lint

* fix merge

* fix build

* address comments

* rename dl_converter to dlpack_convert

* remove redundant comments
parent f1689ad0
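At a glance, the commit swaps DLPack's names for DGL-owned aliases throughout the core, so `<dlpack/dlpack.h>` is only needed at the FFI boundary. A minimal sketch of the mapping, using only names that appear in the diff below (the header is assumed to be the one this commit swaps in for the DLPack include):

```cpp
#include <dgl/runtime/c_runtime_api.h>  // assumed home of the DGL aliases

void ExampleAliases() {
  DGLContext gpu = {kDGLCUDA, 0};        // was DLContext{kDLGPU, 0}
  DGLContext cpu = {kDGLCPU, 0};         // was DLContext{kDLCPU, 0}
  DGLDataType f32 = {kDGLFloat, 32, 1};  // was DLDataType{kDLFloat, 32, 1}
  (void)gpu; (void)cpu; (void)f32;       // silence unused-variable warnings
}
```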
@@ -240,7 +240,7 @@ __global__ void _CSRRowWiseSampleUniformReplaceKernel(
///////////////////////////// CSR sampling //////////////////////////
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
COOMatrix CSRRowWiseSamplingUniform(CSRMatrix mat,
IdArray rows,
const int64_t num_picks,
@@ -311,7 +311,7 @@ COOMatrix CSRRowWiseSamplingUniform(CSRMatrix mat,
device->CopyDataFromTo(out_ptr, num_rows * sizeof(new_len), &new_len, 0,
sizeof(new_len),
ctx,
-DGLContext{kDLCPU, 0},
+DGLContext{kDGLCPU, 0},
mat.indptr->dtype);
CUDA_CALL(cudaEventRecord(copyEvent, stream));
@@ -369,9 +369,9 @@ COOMatrix CSRRowWiseSamplingUniform(CSRMatrix mat,
picked_col, picked_idx);
}
-template COOMatrix CSRRowWiseSamplingUniform<kDLGPU, int32_t>(
+template COOMatrix CSRRowWiseSamplingUniform<kDGLCUDA, int32_t>(
CSRMatrix, IdArray, int64_t, bool);
-template COOMatrix CSRRowWiseSamplingUniform<kDLGPU, int64_t>(
+template COOMatrix CSRRowWiseSamplingUniform<kDGLCUDA, int64_t>(
CSRMatrix, IdArray, int64_t, bool);
} // namespace impl
......
@@ -416,7 +416,7 @@ __global__ void _CSRRowWiseSampleReplaceKernel(
* @param replace Is replacement sampling?
* @author pengqirong (OPPO), dlasalle and Xin from Nvidia.
*/
-template <DLDeviceType XPU, typename IdType, typename FloatType>
+template <DGLDeviceType XPU, typename IdType, typename FloatType>
COOMatrix CSRRowWiseSampling(CSRMatrix mat,
IdArray rows,
int64_t num_picks,
@@ -492,7 +492,7 @@ COOMatrix CSRRowWiseSampling(CSRMatrix mat,
device->CopyDataFromTo(temp_ptr, num_rows * sizeof(temp_len), &temp_len, 0,
sizeof(temp_len),
ctx,
-DGLContext{kDLCPU, 0},
+DGLContext{kDGLCPU, 0},
mat.indptr->dtype);
device->StreamSync(ctx, stream);
@@ -523,7 +523,7 @@ COOMatrix CSRRowWiseSampling(CSRMatrix mat,
device->CopyDataFromTo(out_ptr, num_rows * sizeof(new_len), &new_len, 0,
sizeof(new_len),
ctx,
-DGLContext{kDLCPU, 0},
+DGLContext{kDGLCPU, 0},
mat.indptr->dtype);
CUDA_CALL(cudaEventRecord(copyEvent, stream));
@@ -651,13 +651,13 @@ COOMatrix CSRRowWiseSampling(CSRMatrix mat,
picked_col, picked_idx);
}
-template COOMatrix CSRRowWiseSampling<kDLGPU, int32_t, float>(
+template COOMatrix CSRRowWiseSampling<kDGLCUDA, int32_t, float>(
CSRMatrix, IdArray, int64_t, FloatArray, bool);
-template COOMatrix CSRRowWiseSampling<kDLGPU, int64_t, float>(
+template COOMatrix CSRRowWiseSampling<kDGLCUDA, int64_t, float>(
CSRMatrix, IdArray, int64_t, FloatArray, bool);
-template COOMatrix CSRRowWiseSampling<kDLGPU, int32_t, double>(
+template COOMatrix CSRRowWiseSampling<kDGLCUDA, int32_t, double>(
CSRMatrix, IdArray, int64_t, FloatArray, bool);
-template COOMatrix CSRRowWiseSampling<kDLGPU, int64_t, double>(
+template COOMatrix CSRRowWiseSampling<kDGLCUDA, int64_t, double>(
CSRMatrix, IdArray, int64_t, FloatArray, bool);
} // namespace impl
......
@@ -54,52 +54,52 @@ void SDDMMCoo(const std::string& op,
}
-template void SDDMMCsr<kDLGPU, int32_t, 16>(
+template void SDDMMCsr<kDGLCUDA, int32_t, 16>(
const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int64_t, 16>(
+template void SDDMMCsr<kDGLCUDA, int64_t, 16>(
const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int32_t, 32>(
+template void SDDMMCsr<kDGLCUDA, int32_t, 32>(
const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int64_t, 32>(
+template void SDDMMCsr<kDGLCUDA, int64_t, 32>(
const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int32_t, 64>(
+template void SDDMMCsr<kDGLCUDA, int32_t, 64>(
const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int64_t, 64>(
+template void SDDMMCsr<kDGLCUDA, int64_t, 64>(
const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int32_t, 16>(
+template void SDDMMCoo<kDGLCUDA, int32_t, 16>(
const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int64_t, 16>(
+template void SDDMMCoo<kDGLCUDA, int64_t, 16>(
const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int32_t, 32>(
+template void SDDMMCoo<kDGLCUDA, int32_t, 32>(
const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int64_t, 32>(
+template void SDDMMCoo<kDGLCUDA, int64_t, 32>(
const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int32_t, 64>(
+template void SDDMMCoo<kDGLCUDA, int32_t, 64>(
const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int64_t, 64>(
+template void SDDMMCoo<kDGLCUDA, int64_t, 64>(
const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
NDArray lhs, NDArray rhs, NDArray out,
int lhs_target, int rhs_target);
......
@@ -42,42 +42,42 @@ void SDDMMCooHetero(const std::string& op,
}
-template void SDDMMCooHetero<kDLGPU, int32_t, 16>(
+template void SDDMMCooHetero<kDGLCUDA, int32_t, 16>(
const std::string& op, const BcastOff& bcast,
const std::vector<COOMatrix>& vec_coo,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int64_t, 16>(
+template void SDDMMCooHetero<kDGLCUDA, int64_t, 16>(
const std::string& op, const BcastOff& bcast,
const std::vector<COOMatrix>& vec_coo,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int32_t, 32>(
+template void SDDMMCooHetero<kDGLCUDA, int32_t, 32>(
const std::string& op, const BcastOff& bcast,
const std::vector<COOMatrix>& vec_coo,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int64_t, 32>(
+template void SDDMMCooHetero<kDGLCUDA, int64_t, 32>(
const std::string& op, const BcastOff& bcast,
const std::vector<COOMatrix>& vec_coo,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int32_t, 64>(
+template void SDDMMCooHetero<kDGLCUDA, int32_t, 64>(
const std::string& op, const BcastOff& bcast,
const std::vector<COOMatrix>& vec_coo,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int64_t, 64>(
+template void SDDMMCooHetero<kDGLCUDA, int64_t, 64>(
const std::string& op, const BcastOff& bcast,
const std::vector<COOMatrix>& vec_coo,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
......
@@ -41,42 +41,42 @@ void SDDMMCsrHetero(const std::string& op,
});
}
-template void SDDMMCsrHetero<kDLGPU, int32_t, 16>(
+template void SDDMMCsrHetero<kDGLCUDA, int32_t, 16>(
const std::string& op, const BcastOff& bcast,
const std::vector<CSRMatrix>& vec_csr,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int64_t, 16>(
+template void SDDMMCsrHetero<kDGLCUDA, int64_t, 16>(
const std::string& op, const BcastOff& bcast,
const std::vector<CSRMatrix>& vec_csr,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int32_t, 32>(
+template void SDDMMCsrHetero<kDGLCUDA, int32_t, 32>(
const std::string& op, const BcastOff& bcast,
const std::vector<CSRMatrix>& vec_csr,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int64_t, 32>(
+template void SDDMMCsrHetero<kDGLCUDA, int64_t, 32>(
const std::string& op, const BcastOff& bcast,
const std::vector<CSRMatrix>& vec_csr,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int32_t, 64>(
+template void SDDMMCsrHetero<kDGLCUDA, int32_t, 64>(
const std::string& op, const BcastOff& bcast,
const std::vector<CSRMatrix>& vec_csr,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
std::vector<NDArray> out, int lhs_target, int rhs_target,
const std::vector<dgl_type_t>& in_eid,
const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int64_t, 64>(
+template void SDDMMCsrHetero<kDGLCUDA, int64_t, 64>(
const std::string& op, const BcastOff& bcast,
const std::vector<CSRMatrix>& vec_csr,
const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
......
@@ -73,113 +73,113 @@ void BackwardSegmentCmp(NDArray feat,
}
-template void SegmentReduce<kDLGPU, int32_t, 16>(
+template void SegmentReduce<kDGLCUDA, int32_t, 16>(
const std::string& op,
NDArray feat,
NDArray offsets,
NDArray out,
NDArray arg);
-template void SegmentReduce<kDLGPU, int64_t, 16>(
+template void SegmentReduce<kDGLCUDA, int64_t, 16>(
const std::string &op,
NDArray feat,
NDArray offsets,
NDArray out,
NDArray arg);
-template void SegmentReduce<kDLGPU, int32_t, 32>(
+template void SegmentReduce<kDGLCUDA, int32_t, 32>(
const std::string& op,
NDArray feat,
NDArray offsets,
NDArray out,
NDArray arg);
-template void SegmentReduce<kDLGPU, int64_t, 32>(
+template void SegmentReduce<kDGLCUDA, int64_t, 32>(
const std::string &op,
NDArray feat,
NDArray offsets,
NDArray out,
NDArray arg);
-template void SegmentReduce<kDLGPU, int32_t, 64>(
+template void SegmentReduce<kDGLCUDA, int32_t, 64>(
const std::string &op,
NDArray feat,
NDArray offsets,
NDArray out,
NDArray arg);
-template void SegmentReduce<kDLGPU, int64_t, 64>(
+template void SegmentReduce<kDGLCUDA, int64_t, 64>(
const std::string &op,
NDArray feat,
NDArray offsets,
NDArray out,
NDArray arg);
-template void ScatterAdd<kDLGPU, int32_t, 16>(
+template void ScatterAdd<kDGLCUDA, int32_t, 16>(
NDArray feat,
NDArray idx,
NDArray out);
-template void ScatterAdd<kDLGPU, int64_t, 16>(
+template void ScatterAdd<kDGLCUDA, int64_t, 16>(
NDArray feat,
NDArray idx,
NDArray out);
-template void ScatterAdd<kDLGPU, int32_t, 32>(
+template void ScatterAdd<kDGLCUDA, int32_t, 32>(
NDArray feat,
NDArray idx,
NDArray out);
-template void ScatterAdd<kDLGPU, int64_t, 32>(
+template void ScatterAdd<kDGLCUDA, int64_t, 32>(
NDArray feat,
NDArray idx,
NDArray out);
-template void ScatterAdd<kDLGPU, int32_t, 64>(
+template void ScatterAdd<kDGLCUDA, int32_t, 64>(
NDArray feat,
NDArray idx,
NDArray out);
-template void ScatterAdd<kDLGPU, int64_t, 64>(
+template void ScatterAdd<kDGLCUDA, int64_t, 64>(
NDArray feat,
NDArray idx,
NDArray out);
-template void UpdateGradMinMax_hetero<kDLGPU, int32_t, 16>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, 16>(
const HeteroGraphPtr& g, const std::string& op,
const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int64_t, 16>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, 16>(
const HeteroGraphPtr& g, const std::string& op,
const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int32_t, 32>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, 32>(
const HeteroGraphPtr& g, const std::string& op,
const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int64_t, 32>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, 32>(
const HeteroGraphPtr& g, const std::string& op,
const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int32_t, 64>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, 64>(
const HeteroGraphPtr& g, const std::string& op,
const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int64_t, 64>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, 64>(
const HeteroGraphPtr& g, const std::string& op,
const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void BackwardSegmentCmp<kDLGPU, int32_t, 16>(
+template void BackwardSegmentCmp<kDGLCUDA, int32_t, 16>(
NDArray feat,
NDArray arg,
NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int64_t, 16>(
+template void BackwardSegmentCmp<kDGLCUDA, int64_t, 16>(
NDArray feat,
NDArray arg,
NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int32_t, 32>(
+template void BackwardSegmentCmp<kDGLCUDA, int32_t, 32>(
NDArray feat,
NDArray arg,
NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int64_t, 32>(
+template void BackwardSegmentCmp<kDGLCUDA, int64_t, 32>(
NDArray feat,
NDArray arg,
NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int32_t, 64>(
+template void BackwardSegmentCmp<kDGLCUDA, int32_t, 64>(
NDArray feat,
NDArray arg,
NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int64_t, 64>(
+template void BackwardSegmentCmp<kDGLCUDA, int64_t, 64>(
NDArray feat,
NDArray arg,
NDArray out);
......
@@ -71,7 +71,7 @@ __global__ void _COOGetRowNNZKernel(
}
}
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
cudaStream_t stream = runtime::getCurrentCUDAStream();
const auto& ctx = coo.row->ctx;
@@ -84,12 +84,12 @@ int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
nb, nt, 0, stream,
coo.row.Ptr<IdType>(), rst.Ptr<IdType>(),
row, nnz);
-rst = rst.CopyTo(DLContext{kDLCPU, 0});
+rst = rst.CopyTo(DGLContext{kDGLCPU, 0});
return *rst.Ptr<IdType>();
}
-template int64_t COOGetRowNNZ<kDLGPU, int32_t>(COOMatrix, int64_t);
-template int64_t COOGetRowNNZ<kDLGPU, int64_t>(COOMatrix, int64_t);
+template int64_t COOGetRowNNZ<kDGLCUDA, int32_t>(COOMatrix, int64_t);
+template int64_t COOGetRowNNZ<kDGLCUDA, int64_t>(COOMatrix, int64_t);
template <typename IdType>
__global__ void _COOGetAllRowNNZKernel(
@@ -104,7 +104,7 @@ __global__ void _COOGetAllRowNNZKernel(
}
}
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
cudaStream_t stream = runtime::getCurrentCUDAStream();
const auto& ctx = coo.row->ctx;
@@ -112,7 +112,7 @@ NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
IdType num_rows = coo.num_rows;
IdType num_queries = rows->shape[0];
if (num_queries == 1) {
-auto rows_cpu = rows.CopyTo(DLContext{kDLCPU, 0});
+auto rows_cpu = rows.CopyTo(DGLContext{kDGLCPU, 0});
int64_t row = *rows_cpu.Ptr<IdType>();
IdType nt = 1024;
IdType nb = dgl::cuda::FindNumBlocks<'x'>((nnz + nt - 1) / nt);
@@ -136,8 +136,8 @@ NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
}
}
-template NDArray COOGetRowNNZ<kDLGPU, int32_t>(COOMatrix, NDArray);
-template NDArray COOGetRowNNZ<kDLGPU, int64_t>(COOMatrix, NDArray);
+template NDArray COOGetRowNNZ<kDGLCUDA, int32_t>(COOMatrix, NDArray);
+template NDArray COOGetRowNNZ<kDGLCUDA, int64_t>(COOMatrix, NDArray);
} // namespace impl
} // namespace aten
......
@@ -21,7 +21,7 @@ namespace impl {
///////////////////////////// CSRIsNonZero /////////////////////////////
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
bool CSRIsNonZero(CSRMatrix csr, int64_t row, int64_t col) {
cudaStream_t stream = runtime::getCurrentCUDAStream();
const auto& ctx = csr.indptr->ctx;
@@ -38,14 +38,14 @@ bool CSRIsNonZero(CSRMatrix csr, int64_t row, int64_t col) {
rows.Ptr<IdType>(), cols.Ptr<IdType>(),
1, 1, 1,
static_cast<IdType*>(nullptr), static_cast<IdType>(-1), out.Ptr<IdType>());
-out = out.CopyTo(DLContext{kDLCPU, 0});
+out = out.CopyTo(DGLContext{kDGLCPU, 0});
return *out.Ptr<IdType>() != -1;
}
-template bool CSRIsNonZero<kDLGPU, int32_t>(CSRMatrix, int64_t, int64_t);
-template bool CSRIsNonZero<kDLGPU, int64_t>(CSRMatrix, int64_t, int64_t);
+template bool CSRIsNonZero<kDGLCUDA, int32_t>(CSRMatrix, int64_t, int64_t);
+template bool CSRIsNonZero<kDGLCUDA, int64_t>(CSRMatrix, int64_t, int64_t);
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
NDArray CSRIsNonZero(CSRMatrix csr, NDArray row, NDArray col) {
const auto rowlen = row->shape[0];
const auto collen = col->shape[0];
@@ -69,8 +69,8 @@ NDArray CSRIsNonZero(CSRMatrix csr, NDArray row, NDArray col) {
return rst != -1;
}
-template NDArray CSRIsNonZero<kDLGPU, int32_t>(CSRMatrix, NDArray, NDArray);
-template NDArray CSRIsNonZero<kDLGPU, int64_t>(CSRMatrix, NDArray, NDArray);
+template NDArray CSRIsNonZero<kDGLCUDA, int32_t>(CSRMatrix, NDArray, NDArray);
+template NDArray CSRIsNonZero<kDGLCUDA, int64_t>(CSRMatrix, NDArray, NDArray);
///////////////////////////// CSRHasDuplicate /////////////////////////////
@@ -95,7 +95,7 @@ __global__ void _SegmentHasNoDuplicate(
}
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
bool CSRHasDuplicate(CSRMatrix csr) {
if (!csr.sorted)
csr = CSRSort(csr);
@@ -116,20 +116,20 @@ bool CSRHasDuplicate(CSRMatrix csr) {
return !ret;
}
-template bool CSRHasDuplicate<kDLGPU, int32_t>(CSRMatrix csr);
-template bool CSRHasDuplicate<kDLGPU, int64_t>(CSRMatrix csr);
+template bool CSRHasDuplicate<kDGLCUDA, int32_t>(CSRMatrix csr);
+template bool CSRHasDuplicate<kDGLCUDA, int64_t>(CSRMatrix csr);
///////////////////////////// CSRGetRowNNZ /////////////////////////////
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
int64_t CSRGetRowNNZ(CSRMatrix csr, int64_t row) {
const IdType cur = aten::IndexSelect<IdType>(csr.indptr, row);
const IdType next = aten::IndexSelect<IdType>(csr.indptr, row + 1);
return next - cur;
}
-template int64_t CSRGetRowNNZ<kDLGPU, int32_t>(CSRMatrix, int64_t);
-template int64_t CSRGetRowNNZ<kDLGPU, int64_t>(CSRMatrix, int64_t);
+template int64_t CSRGetRowNNZ<kDGLCUDA, int32_t>(CSRMatrix, int64_t);
+template int64_t CSRGetRowNNZ<kDGLCUDA, int64_t>(CSRMatrix, int64_t);
template <typename IdType>
__global__ void _CSRGetRowNNZKernel(
@@ -146,7 +146,7 @@ __global__ void _CSRGetRowNNZKernel(
}
}
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
NDArray CSRGetRowNNZ(CSRMatrix csr, NDArray rows) {
cudaStream_t stream = runtime::getCurrentCUDAStream();
const auto len = rows->shape[0];
@@ -162,24 +162,24 @@ NDArray CSRGetRowNNZ(CSRMatrix csr, NDArray rows) {
return rst;
}
-template NDArray CSRGetRowNNZ<kDLGPU, int32_t>(CSRMatrix, NDArray);
-template NDArray CSRGetRowNNZ<kDLGPU, int64_t>(CSRMatrix, NDArray);
+template NDArray CSRGetRowNNZ<kDGLCUDA, int32_t>(CSRMatrix, NDArray);
+template NDArray CSRGetRowNNZ<kDGLCUDA, int64_t>(CSRMatrix, NDArray);
///////////////////////////// CSRGetRowColumnIndices /////////////////////////////
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
NDArray CSRGetRowColumnIndices(CSRMatrix csr, int64_t row) {
const int64_t len = impl::CSRGetRowNNZ<XPU, IdType>(csr, row);
const int64_t offset = aten::IndexSelect<IdType>(csr.indptr, row) * sizeof(IdType);
return csr.indices.CreateView({len}, csr.indices->dtype, offset);
}
-template NDArray CSRGetRowColumnIndices<kDLGPU, int32_t>(CSRMatrix, int64_t);
-template NDArray CSRGetRowColumnIndices<kDLGPU, int64_t>(CSRMatrix, int64_t);
+template NDArray CSRGetRowColumnIndices<kDGLCUDA, int32_t>(CSRMatrix, int64_t);
+template NDArray CSRGetRowColumnIndices<kDGLCUDA, int64_t>(CSRMatrix, int64_t);
///////////////////////////// CSRGetRowData /////////////////////////////
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
NDArray CSRGetRowData(CSRMatrix csr, int64_t row) {
const int64_t len = impl::CSRGetRowNNZ<XPU, IdType>(csr, row);
const int64_t offset = aten::IndexSelect<IdType>(csr.indptr, row) * sizeof(IdType);
@@ -189,12 +189,12 @@ NDArray CSRGetRowData(CSRMatrix csr, int64_t row) {
return aten::Range(offset, offset + len, csr.indptr->dtype.bits, csr.indptr->ctx);
}
-template NDArray CSRGetRowData<kDLGPU, int32_t>(CSRMatrix, int64_t);
-template NDArray CSRGetRowData<kDLGPU, int64_t>(CSRMatrix, int64_t);
+template NDArray CSRGetRowData<kDGLCUDA, int32_t>(CSRMatrix, int64_t);
+template NDArray CSRGetRowData<kDGLCUDA, int64_t>(CSRMatrix, int64_t);
///////////////////////////// CSRSliceRows /////////////////////////////
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) {
const int64_t num_rows = end - start;
const IdType st_pos = aten::IndexSelect<IdType>(csr.indptr, start);
@@ -215,8 +215,8 @@ CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) {
csr.sorted);
}
-template CSRMatrix CSRSliceRows<kDLGPU, int32_t>(CSRMatrix, int64_t, int64_t);
-template CSRMatrix CSRSliceRows<kDLGPU, int64_t>(CSRMatrix, int64_t, int64_t);
+template CSRMatrix CSRSliceRows<kDGLCUDA, int32_t>(CSRMatrix, int64_t, int64_t);
+template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, int64_t, int64_t);
/*!
* \brief Copy data segment to output buffers
@@ -243,7 +243,7 @@ __global__ void _SegmentCopyKernel(
}
}
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
CSRMatrix CSRSliceRows(CSRMatrix csr, NDArray rows) {
cudaStream_t stream = runtime::getCurrentCUDAStream();
const int64_t len = rows->shape[0];
@@ -272,8 +272,8 @@ CSRMatrix CSRSliceRows(CSRMatrix csr, NDArray rows) {
csr.sorted);
}
-template CSRMatrix CSRSliceRows<kDLGPU, int32_t>(CSRMatrix , NDArray);
-template CSRMatrix CSRSliceRows<kDLGPU, int64_t>(CSRMatrix , NDArray);
+template CSRMatrix CSRSliceRows<kDGLCUDA, int32_t>(CSRMatrix , NDArray);
+template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix , NDArray);
///////////////////////////// CSRGetDataAndIndices /////////////////////////////
@@ -345,7 +345,7 @@ __global__ void _SortedSearchKernel(
}
}
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
std::vector<NDArray> CSRGetDataAndIndices(CSRMatrix csr, NDArray row, NDArray col) {
const auto rowlen = row->shape[0];
const auto collen = col->shape[0];
@@ -392,9 +392,9 @@ std::vector<NDArray> CSRGetDataAndIndices(CSRMatrix csr, NDArray row, NDArray co
return {ret_row, ret_col, ret_data};
}
-template std::vector<NDArray> CSRGetDataAndIndices<kDLGPU, int32_t>(
+template std::vector<NDArray> CSRGetDataAndIndices<kDGLCUDA, int32_t>(
CSRMatrix csr, NDArray rows, NDArray cols);
-template std::vector<NDArray> CSRGetDataAndIndices<kDLGPU, int64_t>(
+template std::vector<NDArray> CSRGetDataAndIndices<kDGLCUDA, int64_t>(
CSRMatrix csr, NDArray rows, NDArray cols);
///////////////////////////// CSRSliceMatrix /////////////////////////////
@@ -422,7 +422,7 @@ __global__ void _SegmentMaskColKernel(
}
}
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
CSRMatrix CSRSliceMatrix(CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols) {
cudaStream_t stream = runtime::getCurrentCUDAStream();
const auto& ctx = rows->ctx;
@@ -501,9 +501,9 @@ CSRMatrix CSRSliceMatrix(CSRMatrix csr, runtime::NDArray rows, runtime::NDArray
ret_col, ret_data);
}
-template CSRMatrix CSRSliceMatrix<kDLGPU, int32_t>(
+template CSRMatrix CSRSliceMatrix<kDGLCUDA, int32_t>(
CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols);
-template CSRMatrix CSRSliceMatrix<kDLGPU, int64_t>(
+template CSRMatrix CSRSliceMatrix<kDGLCUDA, int64_t>(
CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols);
} // namespace impl
......
@@ -147,53 +147,53 @@ void SpMMCoo(const std::string& op, const std::string& reduce,
}
}
-template void SpMMCsr<kDLGPU, int32_t, 16>(
+template void SpMMCsr<kDGLCUDA, int32_t, 16>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const CSRMatrix& csr,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int64_t, 16>(
+template void SpMMCsr<kDGLCUDA, int64_t, 16>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const CSRMatrix& csr,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int32_t, 32>(
+template void SpMMCsr<kDGLCUDA, int32_t, 32>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const CSRMatrix& csr,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int64_t, 32>(
+template void SpMMCsr<kDGLCUDA, int64_t, 32>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const CSRMatrix& csr,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int32_t, 64>(
+template void SpMMCsr<kDGLCUDA, int32_t, 64>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const CSRMatrix& csr,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int64_t, 64>(
+template void SpMMCsr<kDGLCUDA, int64_t, 64>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const CSRMatrix& csr,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int32_t, 16>(
+template void SpMMCoo<kDGLCUDA, int32_t, 16>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const COOMatrix& coo,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int64_t, 16>(
+template void SpMMCoo<kDGLCUDA, int64_t, 16>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const COOMatrix& coo,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int32_t, 32>(
+template void SpMMCoo<kDGLCUDA, int32_t, 32>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const COOMatrix& coo,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int64_t, 32>(
+template void SpMMCoo<kDGLCUDA, int64_t, 32>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const COOMatrix& coo,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int32_t, 64>(
+template void SpMMCoo<kDGLCUDA, int32_t, 64>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const COOMatrix& coo,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int64_t, 64>(
+template void SpMMCoo<kDGLCUDA, int64_t, 64>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const COOMatrix& coo,
NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
......
@@ -203,7 +203,7 @@ cusparseStatus_t Xcsrmm2<double>(cusparseHandle_t handle, cusparseOperation_t tr
/*! Cusparse implementation of SpMM on Csr format. */
template <typename DType, typename IdType>
void CusparseCsrmm2(
-const DLContext& ctx,
+const DGLContext& ctx,
const CSRMatrix& csr,
const DType* B_data, const DType* A_data,
DType* C_data,
@@ -303,7 +303,7 @@ void CusparseCsrmm2(
/*! Cusparse implementation of SpMM on Csr format. */
template <typename DType, typename IdType>
void CusparseCsrmm2Hetero(
-const DLContext& ctx,
+const DGLContext& ctx,
const CSRMatrix& csr,
const DType* B_data, const DType* A_data,
DType* C_data,
......
@@ -199,37 +199,37 @@ void SpMMCsrHetero(const std::string& op, const std::string& reduce,
});
}
-template void SpMMCsrHetero<kDLGPU, int32_t, 16>(
+template void SpMMCsrHetero<kDGLCUDA, int32_t, 16>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int64_t, 16>(
+template void SpMMCsrHetero<kDGLCUDA, int64_t, 16>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int32_t, 32>(
+template void SpMMCsrHetero<kDGLCUDA, int32_t, 32>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int64_t, 32>(
+template void SpMMCsrHetero<kDGLCUDA, int64_t, 32>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int32_t, 64>(
+template void SpMMCsrHetero<kDGLCUDA, int32_t, 64>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int64_t, 64>(
+template void SpMMCsrHetero<kDGLCUDA, int64_t, 64>(
const std::string& op, const std::string& reduce,
const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
......
@@ -11,7 +11,7 @@
namespace dgl {
namespace cuda {
-bool AllTrue(int8_t* flags, int64_t length, const DLContext& ctx) {
+bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx) {
auto device = runtime::DeviceAPI::Get(ctx);
int8_t* rst = static_cast<int8_t*>(device->AllocWorkspace(ctx, 1));
// Call CUB's reduction
......
@@ -7,9 +7,9 @@
#define DGL_ARRAY_CUDA_UTILS_H_
#include <dmlc/logging.h>
+#include <dgl/runtime/c_runtime_api.h>
#include <dgl/runtime/device_api.h>
#include <dgl/runtime/ndarray.h>
-#include <dlpack/dlpack.h>
#include "../../runtime/cuda/cuda_common.h"
namespace dgl {
@@ -115,7 +115,7 @@ __device__ __forceinline__ T _ldg(T* addr) {
* \param ctx Device context.
* \return True if all the flags are true.
*/
-bool AllTrue(int8_t* flags, int64_t length, const DLContext& ctx);
+bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx);
/*!
* \brief CUDA Kernel of filling the vector started from ptr of size length
@@ -187,7 +187,7 @@ __global__ void _LinearSearchKernel(
template <typename DType>
inline DType GetCUDAScalar(
runtime::DeviceAPI* device_api,
-DLContext ctx,
+DGLContext ctx,
const DType* cuda_ptr) {
DType result;
device_api->CopyDataFromTo(
@@ -195,8 +195,8 @@ inline DType GetCUDAScalar(
&result, 0,
sizeof(result),
ctx,
-DLContext{kDLCPU, 0},
-DLDataTypeTraits<DType>::dtype);
+DGLContext{kDGLCPU, 0},
+DGLDataTypeTraits<DType>::dtype);
return result;
}
......
@@ -25,7 +25,7 @@ NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) {
std::vector<int64_t> shape{len};
CHECK(array.IsPinned());
-CHECK_EQ(index->ctx.device_type, kDLGPU);
+CHECK_EQ(index->ctx.device_type, kDGLCUDA);
for (int d = 1; d < array->ndim; ++d) {
num_feat *= array->shape[d];
@@ -85,8 +85,8 @@ void IndexScatterGPUToCPU(NDArray dest, IdArray index, NDArray source) {
std::vector<int64_t> shape{len};
CHECK(dest.IsPinned());
-CHECK_EQ(index->ctx.device_type, kDLGPU);
-CHECK_EQ(source->ctx.device_type, kDLGPU);
+CHECK_EQ(index->ctx.device_type, kDGLCUDA);
+CHECK_EQ(source->ctx.device_type, kDGLCUDA);
for (int d = 1; d < source->ndim; ++d) {
num_feat *= source->shape[d];
......
@@ -15,7 +15,7 @@ namespace array {
using namespace dgl::runtime;
-template<DLDeviceType XPU, typename IdType>
+template<DGLDeviceType XPU, typename IdType>
FilterRef CreateSetFilter(IdArray set);
DGL_REGISTER_GLOBAL("utils.filter._CAPI_DGLFilterCreateFromSet")
@@ -23,10 +23,10 @@ DGL_REGISTER_GLOBAL("utils.filter._CAPI_DGLFilterCreateFromSet")
IdArray array = args[0];
auto ctx = array->ctx;
// TODO(nv-dlasalle): Implement CPU version.
-if (ctx.device_type == kDLGPU) {
+if (ctx.device_type == kDGLCUDA) {
#ifdef DGL_USE_CUDA
ATEN_ID_TYPE_SWITCH(array->dtype, IdType, {
-*rv = CreateSetFilter<kDLGPU, IdType>(array);
+*rv = CreateSetFilter<kDGLCUDA, IdType>(array);
});
#else
LOG(FATAL) << "GPU support not compiled.";
......
@@ -8,6 +8,7 @@
#ifdef USE_TVM
#include <featgraph.h>
+#include <dgl/runtime/dlpack_convert.h>
#endif // USE_TVM
#include "kernel_decl.h"
@@ -70,7 +71,7 @@ void SegmentMM(const NDArray A,
}
CHECK_EQ(B->shape[0], seglen_A.NumElements())
<< "segment_mm expects len(seglen_A) == B.shape[0]";
-CHECK_EQ(seglen_A->ctx.device_type, kDLCPU)
+CHECK_EQ(seglen_A->ctx.device_type, kDGLCPU)
<< "segment_mm expects seglen_A to be on CPU.";
CHECK(A->ctx == B->ctx) << "segment_mm expects A and B to be of the same device";
ATEN_XPU_SWITCH_CUDA(A->ctx.device_type, XPU, "SegmentMM", {
@@ -89,7 +90,7 @@ void SegmentMMBackwardB(const NDArray A,
CHECK_EQ(A->ndim, 2) << "segment_mm_backward operator expects a 2D tensor for the first input.";
CHECK_EQ(dC->ndim, 2)
<< "segment_mm_backward operator expects a 2D tensor for the second input.";
-CHECK_EQ(seglen->ctx.device_type, kDLCPU)
+CHECK_EQ(seglen->ctx.device_type, kDGLCPU)
<< "segment_mm expects seglen to be on CPU.";
ATEN_XPU_SWITCH_CUDA(A->ctx.device_type, XPU, "SegmentMMBackwardB", {
ATEN_ID_TYPE_SWITCH(seglen->dtype, IdType, {
@@ -829,8 +830,12 @@ DGL_REGISTER_GLOBAL("sparse._CAPI_FG_SDDMMTreeReduction")
// {lhs, rhs, out},
// {"U_data", "E_data", "V_data"});
COOMatrix coo = graph.sptr()->GetCOOMatrix(0);
-dgl::featgraph::SDDMMTreeReduction(coo.row.ToDLPack(), coo.col.ToDLPack(),
-lhs.ToDLPack(), rhs.ToDLPack(), out.ToDLPack());
+dgl::featgraph::SDDMMTreeReduction(
+DLPackConvert::ToDLPack(coo.row),
+DLPackConvert::ToDLPack(coo.col),
+DLPackConvert::ToDLPack(lhs),
+DLPackConvert::ToDLPack(rhs),
+DLPackConvert::ToDLPack(out));
});
#endif // USE_TVM
......
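The kernel.cc hunk above is the visible face of the decoupling: `NDArray::ToDLPack()` call sites move to the new `DLPackConvert` helper from `dgl/runtime/dlpack_convert.h`. A minimal sketch of the migration, assuming the helper lives in `dgl::runtime` and keeps the conventional `DLManagedTensor*` return type (neither is confirmed by this diff):

```cpp
#include <dgl/runtime/dlpack_convert.h>
#include <dgl/runtime/ndarray.h>

// Hypothetical wrapper; only the DLPackConvert::ToDLPack call is from the diff.
DLManagedTensor* ExportToDLPack(dgl::runtime::NDArray arr) {
  // Before this commit: return arr.ToDLPack();
  return dgl::runtime::DLPackConvert::ToDLPack(arr);
}
```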
@@ -16,7 +16,7 @@ namespace aten {
NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) {
#ifdef DGL_USE_CUDA
CHECK(array.IsPinned()) << "Input array must be in pinned memory.";
-CHECK_EQ(index->ctx.device_type, kDLGPU) << "Index must be on the GPU.";
+CHECK_EQ(index->ctx.device_type, kDGLCUDA) << "Index must be on the GPU.";
CHECK_GE(array->ndim, 1) << "Input array must have at least 1 dimension.";
CHECK_EQ(index->ndim, 1) << "Index must be a 1D array.";
@@ -34,8 +34,8 @@ NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) {
void IndexScatterGPUToCPU(NDArray dest, IdArray index, NDArray source) {
#ifdef DGL_USE_CUDA
CHECK(dest.IsPinned()) << "Destination array must be in pinned memory.";
-CHECK_EQ(index->ctx.device_type, kDLGPU) << "Index must be on the GPU.";
-CHECK_EQ(source->ctx.device_type, kDLGPU) << "Source array must be on the GPU.";
+CHECK_EQ(index->ctx.device_type, kDGLCUDA) << "Index must be on the GPU.";
+CHECK_EQ(source->ctx.device_type, kDGLCUDA) << "Source array must be on the GPU.";
CHECK_EQ(dest->dtype, source->dtype) << "Destination array and source "
"array must have the same dtype.";
CHECK_GE(dest->ndim, 1) << "Destination array must have at least 1 dimension.";
......
@@ -41,8 +41,8 @@ dgl::runtime::NDArray CopyVectorToNDArray(
const std::vector<DType>& vec) {
using dgl::runtime::NDArray;
const int64_t len = vec.size();
-NDArray a = NDArray::Empty({len}, DLDataType{kDLInt, sizeof(IdType) * 8, 1},
-DLContext{kDLCPU, 0});
+NDArray a = NDArray::Empty({len}, DGLDataType{kDGLInt, sizeof(IdType) * 8, 1},
+DGLContext{kDGLCPU, 0});
std::copy(vec.begin(), vec.end(), static_cast<IdType*>(a->data));
return a;
}
......
@@ -50,7 +50,7 @@ template void GroupIndexShuffle<int64_t>(
template <typename IdType>
IdArray RandomPerm(int64_t num_nodes) {
-IdArray perm = aten::NewIdArray(num_nodes, DLContext{kDLCPU, 0}, sizeof(IdType) * 8);
+IdArray perm = aten::NewIdArray(num_nodes, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
IdType* perm_data = static_cast<IdType*>(perm->data);
std::iota(perm_data, perm_data + num_nodes, 0);
IndexShuffle(perm_data, num_nodes);
@@ -59,7 +59,7 @@ IdArray RandomPerm(int64_t num_nodes) {
template <typename IdType>
IdArray GroupRandomPerm(const IdType *group_idxs, int64_t num_group_idxs, int64_t num_nodes) {
-IdArray perm = aten::NewIdArray(num_nodes, DLContext{kDLCPU, 0}, sizeof(IdType) * 8);
+IdArray perm = aten::NewIdArray(num_nodes, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
IdType* perm_data = static_cast<IdType*>(perm->data);
std::iota(perm_data, perm_data + num_nodes, 0);
GroupIndexShuffle(group_idxs, perm_data, num_group_idxs, num_nodes);
@@ -77,7 +77,7 @@ IdArray GroupRandomPerm(const IdType *group_idxs, int64_t num_group_idxs, int64_
* Finally, we pick the point with the maximum such distance.
* This process will be repeated for ``sample_points`` - 1 times.
*/
-template <DLDeviceType XPU, typename FloatType, typename IdType>
+template <DGLDeviceType XPU, typename FloatType, typename IdType>
void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result) {
const FloatType* array_data = static_cast<FloatType*>(array->data);
@@ -135,20 +135,20 @@ void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_poin
ret_start += sample_points;
}
}
-template void FarthestPointSampler<kDLCPU, float, int32_t>(
+template void FarthestPointSampler<kDGLCPU, float, int32_t>(
NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result);
-template void FarthestPointSampler<kDLCPU, float, int64_t>(
+template void FarthestPointSampler<kDGLCPU, float, int64_t>(
NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result);
-template void FarthestPointSampler<kDLCPU, double, int32_t>(
+template void FarthestPointSampler<kDGLCPU, double, int32_t>(
NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result);
-template void FarthestPointSampler<kDLCPU, double, int64_t>(
+template void FarthestPointSampler<kDGLCPU, double, int64_t>(
NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result);
-template <DLDeviceType XPU, typename FloatType, typename IdType>
+template <DGLDeviceType XPU, typename FloatType, typename IdType>
void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result) {
const int64_t num_nodes = result->shape[0];
const IdType *indptr_data = static_cast<IdType*>(csr.indptr->data);
@@ -181,16 +181,16 @@ void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight,
result_data[v_max] = result_data[u];
}
}
-template void WeightedNeighborMatching<kDLCPU, float, int32_t>(
+template void WeightedNeighborMatching<kDGLCPU, float, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
-template void WeightedNeighborMatching<kDLCPU, float, int64_t>(
+template void WeightedNeighborMatching<kDGLCPU, float, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
-template void WeightedNeighborMatching<kDLCPU, double, int32_t>(
+template void WeightedNeighborMatching<kDGLCPU, double, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
-template void WeightedNeighborMatching<kDLCPU, double, int64_t>(
+template void WeightedNeighborMatching<kDGLCPU, double, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
const int64_t num_nodes = result->shape[0];
const IdType *indptr_data = static_cast<IdType*>(csr.indptr->data);
@@ -221,8 +221,8 @@ void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
}
}
}
-template void NeighborMatching<kDLCPU, int32_t>(const aten::CSRMatrix &csr, IdArray result);
-template void NeighborMatching<kDLCPU, int64_t>(const aten::CSRMatrix &csr, IdArray result);
+template void NeighborMatching<kDGLCPU, int32_t>(const aten::CSRMatrix &csr, IdArray result);
+template void NeighborMatching<kDGLCPU, int64_t>(const aten::CSRMatrix &csr, IdArray result);
} // namespace impl
} // namespace geometry
......
@@ -150,7 +150,7 @@ bool Colorize(IdType * result_data, int64_t num_nodes, float * const prop) {
* are marked, mark this node with its id. Else match this (BLUE, RED) node
* pair and mark them with the smaller id between them.
*/
-template <DLDeviceType XPU, typename FloatType, typename IdType>
+template <DGLDeviceType XPU, typename FloatType, typename IdType>
void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result) {
cudaStream_t stream = runtime::getCurrentCUDAStream();
const auto& ctx = result->ctx;
@@ -182,13 +182,13 @@ void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight,
}
device->FreeWorkspace(ctx, prop);
}
-template void WeightedNeighborMatching<kDLGPU, float, int32_t>(
+template void WeightedNeighborMatching<kDGLCUDA, float, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
-template void WeightedNeighborMatching<kDLGPU, float, int64_t>(
+template void WeightedNeighborMatching<kDGLCUDA, float, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
-template void WeightedNeighborMatching<kDLGPU, double, int32_t>(
+template void WeightedNeighborMatching<kDGLCUDA, double, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
-template void WeightedNeighborMatching<kDLGPU, double, int64_t>(
+template void WeightedNeighborMatching<kDGLCUDA, double, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
/*! \brief Unweighted neighbor matching procedure (GPU version).
@@ -201,7 +201,7 @@ template void WeightedNeighborMatching<kDLGPU, double, int64_t>(
* 2. Graph is sparse, thus neighborhood of each node is small,
* which is suitable for GPU implementation.
*/
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
const int64_t num_edges = csr.indices->shape[0];
const auto& ctx = result->ctx;
@@ -211,7 +211,7 @@ void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
// generate random weights
cudaStream_t stream = runtime::getCurrentCUDAStream();
NDArray weight = NDArray::Empty(
-{num_edges}, DLDataType{kDLFloat, sizeof(float) * 8, 1}, ctx);
+{num_edges}, DGLDataType{kDGLFloat, sizeof(float) * 8, 1}, ctx);
float *weight_data = static_cast<float*>(weight->data);
uint64_t seed = dgl::RandomEngine::ThreadLocal()->RandInt(UINT64_MAX);
auto num_threads = cuda::FindNumThreads(num_edges);
@@ -221,8 +221,8 @@ WeightedNeighborMatching<XPU, float, IdType>(csr, weight, result);
WeightedNeighborMatching<XPU, float, IdType>(csr, weight, result);
}
-template void NeighborMatching<kDLGPU, int32_t>(const aten::CSRMatrix &csr, IdArray result);
-template void NeighborMatching<kDLGPU, int64_t>(const aten::CSRMatrix &csr, IdArray result);
+template void NeighborMatching<kDGLCUDA, int32_t>(const aten::CSRMatrix &csr, IdArray result);
+template void NeighborMatching<kDGLCUDA, int64_t>(const aten::CSRMatrix &csr, IdArray result);
} // namespace impl
} // namespace geometry
......