Unverified Commit cded5b80 authored by Xin Yao, committed by GitHub

[Feature] Bump DLPack to v0.7 and decouple DLPack from the core library (#4454)

* rename `DLContext` to `DGLContext`

* rename `kDLGPU` to `kDLCUDA`

* replace DLTensor with DGLArray

* fix linting

* Unify DGLType and DLDataType to DGLDataType

* Fix FFI

* rename DLDeviceType to DGLDeviceType

* decouple dlpack from the core library

* fix bug

* fix lint

* fix merge

* fix build

* address comments

* rename dl_converter to dlpack_convert

* remove redundant comments
parent f1689ad0
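Taken together, the renames give DGL its own copies of the DLPack device, context, and dtype definitions, so a future DLPack bump cannot ripple through every public signature. A quick old-to-new mapping, reconstructed from the commit message and the hunks below (upstream DLPack v0.7 renames `kDLGPU` to `kDLCUDA`; DGL layers its own `kDGLCUDA` on top):

```cpp
// Old DLPack-coupled name         New DGL-owned name
// ----------------------------    ----------------------------
// DLContext                       DGLContext
// DLDeviceType                    DGLDeviceType
// kDLGPU                          kDGLCUDA
// kDLCPU                          kDGLCPU
// DLTensor                        DGLArray
// DLDataType / DGLType            DGLDataType
// arr.ToDLPack()                  DLPackConvert::ToDLPack(arr)
```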
@@ -240,7 +240,7 @@ __global__ void _CSRRowWiseSampleUniformReplaceKernel(
 ///////////////////////////// CSR sampling //////////////////////////
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 COOMatrix CSRRowWiseSamplingUniform(CSRMatrix mat,
                                     IdArray rows,
                                     const int64_t num_picks,
@@ -311,7 +311,7 @@ COOMatrix CSRRowWiseSamplingUniform(CSRMatrix mat,
   device->CopyDataFromTo(out_ptr, num_rows * sizeof(new_len), &new_len, 0,
                          sizeof(new_len),
                          ctx,
-                         DGLContext{kDLCPU, 0},
+                         DGLContext{kDGLCPU, 0},
                          mat.indptr->dtype);
   CUDA_CALL(cudaEventRecord(copyEvent, stream));
@@ -369,9 +369,9 @@ COOMatrix CSRRowWiseSamplingUniform(CSRMatrix mat,
       picked_col, picked_idx);
 }
 
-template COOMatrix CSRRowWiseSamplingUniform<kDLGPU, int32_t>(
+template COOMatrix CSRRowWiseSamplingUniform<kDGLCUDA, int32_t>(
     CSRMatrix, IdArray, int64_t, bool);
-template COOMatrix CSRRowWiseSamplingUniform<kDLGPU, int64_t>(
+template COOMatrix CSRRowWiseSamplingUniform<kDGLCUDA, int64_t>(
     CSRMatrix, IdArray, int64_t, bool);
 
 }  // namespace impl
......
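The hunks above read a single length value back from the GPU with `DeviceAPI::CopyDataFromTo`, using the renamed `DGLContext{kDGLCPU, 0}` as the destination context, then record a CUDA event on the stream. A minimal sketch of that pattern, assuming the argument order shown in the hunk (src, src_offset, dst, dst_offset, nbytes, src_ctx, dst_ctx, dtype); the helper name is hypothetical:

```cpp
// Hypothetical helper: copy one IdType value from device memory to the host
// and wait on an event for just this copy, mirroring the hunk above.
template <typename IdType>
IdType CopyLengthToHost(dgl::runtime::DeviceAPI* device, const IdType* d_len,
                        DGLContext ctx, DGLDataType dtype, cudaStream_t stream) {
  IdType h_len;
  device->CopyDataFromTo(d_len, 0, &h_len, 0, sizeof(h_len),
                         ctx, DGLContext{kDGLCPU, 0}, dtype);
  cudaEvent_t copy_event;
  CUDA_CALL(cudaEventCreate(&copy_event));
  CUDA_CALL(cudaEventRecord(copy_event, stream));
  CUDA_CALL(cudaEventSynchronize(copy_event));  // block only on this copy
  CUDA_CALL(cudaEventDestroy(copy_event));
  return h_len;
}
```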
@@ -416,7 +416,7 @@ __global__ void _CSRRowWiseSampleReplaceKernel(
  * @param replace Is replacement sampling?
  * @author pengqirong (OPPO), dlasalle and Xin from Nvidia.
  */
-template <DLDeviceType XPU, typename IdType, typename FloatType>
+template <DGLDeviceType XPU, typename IdType, typename FloatType>
 COOMatrix CSRRowWiseSampling(CSRMatrix mat,
                              IdArray rows,
                              int64_t num_picks,
@@ -492,7 +492,7 @@ COOMatrix CSRRowWiseSampling(CSRMatrix mat,
   device->CopyDataFromTo(temp_ptr, num_rows * sizeof(temp_len), &temp_len, 0,
                          sizeof(temp_len),
                          ctx,
-                         DGLContext{kDLCPU, 0},
+                         DGLContext{kDGLCPU, 0},
                          mat.indptr->dtype);
   device->StreamSync(ctx, stream);
@@ -523,7 +523,7 @@ COOMatrix CSRRowWiseSampling(CSRMatrix mat,
   device->CopyDataFromTo(out_ptr, num_rows * sizeof(new_len), &new_len, 0,
                          sizeof(new_len),
                          ctx,
-                         DGLContext{kDLCPU, 0},
+                         DGLContext{kDGLCPU, 0},
                          mat.indptr->dtype);
   CUDA_CALL(cudaEventRecord(copyEvent, stream));
@@ -651,13 +651,13 @@ COOMatrix CSRRowWiseSampling(CSRMatrix mat,
       picked_col, picked_idx);
 }
 
-template COOMatrix CSRRowWiseSampling<kDLGPU, int32_t, float>(
+template COOMatrix CSRRowWiseSampling<kDGLCUDA, int32_t, float>(
    CSRMatrix, IdArray, int64_t, FloatArray, bool);
-template COOMatrix CSRRowWiseSampling<kDLGPU, int64_t, float>(
+template COOMatrix CSRRowWiseSampling<kDGLCUDA, int64_t, float>(
    CSRMatrix, IdArray, int64_t, FloatArray, bool);
-template COOMatrix CSRRowWiseSampling<kDLGPU, int32_t, double>(
+template COOMatrix CSRRowWiseSampling<kDGLCUDA, int32_t, double>(
    CSRMatrix, IdArray, int64_t, FloatArray, bool);
-template COOMatrix CSRRowWiseSampling<kDLGPU, int64_t, double>(
+template COOMatrix CSRRowWiseSampling<kDGLCUDA, int64_t, double>(
    CSRMatrix, IdArray, int64_t, FloatArray, bool);
 
 }  // namespace impl
......
@@ -54,52 +54,52 @@ void SDDMMCoo(const std::string& op,
 }
 
-template void SDDMMCsr<kDLGPU, int32_t, 16>(
+template void SDDMMCsr<kDGLCUDA, int32_t, 16>(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int64_t, 16>(
+template void SDDMMCsr<kDGLCUDA, int64_t, 16>(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int32_t, 32>(
+template void SDDMMCsr<kDGLCUDA, int32_t, 32>(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int64_t, 32>(
+template void SDDMMCsr<kDGLCUDA, int64_t, 32>(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int32_t, 64>(
+template void SDDMMCsr<kDGLCUDA, int32_t, 64>(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCsr<kDLGPU, int64_t, 64>(
+template void SDDMMCsr<kDGLCUDA, int64_t, 64>(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int32_t, 16>(
+template void SDDMMCoo<kDGLCUDA, int32_t, 16>(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int64_t, 16>(
+template void SDDMMCoo<kDGLCUDA, int64_t, 16>(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int32_t, 32>(
+template void SDDMMCoo<kDGLCUDA, int32_t, 32>(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int64_t, 32>(
+template void SDDMMCoo<kDGLCUDA, int64_t, 32>(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int32_t, 64>(
+template void SDDMMCoo<kDGLCUDA, int32_t, 64>(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
-template void SDDMMCoo<kDLGPU, int64_t, 64>(
+template void SDDMMCoo<kDGLCUDA, int64_t, 64>(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out,
    int lhs_target, int rhs_target);
......
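The long runs of `template void SDDMMCsr<kDGLCUDA, ...>` lines above are explicit instantiation definitions: the CUDA translation unit must emit one symbol per (device type, index type, feature bit-width) triple so the host-side dispatcher can link against them. A self-contained sketch of the mechanism, with illustrative names (the real dispatch goes through DGL's ATEN_*_SWITCH macros, one of which appears later in this diff):

```cpp
#include <cstdint>
#include <stdexcept>

// Illustrative stand-in for the renamed device enum; the values are made up.
enum DeviceKind { CPU = 1, CUDA = 2 };

template <DeviceKind XPU, typename IdType>
void Kernel(const IdType* data, int64_t n) { /* device-specific work */ }

// Explicit instantiation definitions pin the symbols in this translation
// unit, exactly what the template ... <kDGLCUDA, ...>(...) lines above do.
template void Kernel<CPU, int32_t>(const int32_t*, int64_t);
template void Kernel<CUDA, int32_t>(const int32_t*, int64_t);

// Runtime dispatch over the device enum: the job the switch macros perform.
void Run(DeviceKind dev, const int32_t* data, int64_t n) {
  switch (dev) {
    case CPU:  Kernel<CPU, int32_t>(data, n); break;
    case CUDA: Kernel<CUDA, int32_t>(data, n); break;
    default:   throw std::runtime_error("unsupported device");
  }
}
```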
@@ -42,42 +42,42 @@ void SDDMMCooHetero(const std::string& op,
 }
 
-template void SDDMMCooHetero<kDLGPU, int32_t, 16>(
+template void SDDMMCooHetero<kDGLCUDA, int32_t, 16>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<COOMatrix>& vec_coo,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int64_t, 16>(
+template void SDDMMCooHetero<kDGLCUDA, int64_t, 16>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<COOMatrix>& vec_coo,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int32_t, 32>(
+template void SDDMMCooHetero<kDGLCUDA, int32_t, 32>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<COOMatrix>& vec_coo,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int64_t, 32>(
+template void SDDMMCooHetero<kDGLCUDA, int64_t, 32>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<COOMatrix>& vec_coo,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int32_t, 64>(
+template void SDDMMCooHetero<kDGLCUDA, int32_t, 64>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<COOMatrix>& vec_coo,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCooHetero<kDLGPU, int64_t, 64>(
+template void SDDMMCooHetero<kDGLCUDA, int64_t, 64>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<COOMatrix>& vec_coo,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
......
@@ -41,42 +41,42 @@ void SDDMMCsrHetero(const std::string& op,
   });
 }
 
-template void SDDMMCsrHetero<kDLGPU, int32_t, 16>(
+template void SDDMMCsrHetero<kDGLCUDA, int32_t, 16>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<CSRMatrix>& vec_csr,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int64_t, 16>(
+template void SDDMMCsrHetero<kDGLCUDA, int64_t, 16>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<CSRMatrix>& vec_csr,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int32_t, 32>(
+template void SDDMMCsrHetero<kDGLCUDA, int32_t, 32>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<CSRMatrix>& vec_csr,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int64_t, 32>(
+template void SDDMMCsrHetero<kDGLCUDA, int64_t, 32>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<CSRMatrix>& vec_csr,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int32_t, 64>(
+template void SDDMMCsrHetero<kDGLCUDA, int32_t, 64>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<CSRMatrix>& vec_csr,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
    std::vector<NDArray> out, int lhs_target, int rhs_target,
    const std::vector<dgl_type_t>& in_eid,
    const std::vector<dgl_type_t>& out_eid);
-template void SDDMMCsrHetero<kDLGPU, int64_t, 64>(
+template void SDDMMCsrHetero<kDGLCUDA, int64_t, 64>(
    const std::string& op, const BcastOff& bcast,
    const std::vector<CSRMatrix>& vec_csr,
    const std::vector<NDArray>& lhs, const std::vector<NDArray>& rhs,
......
@@ -73,113 +73,113 @@ void BackwardSegmentCmp(NDArray feat,
 }
 
-template void SegmentReduce<kDLGPU, int32_t, 16>(
+template void SegmentReduce<kDGLCUDA, int32_t, 16>(
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
-template void SegmentReduce<kDLGPU, int64_t, 16>(
+template void SegmentReduce<kDGLCUDA, int64_t, 16>(
    const std::string &op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
-template void SegmentReduce<kDLGPU, int32_t, 32>(
+template void SegmentReduce<kDGLCUDA, int32_t, 32>(
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
-template void SegmentReduce<kDLGPU, int64_t, 32>(
+template void SegmentReduce<kDGLCUDA, int64_t, 32>(
    const std::string &op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
-template void SegmentReduce<kDLGPU, int32_t, 64>(
+template void SegmentReduce<kDGLCUDA, int32_t, 64>(
    const std::string &op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
-template void SegmentReduce<kDLGPU, int64_t, 64>(
+template void SegmentReduce<kDGLCUDA, int64_t, 64>(
    const std::string &op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
-template void ScatterAdd<kDLGPU, int32_t, 16>(
+template void ScatterAdd<kDGLCUDA, int32_t, 16>(
    NDArray feat,
    NDArray idx,
    NDArray out);
-template void ScatterAdd<kDLGPU, int64_t, 16>(
+template void ScatterAdd<kDGLCUDA, int64_t, 16>(
    NDArray feat,
    NDArray idx,
    NDArray out);
-template void ScatterAdd<kDLGPU, int32_t, 32>(
+template void ScatterAdd<kDGLCUDA, int32_t, 32>(
    NDArray feat,
    NDArray idx,
    NDArray out);
-template void ScatterAdd<kDLGPU, int64_t, 32>(
+template void ScatterAdd<kDGLCUDA, int64_t, 32>(
    NDArray feat,
    NDArray idx,
    NDArray out);
-template void ScatterAdd<kDLGPU, int32_t, 64>(
+template void ScatterAdd<kDGLCUDA, int32_t, 64>(
    NDArray feat,
    NDArray idx,
    NDArray out);
-template void ScatterAdd<kDLGPU, int64_t, 64>(
+template void ScatterAdd<kDGLCUDA, int64_t, 64>(
    NDArray feat,
    NDArray idx,
    NDArray out);
-template void UpdateGradMinMax_hetero<kDLGPU, int32_t, 16>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, 16>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int64_t, 16>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, 16>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int32_t, 32>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, 32>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int64_t, 32>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, 32>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int32_t, 64>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, 64>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void UpdateGradMinMax_hetero<kDLGPU, int64_t, 64>(
+template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, 64>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
-template void BackwardSegmentCmp<kDLGPU, int32_t, 16>(
+template void BackwardSegmentCmp<kDGLCUDA, int32_t, 16>(
    NDArray feat,
    NDArray arg,
    NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int64_t, 16>(
+template void BackwardSegmentCmp<kDGLCUDA, int64_t, 16>(
    NDArray feat,
    NDArray arg,
    NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int32_t, 32>(
+template void BackwardSegmentCmp<kDGLCUDA, int32_t, 32>(
    NDArray feat,
    NDArray arg,
    NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int64_t, 32>(
+template void BackwardSegmentCmp<kDGLCUDA, int64_t, 32>(
    NDArray feat,
    NDArray arg,
    NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int32_t, 64>(
+template void BackwardSegmentCmp<kDGLCUDA, int32_t, 64>(
    NDArray feat,
    NDArray arg,
    NDArray out);
-template void BackwardSegmentCmp<kDLGPU, int64_t, 64>(
+template void BackwardSegmentCmp<kDGLCUDA, int64_t, 64>(
    NDArray feat,
    NDArray arg,
    NDArray out);
......
@@ -71,7 +71,7 @@ __global__ void _COOGetRowNNZKernel(
   }
 }
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
   cudaStream_t stream = runtime::getCurrentCUDAStream();
   const auto& ctx = coo.row->ctx;
@@ -84,12 +84,12 @@ int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
       nb, nt, 0, stream,
       coo.row.Ptr<IdType>(), rst.Ptr<IdType>(),
       row, nnz);
-  rst = rst.CopyTo(DLContext{kDLCPU, 0});
+  rst = rst.CopyTo(DGLContext{kDGLCPU, 0});
   return *rst.Ptr<IdType>();
 }
 
-template int64_t COOGetRowNNZ<kDLGPU, int32_t>(COOMatrix, int64_t);
-template int64_t COOGetRowNNZ<kDLGPU, int64_t>(COOMatrix, int64_t);
+template int64_t COOGetRowNNZ<kDGLCUDA, int32_t>(COOMatrix, int64_t);
+template int64_t COOGetRowNNZ<kDGLCUDA, int64_t>(COOMatrix, int64_t);
 
 template <typename IdType>
 __global__ void _COOGetAllRowNNZKernel(
@@ -104,7 +104,7 @@ __global__ void _COOGetAllRowNNZKernel(
   }
 }
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
   cudaStream_t stream = runtime::getCurrentCUDAStream();
   const auto& ctx = coo.row->ctx;
@@ -112,7 +112,7 @@ NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
   IdType num_rows = coo.num_rows;
   IdType num_queries = rows->shape[0];
   if (num_queries == 1) {
-    auto rows_cpu = rows.CopyTo(DLContext{kDLCPU, 0});
+    auto rows_cpu = rows.CopyTo(DGLContext{kDGLCPU, 0});
     int64_t row = *rows_cpu.Ptr<IdType>();
     IdType nt = 1024;
     IdType nb = dgl::cuda::FindNumBlocks<'x'>((nnz + nt - 1) / nt);
@@ -136,8 +136,8 @@ NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
   }
 }
 
-template NDArray COOGetRowNNZ<kDLGPU, int32_t>(COOMatrix, NDArray);
-template NDArray COOGetRowNNZ<kDLGPU, int64_t>(COOMatrix, NDArray);
+template NDArray COOGetRowNNZ<kDGLCUDA, int32_t>(COOMatrix, NDArray);
+template NDArray COOGetRowNNZ<kDGLCUDA, int64_t>(COOMatrix, NDArray);
 
 }  // namespace impl
 }  // namespace aten
......
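COOGetRowNNZ above returns its scalar by copying a one-element NDArray to the host and dereferencing the pointer. The same idiom as a standalone hedged sketch (the helper name is hypothetical):

```cpp
// Bring a one-element device NDArray to the host and read it, mirroring the
// rst.CopyTo(DGLContext{kDGLCPU, 0}) / *rst.Ptr<IdType>() lines above.
template <typename IdType>
IdType ReadScalar(dgl::runtime::NDArray rst) {
  rst = rst.CopyTo(DGLContext{kDGLCPU, 0});  // device-to-host copy
  return *rst.Ptr<IdType>();
}
```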
@@ -21,7 +21,7 @@ namespace impl {
 
 ///////////////////////////// CSRIsNonZero /////////////////////////////
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 bool CSRIsNonZero(CSRMatrix csr, int64_t row, int64_t col) {
   cudaStream_t stream = runtime::getCurrentCUDAStream();
   const auto& ctx = csr.indptr->ctx;
@@ -38,14 +38,14 @@ bool CSRIsNonZero(CSRMatrix csr, int64_t row, int64_t col) {
       rows.Ptr<IdType>(), cols.Ptr<IdType>(),
       1, 1, 1,
       static_cast<IdType*>(nullptr), static_cast<IdType>(-1), out.Ptr<IdType>());
-  out = out.CopyTo(DLContext{kDLCPU, 0});
+  out = out.CopyTo(DGLContext{kDGLCPU, 0});
   return *out.Ptr<IdType>() != -1;
 }
 
-template bool CSRIsNonZero<kDLGPU, int32_t>(CSRMatrix, int64_t, int64_t);
-template bool CSRIsNonZero<kDLGPU, int64_t>(CSRMatrix, int64_t, int64_t);
+template bool CSRIsNonZero<kDGLCUDA, int32_t>(CSRMatrix, int64_t, int64_t);
+template bool CSRIsNonZero<kDGLCUDA, int64_t>(CSRMatrix, int64_t, int64_t);
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 NDArray CSRIsNonZero(CSRMatrix csr, NDArray row, NDArray col) {
   const auto rowlen = row->shape[0];
   const auto collen = col->shape[0];
@@ -69,8 +69,8 @@ NDArray CSRIsNonZero(CSRMatrix csr, NDArray row, NDArray col) {
   return rst != -1;
 }
 
-template NDArray CSRIsNonZero<kDLGPU, int32_t>(CSRMatrix, NDArray, NDArray);
-template NDArray CSRIsNonZero<kDLGPU, int64_t>(CSRMatrix, NDArray, NDArray);
+template NDArray CSRIsNonZero<kDGLCUDA, int32_t>(CSRMatrix, NDArray, NDArray);
+template NDArray CSRIsNonZero<kDGLCUDA, int64_t>(CSRMatrix, NDArray, NDArray);
 
 ///////////////////////////// CSRHasDuplicate /////////////////////////////
 
@@ -95,7 +95,7 @@ __global__ void _SegmentHasNoDuplicate(
 }
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 bool CSRHasDuplicate(CSRMatrix csr) {
   if (!csr.sorted)
     csr = CSRSort(csr);
@@ -116,20 +116,20 @@ bool CSRHasDuplicate(CSRMatrix csr) {
   return !ret;
 }
 
-template bool CSRHasDuplicate<kDLGPU, int32_t>(CSRMatrix csr);
-template bool CSRHasDuplicate<kDLGPU, int64_t>(CSRMatrix csr);
+template bool CSRHasDuplicate<kDGLCUDA, int32_t>(CSRMatrix csr);
+template bool CSRHasDuplicate<kDGLCUDA, int64_t>(CSRMatrix csr);
 
 ///////////////////////////// CSRGetRowNNZ /////////////////////////////
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 int64_t CSRGetRowNNZ(CSRMatrix csr, int64_t row) {
   const IdType cur = aten::IndexSelect<IdType>(csr.indptr, row);
   const IdType next = aten::IndexSelect<IdType>(csr.indptr, row + 1);
   return next - cur;
 }
 
-template int64_t CSRGetRowNNZ<kDLGPU, int32_t>(CSRMatrix, int64_t);
-template int64_t CSRGetRowNNZ<kDLGPU, int64_t>(CSRMatrix, int64_t);
+template int64_t CSRGetRowNNZ<kDGLCUDA, int32_t>(CSRMatrix, int64_t);
+template int64_t CSRGetRowNNZ<kDGLCUDA, int64_t>(CSRMatrix, int64_t);
 
 template <typename IdType>
 __global__ void _CSRGetRowNNZKernel(
@@ -146,7 +146,7 @@ __global__ void _CSRGetRowNNZKernel(
   }
 }
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 NDArray CSRGetRowNNZ(CSRMatrix csr, NDArray rows) {
   cudaStream_t stream = runtime::getCurrentCUDAStream();
   const auto len = rows->shape[0];
@@ -162,24 +162,24 @@ NDArray CSRGetRowNNZ(CSRMatrix csr, NDArray rows) {
   return rst;
 }
 
-template NDArray CSRGetRowNNZ<kDLGPU, int32_t>(CSRMatrix, NDArray);
-template NDArray CSRGetRowNNZ<kDLGPU, int64_t>(CSRMatrix, NDArray);
+template NDArray CSRGetRowNNZ<kDGLCUDA, int32_t>(CSRMatrix, NDArray);
+template NDArray CSRGetRowNNZ<kDGLCUDA, int64_t>(CSRMatrix, NDArray);
 
 ///////////////////////////// CSRGetRowColumnIndices /////////////////////////////
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 NDArray CSRGetRowColumnIndices(CSRMatrix csr, int64_t row) {
   const int64_t len = impl::CSRGetRowNNZ<XPU, IdType>(csr, row);
   const int64_t offset = aten::IndexSelect<IdType>(csr.indptr, row) * sizeof(IdType);
   return csr.indices.CreateView({len}, csr.indices->dtype, offset);
 }
 
-template NDArray CSRGetRowColumnIndices<kDLGPU, int32_t>(CSRMatrix, int64_t);
-template NDArray CSRGetRowColumnIndices<kDLGPU, int64_t>(CSRMatrix, int64_t);
+template NDArray CSRGetRowColumnIndices<kDGLCUDA, int32_t>(CSRMatrix, int64_t);
+template NDArray CSRGetRowColumnIndices<kDGLCUDA, int64_t>(CSRMatrix, int64_t);
 
 ///////////////////////////// CSRGetRowData /////////////////////////////
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 NDArray CSRGetRowData(CSRMatrix csr, int64_t row) {
   const int64_t len = impl::CSRGetRowNNZ<XPU, IdType>(csr, row);
   const int64_t offset = aten::IndexSelect<IdType>(csr.indptr, row) * sizeof(IdType);
@@ -189,12 +189,12 @@ NDArray CSRGetRowData(CSRMatrix csr, int64_t row) {
   return aten::Range(offset, offset + len, csr.indptr->dtype.bits, csr.indptr->ctx);
 }
 
-template NDArray CSRGetRowData<kDLGPU, int32_t>(CSRMatrix, int64_t);
-template NDArray CSRGetRowData<kDLGPU, int64_t>(CSRMatrix, int64_t);
+template NDArray CSRGetRowData<kDGLCUDA, int32_t>(CSRMatrix, int64_t);
+template NDArray CSRGetRowData<kDGLCUDA, int64_t>(CSRMatrix, int64_t);
 
 ///////////////////////////// CSRSliceRows /////////////////////////////
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) {
   const int64_t num_rows = end - start;
   const IdType st_pos = aten::IndexSelect<IdType>(csr.indptr, start);
@@ -215,8 +215,8 @@ CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) {
       csr.sorted);
 }
 
-template CSRMatrix CSRSliceRows<kDLGPU, int32_t>(CSRMatrix, int64_t, int64_t);
-template CSRMatrix CSRSliceRows<kDLGPU, int64_t>(CSRMatrix, int64_t, int64_t);
+template CSRMatrix CSRSliceRows<kDGLCUDA, int32_t>(CSRMatrix, int64_t, int64_t);
+template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, int64_t, int64_t);
 
 /*!
  * \brief Copy data segment to output buffers
@@ -243,7 +243,7 @@ __global__ void _SegmentCopyKernel(
   }
 }
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 CSRMatrix CSRSliceRows(CSRMatrix csr, NDArray rows) {
   cudaStream_t stream = runtime::getCurrentCUDAStream();
   const int64_t len = rows->shape[0];
@@ -272,8 +272,8 @@ CSRMatrix CSRSliceRows(CSRMatrix csr, NDArray rows) {
       csr.sorted);
 }
 
-template CSRMatrix CSRSliceRows<kDLGPU, int32_t>(CSRMatrix , NDArray);
-template CSRMatrix CSRSliceRows<kDLGPU, int64_t>(CSRMatrix , NDArray);
+template CSRMatrix CSRSliceRows<kDGLCUDA, int32_t>(CSRMatrix , NDArray);
+template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix , NDArray);
 
 ///////////////////////////// CSRGetDataAndIndices /////////////////////////////
 
@@ -345,7 +345,7 @@ __global__ void _SortedSearchKernel(
   }
 }
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 std::vector<NDArray> CSRGetDataAndIndices(CSRMatrix csr, NDArray row, NDArray col) {
   const auto rowlen = row->shape[0];
   const auto collen = col->shape[0];
@@ -392,9 +392,9 @@ std::vector<NDArray> CSRGetDataAndIndices(CSRMatrix csr, NDArray row, NDArray co
   return {ret_row, ret_col, ret_data};
 }
 
-template std::vector<NDArray> CSRGetDataAndIndices<kDLGPU, int32_t>(
+template std::vector<NDArray> CSRGetDataAndIndices<kDGLCUDA, int32_t>(
     CSRMatrix csr, NDArray rows, NDArray cols);
-template std::vector<NDArray> CSRGetDataAndIndices<kDLGPU, int64_t>(
+template std::vector<NDArray> CSRGetDataAndIndices<kDGLCUDA, int64_t>(
     CSRMatrix csr, NDArray rows, NDArray cols);
 
 ///////////////////////////// CSRSliceMatrix /////////////////////////////
 
@@ -422,7 +422,7 @@ __global__ void _SegmentMaskColKernel(
   }
 }
 
-template <DLDeviceType XPU, typename IdType>
+template <DGLDeviceType XPU, typename IdType>
 CSRMatrix CSRSliceMatrix(CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols) {
   cudaStream_t stream = runtime::getCurrentCUDAStream();
   const auto& ctx = rows->ctx;
@@ -501,9 +501,9 @@ CSRMatrix CSRSliceMatrix(CSRMatrix csr, runtime::NDArray rows, runtime::NDArray
       ret_col, ret_data);
 }
 
-template CSRMatrix CSRSliceMatrix<kDLGPU, int32_t>(
+template CSRMatrix CSRSliceMatrix<kDGLCUDA, int32_t>(
     CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols);
-template CSRMatrix CSRSliceMatrix<kDLGPU, int64_t>(
+template CSRMatrix CSRSliceMatrix<kDGLCUDA, int64_t>(
     CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols);
 
 }  // namespace impl
......
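CSRGetRowColumnIndices above is zero-copy: CreateView reinterprets a byte range of the existing indices array rather than allocating. A hedged sketch of the same slicing with a hypothetical helper name; `start` and `nnz` correspond to the values the hunk fetches from csr.indptr via aten::IndexSelect:

```cpp
// Zero-copy view of one CSR row's column indices. CreateView takes the new
// shape, the element dtype, and a byte offset into the existing buffer.
template <typename IdType>
dgl::runtime::NDArray RowIndicesView(dgl::runtime::NDArray indices,
                                     int64_t start, int64_t nnz) {
  const int64_t offset = start * sizeof(IdType);  // element -> byte offset
  return indices.CreateView({nnz}, indices->dtype, offset);
}
```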
@@ -147,53 +147,53 @@ void SpMMCoo(const std::string& op, const std::string& reduce,
   }
 }
 
-template void SpMMCsr<kDLGPU, int32_t, 16>(
+template void SpMMCsr<kDGLCUDA, int32_t, 16>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int64_t, 16>(
+template void SpMMCsr<kDGLCUDA, int64_t, 16>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int32_t, 32>(
+template void SpMMCsr<kDGLCUDA, int32_t, 32>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int64_t, 32>(
+template void SpMMCsr<kDGLCUDA, int64_t, 32>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int32_t, 64>(
+template void SpMMCsr<kDGLCUDA, int32_t, 64>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCsr<kDLGPU, int64_t, 64>(
+template void SpMMCsr<kDGLCUDA, int64_t, 64>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int32_t, 16>(
+template void SpMMCoo<kDGLCUDA, int32_t, 16>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int64_t, 16>(
+template void SpMMCoo<kDGLCUDA, int64_t, 16>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int32_t, 32>(
+template void SpMMCoo<kDGLCUDA, int32_t, 32>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int64_t, 32>(
+template void SpMMCoo<kDGLCUDA, int64_t, 32>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int32_t, 64>(
+template void SpMMCoo<kDGLCUDA, int32_t, 64>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
-template void SpMMCoo<kDLGPU, int64_t, 64>(
+template void SpMMCoo<kDGLCUDA, int64_t, 64>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
......
@@ -203,7 +203,7 @@ cusparseStatus_t Xcsrmm2<double>(cusparseHandle_t handle, cusparseOperation_t tr
 /*! Cusparse implementation of SpMM on Csr format. */
 template <typename DType, typename IdType>
 void CusparseCsrmm2(
-    const DLContext& ctx,
+    const DGLContext& ctx,
     const CSRMatrix& csr,
     const DType* B_data, const DType* A_data,
     DType* C_data,
@@ -303,7 +303,7 @@ void CusparseCsrmm2(
 /*! Cusparse implementation of SpMM on Csr format. */
 template <typename DType, typename IdType>
 void CusparseCsrmm2Hetero(
-    const DLContext& ctx,
+    const DGLContext& ctx,
     const CSRMatrix& csr,
     const DType* B_data, const DType* A_data,
     DType* C_data,
......
@@ -199,37 +199,37 @@ void SpMMCsrHetero(const std::string& op, const std::string& reduce,
   });
 }
 
-template void SpMMCsrHetero<kDLGPU, int32_t, 16>(
+template void SpMMCsrHetero<kDGLCUDA, int32_t, 16>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
    const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int64_t, 16>(
+template void SpMMCsrHetero<kDGLCUDA, int64_t, 16>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
    const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int32_t, 32>(
+template void SpMMCsrHetero<kDGLCUDA, int32_t, 32>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
    const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int64_t, 32>(
+template void SpMMCsrHetero<kDGLCUDA, int64_t, 32>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
    const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int32_t, 64>(
+template void SpMMCsrHetero<kDGLCUDA, int32_t, 64>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
    const std::vector<dgl_type_t>& ufeat_ntids, const std::vector<dgl_type_t>& out_ntids);
-template void SpMMCsrHetero<kDLGPU, int64_t, 64>(
+template void SpMMCsrHetero<kDGLCUDA, int64_t, 64>(
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
......
@@ -11,7 +11,7 @@
 namespace dgl {
 namespace cuda {
 
-bool AllTrue(int8_t* flags, int64_t length, const DLContext& ctx) {
+bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx) {
   auto device = runtime::DeviceAPI::Get(ctx);
   int8_t* rst = static_cast<int8_t*>(device->AllocWorkspace(ctx, 1));
   // Call CUB's reduction
......
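AllTrue above allocates a one-byte workspace and, per its comment, reduces the flags with CUB. A minimal standalone sketch of that idea, assuming plain cudaMalloc instead of DGL's workspace allocator and the usual two-call CUB sizing pattern; if the minimum of the int8_t flags is 1, every flag is true:

```cpp
#include <cub/cub.cuh>

// Hedged sketch of AllTrue: min-reduce the flags, then read the result back.
bool AllTrueSketch(const int8_t* flags, int64_t length, cudaStream_t stream) {
  int8_t* d_out = nullptr;
  cudaMalloc(&d_out, 1);
  void* d_temp = nullptr;
  size_t temp_bytes = 0;
  const int n = static_cast<int>(length);  // CUB's num_items is an int here
  // First call only computes the temp-storage size; second call reduces.
  cub::DeviceReduce::Min(d_temp, temp_bytes, flags, d_out, n, stream);
  cudaMalloc(&d_temp, temp_bytes);
  cub::DeviceReduce::Min(d_temp, temp_bytes, flags, d_out, n, stream);
  int8_t h_out = 0;
  cudaMemcpyAsync(&h_out, d_out, 1, cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);
  cudaFree(d_temp);
  cudaFree(d_out);
  return h_out == 1;
}
```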
@@ -7,9 +7,9 @@
 #define DGL_ARRAY_CUDA_UTILS_H_
 
 #include <dmlc/logging.h>
+#include <dgl/runtime/c_runtime_api.h>
 #include <dgl/runtime/device_api.h>
 #include <dgl/runtime/ndarray.h>
-#include <dlpack/dlpack.h>
 
 #include "../../runtime/cuda/cuda_common.h"
 
 namespace dgl {
@@ -115,7 +115,7 @@ __device__ __forceinline__ T _ldg(T* addr) {
  * \param ctx Device context.
  * \return True if all the flags are true.
  */
-bool AllTrue(int8_t* flags, int64_t length, const DLContext& ctx);
+bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx);
 
 /*!
  * \brief CUDA Kernel of filling the vector started from ptr of size length
@@ -187,7 +187,7 @@ __global__ void _LinearSearchKernel(
 template <typename DType>
 inline DType GetCUDAScalar(
     runtime::DeviceAPI* device_api,
-    DLContext ctx,
+    DGLContext ctx,
     const DType* cuda_ptr) {
   DType result;
   device_api->CopyDataFromTo(
@@ -195,8 +195,8 @@ inline DType GetCUDAScalar(
     &result, 0,
     sizeof(result),
     ctx,
-    DLContext{kDLCPU, 0},
-    DLDataTypeTraits<DType>::dtype);
+    DGLContext{kDGLCPU, 0},
+    DGLDataTypeTraits<DType>::dtype);
   return result;
 }
......
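GetCUDAScalar above wraps the CopyDataFromTo idiom behind a typed inline helper, now spelled entirely with DGL-owned types (DGLContext, DGLDataTypeTraits). A short usage sketch, assuming the helper is reachable as dgl::cuda::GetCUDAScalar and that d_count is a valid device pointer in ctx:

```cpp
// Read one int64_t a kernel wrote at d_count; blocks until the copy lands.
int64_t ReadCount(dgl::runtime::DeviceAPI* device, DGLContext ctx,
                  const int64_t* d_count) {
  return dgl::cuda::GetCUDAScalar(device, ctx, d_count);
}
```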
@@ -25,7 +25,7 @@ NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) {
   std::vector<int64_t> shape{len};
 
   CHECK(array.IsPinned());
-  CHECK_EQ(index->ctx.device_type, kDLGPU);
+  CHECK_EQ(index->ctx.device_type, kDGLCUDA);
 
   for (int d = 1; d < array->ndim; ++d) {
     num_feat *= array->shape[d];
@@ -85,8 +85,8 @@ void IndexScatterGPUToCPU(NDArray dest, IdArray index, NDArray source) {
   std::vector<int64_t> shape{len};
 
   CHECK(dest.IsPinned());
-  CHECK_EQ(index->ctx.device_type, kDLGPU);
-  CHECK_EQ(source->ctx.device_type, kDLGPU);
+  CHECK_EQ(index->ctx.device_type, kDGLCUDA);
+  CHECK_EQ(source->ctx.device_type, kDGLCUDA);
 
   for (int d = 1; d < source->ndim; ++d) {
     num_feat *= source->shape[d];
......
@@ -15,7 +15,7 @@ namespace array {
 
 using namespace dgl::runtime;
 
-template<DLDeviceType XPU, typename IdType>
+template<DGLDeviceType XPU, typename IdType>
 FilterRef CreateSetFilter(IdArray set);
 
 DGL_REGISTER_GLOBAL("utils.filter._CAPI_DGLFilterCreateFromSet")
@@ -23,10 +23,10 @@ DGL_REGISTER_GLOBAL("utils.filter._CAPI_DGLFilterCreateFromSet")
     IdArray array = args[0];
     auto ctx = array->ctx;
     // TODO(nv-dlasalle): Implement CPU version.
-    if (ctx.device_type == kDLGPU) {
+    if (ctx.device_type == kDGLCUDA) {
 #ifdef DGL_USE_CUDA
       ATEN_ID_TYPE_SWITCH(array->dtype, IdType, {
-        *rv = CreateSetFilter<kDLGPU, IdType>(array);
+        *rv = CreateSetFilter<kDGLCUDA, IdType>(array);
       });
 #else
       LOG(FATAL) << "GPU support not compiled.";
......
@@ -8,6 +8,7 @@
 #ifdef USE_TVM
 #include <featgraph.h>
+#include <dgl/runtime/dlpack_convert.h>
 #endif  // USE_TVM
 
 #include "kernel_decl.h"
@@ -70,7 +71,7 @@ void SegmentMM(const NDArray A,
   }
   CHECK_EQ(B->shape[0], seglen_A.NumElements())
     << "segment_mm expects len(seglen_A) == B.shape[0]";
-  CHECK_EQ(seglen_A->ctx.device_type, kDLCPU)
+  CHECK_EQ(seglen_A->ctx.device_type, kDGLCPU)
     << "segment_mm expects seglen_A to be on CPU.";
   CHECK(A->ctx == B->ctx) << "segment_mm expects A and B to be of the same device";
   ATEN_XPU_SWITCH_CUDA(A->ctx.device_type, XPU, "SegmentMM", {
@@ -89,7 +90,7 @@ void SegmentMMBackwardB(const NDArray A,
   CHECK_EQ(A->ndim, 2) << "segment_mm_backward operator expects a 2D tensor for the first input.";
   CHECK_EQ(dC->ndim, 2)
     << "segment_mm_backward operator expects a 2D tensor for the second input.";
-  CHECK_EQ(seglen->ctx.device_type, kDLCPU)
+  CHECK_EQ(seglen->ctx.device_type, kDGLCPU)
    << "segment_mm expects seglen to be on CPU.";
   ATEN_XPU_SWITCH_CUDA(A->ctx.device_type, XPU, "SegmentMMBackwardB", {
     ATEN_ID_TYPE_SWITCH(seglen->dtype, IdType, {
@@ -829,8 +830,12 @@ DGL_REGISTER_GLOBAL("sparse._CAPI_FG_SDDMMTreeReduction")
     //     {lhs, rhs, out},
     //     {"U_data", "E_data", "V_data"});
     COOMatrix coo = graph.sptr()->GetCOOMatrix(0);
-    dgl::featgraph::SDDMMTreeReduction(coo.row.ToDLPack(), coo.col.ToDLPack(),
-                                       lhs.ToDLPack(), rhs.ToDLPack(), out.ToDLPack());
+    dgl::featgraph::SDDMMTreeReduction(
+        DLPackConvert::ToDLPack(coo.row),
+        DLPackConvert::ToDLPack(coo.col),
+        DLPackConvert::ToDLPack(lhs),
+        DLPackConvert::ToDLPack(rhs),
+        DLPackConvert::ToDLPack(out));
   });
 #endif  // USE_TVM
......
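
The SDDMMTreeReduction hunk is the decoupling itself: the DLPack view of an NDArray is now produced by the standalone DLPackConvert helper from dgl/runtime/dlpack_convert.h rather than a method on NDArray, so dlpack.h is only needed at boundaries such as this TVM featgraph call. A hedged sketch of what such a boundary converter involves, mapping a core device enum back to DLPack v0.7's (the helper names are illustrative, not DGL's):

#include <dlpack/dlpack.h>
#include <stdexcept>

// Sketch of the boundary mapping: the core library keeps its own device
// enum (with values chosen to match DLPack's), and only the converter
// translates back to DLPack types. `MyDeviceType` and `ToDLDevice` are
// illustrative, not DGL's actual names.
enum MyDeviceType { kMyCPU = 1, kMyCUDA = 2 };

DLDevice ToDLDevice(MyDeviceType dev, int device_id) {
  DLDevice out;
  switch (dev) {
    case kMyCPU:  out.device_type = kDLCPU;  break;
    case kMyCUDA: out.device_type = kDLCUDA; break;
    default: throw std::invalid_argument("unsupported device type");
  }
  out.device_id = device_id;
  return out;
}

Keeping the enum values aligned makes the translation trivial, but routing it through one converter means the rest of the library never includes dlpack.h, which is what allows the header to be dropped from the core build.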
...@@ -16,7 +16,7 @@ namespace aten { ...@@ -16,7 +16,7 @@ namespace aten {
NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) { NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) {
#ifdef DGL_USE_CUDA #ifdef DGL_USE_CUDA
CHECK(array.IsPinned()) << "Input array must be in pinned memory."; CHECK(array.IsPinned()) << "Input array must be in pinned memory.";
CHECK_EQ(index->ctx.device_type, kDLGPU) << "Index must be on the GPU."; CHECK_EQ(index->ctx.device_type, kDGLCUDA) << "Index must be on the GPU.";
CHECK_GE(array->ndim, 1) << "Input array must have at least 1 dimension."; CHECK_GE(array->ndim, 1) << "Input array must have at least 1 dimension.";
CHECK_EQ(index->ndim, 1) << "Index must be a 1D array."; CHECK_EQ(index->ndim, 1) << "Index must be a 1D array.";
...@@ -34,8 +34,8 @@ NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) { ...@@ -34,8 +34,8 @@ NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) {
void IndexScatterGPUToCPU(NDArray dest, IdArray index, NDArray source) { void IndexScatterGPUToCPU(NDArray dest, IdArray index, NDArray source) {
#ifdef DGL_USE_CUDA #ifdef DGL_USE_CUDA
CHECK(dest.IsPinned()) << "Destination array must be in pinned memory."; CHECK(dest.IsPinned()) << "Destination array must be in pinned memory.";
CHECK_EQ(index->ctx.device_type, kDLGPU) << "Index must be on the GPU."; CHECK_EQ(index->ctx.device_type, kDGLCUDA) << "Index must be on the GPU.";
CHECK_EQ(source->ctx.device_type, kDLGPU) << "Source array must be on the GPU."; CHECK_EQ(source->ctx.device_type, kDGLCUDA) << "Source array must be on the GPU.";
CHECK_EQ(dest->dtype, source->dtype) << "Destination array and source " CHECK_EQ(dest->dtype, source->dtype) << "Destination array and source "
"array must have the same dtype."; "array must have the same dtype.";
CHECK_GE(dest->ndim, 1) << "Destination array must have at least 1 dimension."; CHECK_GE(dest->ndim, 1) << "Destination array must have at least 1 dimension.";
......
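
IndexScatterGPUToCPU is the mirror of the gather sketched earlier: the kernel runs on the GPU and writes through to a pinned host destination at device-supplied positions. A matching standalone sketch under the same assumptions (plain CUDA, illustrative names); the dtype and dimensionality CHECKs in the hunk are what make this direction safe:

#include <cuda_runtime.h>
#include <cstdint>

// Scatter device-resident rows into a pinned host buffer (`dest`, from
// cudaMallocHost) at positions given by a device-resident index array.
__global__ void ScatterRowsToPinned(float* __restrict__ dest,
                                    const int64_t* __restrict__ index,
                                    const float* __restrict__ src,
                                    int64_t num_rows, int64_t num_feat) {
  int64_t row = blockIdx.x;
  if (row >= num_rows) return;
  float* dest_row = dest + index[row] * num_feat;  // host write over the bus
  for (int64_t f = threadIdx.x; f < num_feat; f += blockDim.x)
    dest_row[f] = src[row * num_feat + f];
}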
...@@ -41,8 +41,8 @@ dgl::runtime::NDArray CopyVectorToNDArray( ...@@ -41,8 +41,8 @@ dgl::runtime::NDArray CopyVectorToNDArray(
const std::vector<DType>& vec) { const std::vector<DType>& vec) {
using dgl::runtime::NDArray; using dgl::runtime::NDArray;
const int64_t len = vec.size(); const int64_t len = vec.size();
NDArray a = NDArray::Empty({len}, DLDataType{kDLInt, sizeof(IdType) * 8, 1}, NDArray a = NDArray::Empty({len}, DGLDataType{kDGLInt, sizeof(IdType) * 8, 1},
DLContext{kDLCPU, 0}); DGLContext{kDGLCPU, 0});
std::copy(vec.begin(), vec.end(), static_cast<IdType*>(a->data)); std::copy(vec.begin(), vec.end(), static_cast<IdType*>(a->data));
return a; return a;
} }
......
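
NDArray::Empty now takes the renamed descriptor structs; the brace-initialized triple follows DLPack's dtype layout of {type code, width in bits, lane count}, with lanes = 1 for scalar elements. A small self-contained example using DLPack v0.7's own structs, which the DGL versions mirror:

#include <dlpack/dlpack.h>
#include <cstdio>

int main() {
  // DLPack's dtype triple, which DGLDataType mirrors: {code, bits, lanes}.
  DLDataType i64{kDLInt, 64, 1};                   // 64-bit signed int scalar
  DLDataType f32{kDLFloat, sizeof(float) * 8, 1};  // as built in the hunks here
  std::printf("i64: code=%d bits=%d lanes=%d\n", i64.code, i64.bits, i64.lanes);
  std::printf("f32: code=%d bits=%d lanes=%d\n", f32.code, f32.bits, f32.lanes);
  return 0;
}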
...@@ -50,7 +50,7 @@ template void GroupIndexShuffle<int64_t>( ...@@ -50,7 +50,7 @@ template void GroupIndexShuffle<int64_t>(
template <typename IdType> template <typename IdType>
IdArray RandomPerm(int64_t num_nodes) { IdArray RandomPerm(int64_t num_nodes) {
IdArray perm = aten::NewIdArray(num_nodes, DLContext{kDLCPU, 0}, sizeof(IdType) * 8); IdArray perm = aten::NewIdArray(num_nodes, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
IdType* perm_data = static_cast<IdType*>(perm->data); IdType* perm_data = static_cast<IdType*>(perm->data);
std::iota(perm_data, perm_data + num_nodes, 0); std::iota(perm_data, perm_data + num_nodes, 0);
IndexShuffle(perm_data, num_nodes); IndexShuffle(perm_data, num_nodes);
...@@ -59,7 +59,7 @@ IdArray RandomPerm(int64_t num_nodes) { ...@@ -59,7 +59,7 @@ IdArray RandomPerm(int64_t num_nodes) {
template <typename IdType> template <typename IdType>
IdArray GroupRandomPerm(const IdType *group_idxs, int64_t num_group_idxs, int64_t num_nodes) { IdArray GroupRandomPerm(const IdType *group_idxs, int64_t num_group_idxs, int64_t num_nodes) {
IdArray perm = aten::NewIdArray(num_nodes, DLContext{kDLCPU, 0}, sizeof(IdType) * 8); IdArray perm = aten::NewIdArray(num_nodes, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
IdType* perm_data = static_cast<IdType*>(perm->data); IdType* perm_data = static_cast<IdType*>(perm->data);
std::iota(perm_data, perm_data + num_nodes, 0); std::iota(perm_data, perm_data + num_nodes, 0);
GroupIndexShuffle(group_idxs, perm_data, num_group_idxs, num_nodes); GroupIndexShuffle(group_idxs, perm_data, num_group_idxs, num_nodes);
...@@ -77,7 +77,7 @@ IdArray GroupRandomPerm(const IdType *group_idxs, int64_t num_group_idxs, int64_ ...@@ -77,7 +77,7 @@ IdArray GroupRandomPerm(const IdType *group_idxs, int64_t num_group_idxs, int64_
* Finally, we pick the point with the maximum such distance. * Finally, we pick the point with the maximum such distance.
* This process will be repeated for ``sample_points`` - 1 times. * This process will be repeated for ``sample_points`` - 1 times.
*/ */
template <DLDeviceType XPU, typename FloatType, typename IdType> template <DGLDeviceType XPU, typename FloatType, typename IdType>
void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_points, void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result) { NDArray dist, IdArray start_idx, IdArray result) {
const FloatType* array_data = static_cast<FloatType*>(array->data); const FloatType* array_data = static_cast<FloatType*>(array->data);
...@@ -135,20 +135,20 @@ void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_poin ...@@ -135,20 +135,20 @@ void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_poin
ret_start += sample_points; ret_start += sample_points;
} }
} }
template void FarthestPointSampler<kDLCPU, float, int32_t>( template void FarthestPointSampler<kDGLCPU, float, int32_t>(
NDArray array, int64_t batch_size, int64_t sample_points, NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result); NDArray dist, IdArray start_idx, IdArray result);
template void FarthestPointSampler<kDLCPU, float, int64_t>( template void FarthestPointSampler<kDGLCPU, float, int64_t>(
NDArray array, int64_t batch_size, int64_t sample_points, NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result); NDArray dist, IdArray start_idx, IdArray result);
template void FarthestPointSampler<kDLCPU, double, int32_t>( template void FarthestPointSampler<kDGLCPU, double, int32_t>(
NDArray array, int64_t batch_size, int64_t sample_points, NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result); NDArray dist, IdArray start_idx, IdArray result);
template void FarthestPointSampler<kDLCPU, double, int64_t>( template void FarthestPointSampler<kDGLCPU, double, int64_t>(
NDArray array, int64_t batch_size, int64_t sample_points, NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result); NDArray dist, IdArray start_idx, IdArray result);
template <DLDeviceType XPU, typename FloatType, typename IdType> template <DGLDeviceType XPU, typename FloatType, typename IdType>
void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result) { void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result) {
const int64_t num_nodes = result->shape[0]; const int64_t num_nodes = result->shape[0];
const IdType *indptr_data = static_cast<IdType*>(csr.indptr->data); const IdType *indptr_data = static_cast<IdType*>(csr.indptr->data);
...@@ -181,16 +181,16 @@ void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, ...@@ -181,16 +181,16 @@ void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight,
result_data[v_max] = result_data[u]; result_data[v_max] = result_data[u];
} }
} }
template void WeightedNeighborMatching<kDLCPU, float, int32_t>( template void WeightedNeighborMatching<kDGLCPU, float, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result); const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLCPU, float, int64_t>( template void WeightedNeighborMatching<kDGLCPU, float, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result); const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLCPU, double, int32_t>( template void WeightedNeighborMatching<kDGLCPU, double, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result); const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLCPU, double, int64_t>( template void WeightedNeighborMatching<kDGLCPU, double, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result); const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template <DLDeviceType XPU, typename IdType> template <DGLDeviceType XPU, typename IdType>
void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) { void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
const int64_t num_nodes = result->shape[0]; const int64_t num_nodes = result->shape[0];
const IdType *indptr_data = static_cast<IdType*>(csr.indptr->data); const IdType *indptr_data = static_cast<IdType*>(csr.indptr->data);
...@@ -221,8 +221,8 @@ void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) { ...@@ -221,8 +221,8 @@ void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
} }
} }
} }
template void NeighborMatching<kDLCPU, int32_t>(const aten::CSRMatrix &csr, IdArray result); template void NeighborMatching<kDGLCPU, int32_t>(const aten::CSRMatrix &csr, IdArray result);
template void NeighborMatching<kDLCPU, int64_t>(const aten::CSRMatrix &csr, IdArray result); template void NeighborMatching<kDGLCPU, int64_t>(const aten::CSRMatrix &csr, IdArray result);
} // namespace impl } // namespace impl
} // namespace geometry } // namespace geometry
......
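
The farthest-point sampler described in the comment above maintains, for every unpicked point, the distance to its nearest already-picked point, and greedily promotes the argmax each round. A compact single-batch sketch in plain C++ (squared Euclidean distance; DGL's version adds batching, dtype dispatch, and a caller-provided start index):

#include <cstdint>
#include <limits>
#include <vector>

// Greedy farthest point sampling for one batch: `points` is n x dim,
// row-major. Returns `k` indices, beginning with `start_idx`.
std::vector<int64_t> FarthestPointSample(const std::vector<double>& points,
                                         int64_t n, int64_t dim,
                                         int64_t k, int64_t start_idx) {
  std::vector<double> min_dist(n, std::numeric_limits<double>::max());
  std::vector<int64_t> picked;
  int64_t cur = start_idx;
  for (int64_t i = 0; i < k; ++i) {
    picked.push_back(cur);
    int64_t next = cur;
    double best = -1.0;
    for (int64_t p = 0; p < n; ++p) {
      // squared distance from p to the newest picked point
      double d = 0.0;
      for (int64_t c = 0; c < dim; ++c) {
        double diff = points[p * dim + c] - points[cur * dim + c];
        d += diff * diff;
      }
      if (d < min_dist[p]) min_dist[p] = d;        // nearest pick so far
      if (min_dist[p] > best) { best = min_dist[p]; next = p; }
    }
    cur = next;  // the point farthest from the current sample set
  }
  return picked;
}

The O(n * k * dim) scan is the same shape of work the CPU kernel performs per batch.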
...@@ -150,7 +150,7 @@ bool Colorize(IdType * result_data, int64_t num_nodes, float * const prop) { ...@@ -150,7 +150,7 @@ bool Colorize(IdType * result_data, int64_t num_nodes, float * const prop) {
* are marked, mark this node with its id. Else match this (BLUE, RED) node * are marked, mark this node with its id. Else match this (BLUE, RED) node
* pair and mark them with the smaller id between them. * pair and mark them with the smaller id between them.
*/ */
template <DLDeviceType XPU, typename FloatType, typename IdType> template <DGLDeviceType XPU, typename FloatType, typename IdType>
void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result) { void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result) {
cudaStream_t stream = runtime::getCurrentCUDAStream(); cudaStream_t stream = runtime::getCurrentCUDAStream();
const auto& ctx = result->ctx; const auto& ctx = result->ctx;
...@@ -182,13 +182,13 @@ void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, ...@@ -182,13 +182,13 @@ void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight,
} }
device->FreeWorkspace(ctx, prop); device->FreeWorkspace(ctx, prop);
} }
template void WeightedNeighborMatching<kDLGPU, float, int32_t>( template void WeightedNeighborMatching<kDGLCUDA, float, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result); const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLGPU, float, int64_t>( template void WeightedNeighborMatching<kDGLCUDA, float, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result); const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLGPU, double, int32_t>( template void WeightedNeighborMatching<kDGLCUDA, double, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result); const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLGPU, double, int64_t>( template void WeightedNeighborMatching<kDGLCUDA, double, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result); const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
/*! \brief Unweighted neighbor matching procedure (GPU version). /*! \brief Unweighted neighbor matching procedure (GPU version).
...@@ -201,7 +201,7 @@ template void WeightedNeighborMatching<kDLGPU, double, int64_t>( ...@@ -201,7 +201,7 @@ template void WeightedNeighborMatching<kDLGPU, double, int64_t>(
* 2. Graph is sparse, thus neighborhood of each node is small, * 2. Graph is sparse, thus neighborhood of each node is small,
* which is suitable for GPU implementation. * which is suitable for GPU implementation.
*/ */
template <DLDeviceType XPU, typename IdType> template <DGLDeviceType XPU, typename IdType>
void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) { void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
const int64_t num_edges = csr.indices->shape[0]; const int64_t num_edges = csr.indices->shape[0];
const auto& ctx = result->ctx; const auto& ctx = result->ctx;
...@@ -211,7 +211,7 @@ void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) { ...@@ -211,7 +211,7 @@ void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
// generate random weights // generate random weights
cudaStream_t stream = runtime::getCurrentCUDAStream(); cudaStream_t stream = runtime::getCurrentCUDAStream();
NDArray weight = NDArray::Empty( NDArray weight = NDArray::Empty(
{num_edges}, DLDataType{kDLFloat, sizeof(float) * 8, 1}, ctx); {num_edges}, DGLDataType{kDGLFloat, sizeof(float) * 8, 1}, ctx);
float *weight_data = static_cast<float*>(weight->data); float *weight_data = static_cast<float*>(weight->data);
uint64_t seed = dgl::RandomEngine::ThreadLocal()->RandInt(UINT64_MAX); uint64_t seed = dgl::RandomEngine::ThreadLocal()->RandInt(UINT64_MAX);
auto num_threads = cuda::FindNumThreads(num_edges); auto num_threads = cuda::FindNumThreads(num_edges);
...@@ -221,8 +221,8 @@ void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) { ...@@ -221,8 +221,8 @@ void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
WeightedNeighborMatching<XPU, float, IdType>(csr, weight, result); WeightedNeighborMatching<XPU, float, IdType>(csr, weight, result);
} }
template void NeighborMatching<kDLGPU, int32_t>(const aten::CSRMatrix &csr, IdArray result); template void NeighborMatching<kDGLCUDA, int32_t>(const aten::CSRMatrix &csr, IdArray result);
template void NeighborMatching<kDLGPU, int64_t>(const aten::CSRMatrix &csr, IdArray result); template void NeighborMatching<kDGLCUDA, int64_t>(const aten::CSRMatrix &csr, IdArray result);
} // namespace impl } // namespace impl
} // namespace geometry } // namespace geometry
......
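
As the last hunk shows, the unweighted GPU matcher reduces to the weighted one by drawing uniform random edge weights. The greedy weighted rule itself, per the CPU hunks earlier (an unmatched node grabs its heaviest unmatched neighbor and both take the smaller id), can be sketched serially on CSR as follows; this is illustrative standalone code, and the GPU version replaces the serial scan with the coloring scheme described in its doc comment:

#include <cstdint>
#include <vector>

// Serial sketch of greedy weighted neighbor matching on CSR: each still-
// unmatched node u pairs with its max-weight unmatched neighbor v, and both
// are labeled with the pair's smaller id; leftovers keep their own id.
std::vector<int64_t> GreedyMatch(const std::vector<int64_t>& indptr,
                                 const std::vector<int64_t>& indices,
                                 const std::vector<float>& weight,
                                 int64_t num_nodes) {
  std::vector<int64_t> result(num_nodes, -1);
  for (int64_t u = 0; u < num_nodes; ++u) {
    if (result[u] != -1) continue;  // already matched
    result[u] = u;                  // default: matched with itself
    int64_t v_max = -1;
    float w_max = -1.0f;
    for (int64_t e = indptr[u]; e < indptr[u + 1]; ++e) {
      int64_t v = indices[e];
      if (v != u && result[v] == -1 && weight[e] > w_max) {
        w_max = weight[e];
        v_max = v;
      }
    }
    if (v_max != -1)
      result[v_max] = result[u];  // v_max > u here, so both carry min(u, v_max)
  }
  return result;
}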