"...pytorch/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "5fbb33e73dd1b05426882829875e143068a84482"
Commit bcd33e0a authored by xiang song(charlie.song), committed by Zihao Ye

[Kernel][Perf] Message builtin with broadcasting performance optimization (#815)

* upd

* fix edgebatch edges

* add test

* trigger

* Update README.md for the pytorch PinSage example.

Add a note that the PinSage model example under
example/pytorch/recommendation only works with Python 3.6+,
as its dataset loader depends on the stanfordnlp package,
which works only with Python 3.6+.

* Provide a framework-agnostic API to test nn modules on both the CPU and CUDA side.

1. make dgl.nn.xxx framework agnostic
2. make test.backend include dgl.nn modules
3. modify test_edge_softmax in tests/mxnet/test_nn.py and
    tests/pytorch/test_nn.py to work on both CPU and GPU

* Fix style

* Delete unused code

* Make the agnostic tests depend only on tests/backend

1. clear all agnostic-related code in dgl.nn
2. make test_graph_conv agnostic to cpu/gpu

* Fix code style

* fix

* doc

* Make all test code in tests/mxnet/test_nn.py and tests/pytorch/test_nn.py
work on both CPU and GPU.

* Fix syntax

* Remove rand

* Add TAGCN nn.module and example

* Now tagcn can run on CPU.

* Add unit test for TGConv

* Fix style

* For the pubmed dataset, using --lr=0.005 achieves better accuracy

* Fix style

* Fix some descriptions

* Test performance of udf

* trigger

* Fix doc

* Add nn.TGConv and example

* Update test code

* Fix bug

* Update accuracy data in the mxnet tagcn test.

* Fix some comments and code

* delete useless code

* Fix naming

* Fix bug

* Fix bug

* Add test for mxnet TAGConv

* Add test code for mxnet TAGConv

* Update some docs

* Fix some code

* Update docs dgl.nn.mxnet

* Update weight init

* Fix

* Minor optimization for UnravelRavel

* Delete test code

* Update code style and notes.

* Fix func name
parent 189c2c09
...
@@ -38,6 +38,7 @@ TAGConv
   :members: forward
   :show-inheritance:

Global Pooling Layers
----------------------------------------
...
...
@@ -67,6 +67,24 @@ struct BackwardBinaryReduce {
   }
 };
 
+// Convert flattened index to multi-dimension index (assume row-major).
+__device__ __forceinline__ void Unravel(
+    int64_t idx, int ndim, const int64_t* shape, const int64_t* stride, int64_t* out) {
+  for (int d = 0; d < ndim; ++d) {
+    out[d] = (idx / stride[d]) % shape[d];
+  }
+}
+
+// Convert multi-dimension index to flattened index (assume row-major).
+__device__ __forceinline__ int64_t Ravel(
+    const int64_t* idx, int ndim, const int64_t* shape, const int64_t* stride) {
+  int64_t out = 0;
+  for (int d = 0; d < ndim; ++d) {
+    out += min(idx[d], shape[d] - 1) * stride[d];
+  }
+  return out;
+}
+
 // Minigun UDF to compute backward binary reduce with broadcasting.
 template <int Mode, int NDim, typename Idx, typename DType, typename Functors>
 struct BackwardBinaryReduceBcast {
...
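A minimal host-side sketch of what the Unravel/Ravel pair computes (illustrative only, not part of the patch; the shapes and strides are made up). Unravel turns a flat output index into per-dimension coordinates, and Ravel's min(idx[d], shape[d] - 1) clamp maps those coordinates into an operand whose broadcast (size-1) dimensions collapse to index 0:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Host mirror of the device helpers above, for intuition only.
void Unravel(int64_t idx, int ndim, const int64_t* shape,
             const int64_t* stride, int64_t* out) {
  for (int d = 0; d < ndim; ++d) {
    out[d] = (idx / stride[d]) % shape[d];
  }
}

int64_t Ravel(const int64_t* idx, int ndim, const int64_t* shape,
              const int64_t* stride) {
  int64_t out = 0;
  for (int d = 0; d < ndim; ++d) {
    // Coordinates on a size-1 (broadcast) dimension clamp to 0.
    out += std::min(idx[d], shape[d] - 1) * stride[d];
  }
  return out;
}

int main() {
  // Output shape (2, 3); an rhs of shape (1, 3) is broadcast along dim 0.
  const int64_t out_shape[] = {2, 3}, out_stride[] = {3, 1};
  const int64_t rhs_shape[] = {1, 3}, rhs_stride[] = {3, 1};
  int64_t coord[2];
  Unravel(4, 2, out_shape, out_stride, coord);          // flat 4 -> (1, 1)
  assert(coord[0] == 1 && coord[1] == 1);
  assert(Ravel(coord, 2, rhs_shape, rhs_stride) == 1);  // clamped to (0, 1)
  return 0;
}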
...
@@ -54,24 +54,56 @@ struct BinaryReduce {
   }
 };
 
-// Convert flattened index to multi-dimension index (assume row-major).
-__device__ __forceinline__ void Unravel(
-    int64_t idx, int ndim, const int64_t* shape, const int64_t* stride, int64_t* out) {
-  for (int d = 0; d < ndim; ++d) {
-    out[d] = (idx / stride[d]) % shape[d];
-  }
-}
-
-// Convert multi-dimension index to flattened index (assume row-major).
-__device__ __forceinline__ int64_t Ravel(
-    const int64_t* idx, int ndim, const int64_t* shape, const int64_t* stride) {
-  int64_t out = 0;
-  for (int d = 0; d < ndim; ++d) {
-    out += min(idx[d], shape[d] - 1) * stride[d];
-  }
-  return out;
-}
+/*
+ * This function does the following:
+ * 1. Convert the flattened index to a multi-dimension index
+ *    according to the output shape (assume row-major).
+ * 2. Convert the multi-dimension index to a flattened index for lhs.
+ * 3. Convert the multi-dimension index to a flattened index for rhs.
+ */
+__device__ __forceinline__ void UnravelRavel(
+    const int64_t idx, const int ndim, const int64_t* out_shape, const int64_t* out_stride,
+    const int64_t* lhs_shape, const int64_t* lhs_stride,
+    const int64_t* rhs_shape, const int64_t* rhs_stride, int64_t* lhs_out, int64_t* rhs_out) {
+  if (out_stride[0] == lhs_stride[0]) {
+#pragma unroll
+    for (int d = 0; d < ndim; ++d) {
+      int64_t o_sh = out_shape[d];
+      int64_t o_st = out_stride[d];
+      int64_t rhs_sh = rhs_shape[d];
+      int64_t rhs_st = rhs_stride[d];
+      int64_t i = (idx / o_st) % o_sh;
+      /*
+       * Simplified from: rhs_out += min(i, rhs_sh - 1) * rhs_st;
+       * rhs_sh is either o_sh or 1.
+       */
+      if (rhs_sh > i) {
+        *rhs_out += i * rhs_st;
+      }
+    }
+    *lhs_out = idx;
+  } else {
+#pragma unroll
+    for (int d = 0; d < ndim; ++d) {
+      int64_t o_sh = out_shape[d];
+      int64_t o_st = out_stride[d];
+      int64_t lhs_sh = lhs_shape[d];
+      int64_t lhs_st = lhs_stride[d];
+      int64_t i = (idx / o_st) % o_sh;
+      /*
+       * Simplified from: lhs_out += min(i, lhs_sh - 1) * lhs_st;
+       * lhs_sh is either o_sh or 1.
+       */
+      if (lhs_sh > i) {
+        *lhs_out += i * lhs_st;
+      }
+    }
+    *rhs_out = idx;
+  }
+}
 
 // Minigun UDF to compute binary reduce with broadcasting.
 template <int NDim, typename Idx, typename DType, typename Functors>
 struct BinaryReduceBcast {
...
@@ -98,13 +130,14 @@ struct BinaryReduceBcast {
     DType* lhsoff = gdata->lhs_data + lid * gdata->lhs_len;
     DType* rhsoff = gdata->rhs_data + rid * gdata->rhs_len;
     DType* outoff = gdata->out_data + oid * gdata->out_len;
-    int64_t tmp[NDim];  // store unraveled idx.
     while (tx < gdata->out_len) {
-      Unravel(tx, gdata->ndim, gdata->out_shape, gdata->out_stride, tmp);
-      DType lhs = Functors::Read(lhsoff +
-          Ravel(tmp, gdata->ndim, gdata->lhs_shape, gdata->lhs_stride));
-      DType rhs = Functors::Read(rhsoff +
-          Ravel(tmp, gdata->ndim, gdata->rhs_shape, gdata->rhs_stride));
+      int64_t lhs_add = 0;
+      int64_t rhs_add = 0;
+      UnravelRavel(tx, gdata->ndim, gdata->out_shape, gdata->out_stride,
+                   gdata->lhs_shape, gdata->lhs_stride,
+                   gdata->rhs_shape, gdata->rhs_stride, &lhs_add, &rhs_add);
+      DType lhs = Functors::Read(lhsoff + lhs_add);
+      DType rhs = Functors::Read(rhsoff + rhs_add);
       DType out = Functors::Op(lhs, rhs);
       Functors::Write(outoff + tx, out);
       tx += stride_x;
...
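The fused UnravelRavel folds the two passes (one unravel plus two ravels per output element) into a single per-dimension loop and drops the int64_t tmp[NDim] scratch array. It rests on two observations visible in its own comments: broadcasting guarantees each operand dimension is either the matching output dimension o_sh or 1, so the original min(i, sh - 1) * st term reduces to the branch (sh > i) ? i * st : 0; and when out_stride[0] == lhs_stride[0], lhs shares the output's row-major layout, so its flat offset is simply idx and no loop is needed for it at all. A standalone host-side check of the first claim (values are illustrative, not from the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  // Broadcast invariant: each operand dimension sh is either o_sh or 1.
  const int64_t o_sh = 4, st = 5;
  for (int64_t sh : {int64_t{1}, o_sh}) {
    for (int64_t i = 0; i < o_sh; ++i) {           // i: output coordinate
      int64_t clamped = std::min(i, sh - 1) * st;  // original Ravel term
      int64_t fused = (sh > i) ? i * st : 0;       // UnravelRavel's branch
      assert(clamped == fused);
    }
  }
  return 0;
}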