Unverified Commit 5dd35580 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Feature] Improve sampling speed; Better pickle/unpickle; other fixes (#1299)

* improve performance of sample_neighbors

* some more improvements

* test script

* benchmarks

* multi process

* update more tests

* WIP

* add two APIs for state saving

* add create from state

* upd test

* missing file

* wip: pickle/unpickle

* more c apis

* find the problem of empty data array

* add null array; pickling speed is bad

* still bad perf

* still bad perf

* wip

* fix the pickle speed test; now everything looks good

* minor fix

* bugfix

* some lint fix

* address comments

* more fix

* fix lint

* add utest for random.choice

* add utest for dgl.rand_graph

* fix cpp utests

* try fix ci

* fix bug in TF backend

* upd choice docstring

* address comments

* upd

* try fix compile

* add comment
parent 00ba4094
...@@ -268,3 +268,4 @@ if __name__ == '__main__': ...@@ -268,3 +268,4 @@ if __name__ == '__main__':
test_pickling_graph() test_pickling_graph()
test_pickling_nodeflow() test_pickling_nodeflow()
test_pickling_batched_graph() test_pickling_batched_graph()
test_pickling_heterograph()
import dgl
import backend as F
import numpy as np
import unittest
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU random choice not implemented")
def test_random_choice():
    """Check ``dgl.random.choice`` across five sampling scenarios.

    Scenarios, in order:
      1. sampling with replacement from a tensor population,
      2. sampling a few elements without replacement,
      3. sampling the whole population without replacement,
      4. passing an integer instead of a tensor as the population,
      5. sampling without replacement using a probability tensor in which
         entries 37..39 are zero.

    The test is skipped when the backend's default context string is 'gpu'.
    """
    def _assert_in_range(samples, lower, upper):
        # Every sampled element must fall inside the half-open range [lower, upper).
        for pos in range(len(samples)):
            assert samples[pos] >= lower and samples[pos] < upper

    # Scenario 1: with replacement.
    population = F.arange(0, 100)
    drawn = dgl.random.choice(population, 10, replace=True, prob=None)
    assert len(drawn) == 10
    _assert_in_range(drawn, 0, 100)

    # Scenario 2: without replacement, small sample count.
    population = F.arange(0, 100)
    drawn = dgl.random.choice(population, 10, replace=False, prob=None)
    assert len(drawn) == 10
    _assert_in_range(drawn, 0, 100)

    # Scenario 3: without replacement, sample count equal to the population size.
    # Once sorted, the samples must match the population exactly.
    population = F.arange(0, 100)
    drawn = dgl.random.choice(population, 100, replace=False, prob=None)
    assert len(drawn) == 100
    sorted_samples = np.sort(F.asnumpy(drawn))
    assert np.array_equal(sorted_samples, F.asnumpy(population))

    # Scenario 4: the population is given as an integer rather than a tensor.
    drawn = dgl.random.choice(100, 100, replace=False, prob=None)
    assert len(drawn) == 100
    sorted_samples = np.sort(F.asnumpy(drawn))
    assert np.array_equal(sorted_samples, F.asnumpy(population))

    # Scenario 5: weighted sampling; indices 37, 38 and 39 get zero probability,
    # so drawing 97 elements without replacement must never return them.
    weights = np.ones((100,))
    weights[37:40] = 0.
    weights -= weights.min()
    weights /= weights.sum()
    weights = F.tensor(weights)
    drawn = dgl.random.choice(100, 97, replace=False, prob=weights)
    assert len(drawn) == 97
    for pos in range(len(drawn)):
        assert drawn[pos] < 37 or drawn[pos] >= 40
# Allow running this test file directly as a script.
if '__main__' == __name__:
    test_random_choice()
...@@ -271,7 +271,7 @@ def _test_sample_neighbors(hypersparse): ...@@ -271,7 +271,7 @@ def _test_sample_neighbors(hypersparse):
# test different fanouts for different relations # test different fanouts for different relations
for i in range(10): for i in range(10):
subg = dgl.sampling.sample_neighbors(hg, {'user' : [0,1], 'game' : 0}, [1, 2, 0, 2]) subg = dgl.sampling.sample_neighbors(hg, {'user' : [0,1], 'game' : 0}, [1, 2, 0, 2], replace=True)
assert len(subg.ntypes) == 3 assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4 assert len(subg.etypes) == 4
assert subg['follow'].number_of_edges() == 2 assert subg['follow'].number_of_edges() == 2
......
...@@ -137,7 +137,7 @@ TEST(RowwiseTest, TestCSRSampling) { ...@@ -137,7 +137,7 @@ TEST(RowwiseTest, TestCSRSampling) {
template <typename Idx, typename FloatType> template <typename Idx, typename FloatType>
void _TestCSRSamplingUniform(bool has_data) { void _TestCSRSamplingUniform(bool has_data) {
auto mat = CSR<Idx>(has_data); auto mat = CSR<Idx>(has_data);
FloatArray prob; FloatArray prob = aten::NullArray();
IdArray rows = NDArray::FromVector(std::vector<Idx>({0, 3})); IdArray rows = NDArray::FromVector(std::vector<Idx>({0, 3}));
for (int k = 0; k < 10; ++k) { for (int k = 0; k < 10; ++k) {
auto rst = CSRRowWiseSampling(mat, rows, 2, prob, true); auto rst = CSRRowWiseSampling(mat, rows, 2, prob, true);
...@@ -229,7 +229,7 @@ TEST(RowwiseTest, TestCOOSampling) { ...@@ -229,7 +229,7 @@ TEST(RowwiseTest, TestCOOSampling) {
template <typename Idx, typename FloatType> template <typename Idx, typename FloatType>
void _TestCOOSamplingUniform(bool has_data) { void _TestCOOSamplingUniform(bool has_data) {
auto mat = COO<Idx>(has_data); auto mat = COO<Idx>(has_data);
FloatArray prob; FloatArray prob = aten::NullArray();
IdArray rows = NDArray::FromVector(std::vector<Idx>({0, 3})); IdArray rows = NDArray::FromVector(std::vector<Idx>({0, 3}));
for (int k = 0; k < 10; ++k) { for (int k = 0; k < 10; ++k) {
auto rst = COORowWiseSampling(mat, rows, 2, prob, true); auto rst = COORowWiseSampling(mat, rows, 2, prob, true);
......
...@@ -14,14 +14,12 @@ aten::CSRMatrix CSR1() { ...@@ -14,14 +14,12 @@ aten::CSRMatrix CSR1() {
// [0, 0, 1, 1, 0], // [0, 0, 1, 1, 0],
// [0, 0, 0, 0, 0]] // [0, 0, 0, 0, 0]]
// data: [0, 2, 3, 1, 4] // data: [0, 2, 3, 1, 4]
aten::CSRMatrix csr; return aten::CSRMatrix(
csr.num_rows = 4; 4, 5,
csr.num_cols = 5; aten::VecToIdArray(std::vector<IDX>({0, 2, 3, 5, 5}), sizeof(IDX)*8, CTX),
csr.indptr = aten::VecToIdArray(std::vector<IDX>({0, 2, 3, 5, 5}), sizeof(IDX)*8, CTX); aten::VecToIdArray(std::vector<IDX>({1, 2, 0, 2, 3}), sizeof(IDX)*8, CTX),
csr.indices = aten::VecToIdArray(std::vector<IDX>({1, 2, 0, 2, 3}), sizeof(IDX)*8, CTX); aten::VecToIdArray(std::vector<IDX>({0, 2, 3, 1, 4}), sizeof(IDX)*8, CTX),
csr.data = aten::VecToIdArray(std::vector<IDX>({0, 2, 3, 1, 4}), sizeof(IDX)*8, CTX); false);
csr.sorted = false;
return csr;
} }
template <typename IDX> template <typename IDX>
...@@ -32,14 +30,12 @@ aten::CSRMatrix CSR2() { ...@@ -32,14 +30,12 @@ aten::CSRMatrix CSR2() {
// [0, 0, 1, 1, 0], // [0, 0, 1, 1, 0],
// [0, 0, 0, 0, 0]] // [0, 0, 0, 0, 0]]
// data: [0, 2, 5, 3, 1, 4] // data: [0, 2, 5, 3, 1, 4]
aten::CSRMatrix csr; return aten::CSRMatrix(
csr.num_rows = 4; 4, 5,
csr.num_cols = 5; aten::VecToIdArray(std::vector<IDX>({0, 3, 4, 6, 6}), sizeof(IDX)*8, CTX),
csr.indptr = aten::VecToIdArray(std::vector<IDX>({0, 3, 4, 6, 6}), sizeof(IDX)*8, CTX); aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 2, 3}), sizeof(IDX)*8, CTX),
csr.indices = aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 2, 3}), sizeof(IDX)*8, CTX); aten::VecToIdArray(std::vector<IDX>({0, 2, 5, 3, 1, 4}), sizeof(IDX)*8, CTX),
csr.data = aten::VecToIdArray(std::vector<IDX>({0, 2, 5, 3, 1, 4}), sizeof(IDX)*8, CTX); false);
csr.sorted = false;
return csr;
} }
template <typename IDX> template <typename IDX>
...@@ -51,12 +47,10 @@ aten::COOMatrix COO1() { ...@@ -51,12 +47,10 @@ aten::COOMatrix COO1() {
// data: [0, 2, 3, 1, 4] // data: [0, 2, 3, 1, 4]
// row : [0, 2, 0, 1, 2] // row : [0, 2, 0, 1, 2]
// col : [1, 2, 2, 0, 3] // col : [1, 2, 2, 0, 3]
aten::COOMatrix coo; return aten::COOMatrix(
coo.num_rows = 4; 4, 5,
coo.num_cols = 5; aten::VecToIdArray(std::vector<IDX>({0, 2, 0, 1, 2}), sizeof(IDX)*8, CTX),
coo.row = aten::VecToIdArray(std::vector<IDX>({0, 2, 0, 1, 2}), sizeof(IDX)*8, CTX); aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 3}), sizeof(IDX)*8, CTX));
coo.col = aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 3}), sizeof(IDX)*8, CTX);
return coo;
} }
template <typename IDX> template <typename IDX>
...@@ -69,12 +63,10 @@ aten::COOMatrix COO2() { ...@@ -69,12 +63,10 @@ aten::COOMatrix COO2() {
// data: [0, 2, 5, 3, 1, 4] // data: [0, 2, 5, 3, 1, 4]
// row : [0, 2, 0, 1, 2, 0] // row : [0, 2, 0, 1, 2, 0]
// col : [1, 2, 2, 0, 3, 2] // col : [1, 2, 2, 0, 3, 2]
aten::COOMatrix coo; return aten::COOMatrix(
coo.num_rows = 4; 4, 5,
coo.num_cols = 5; aten::VecToIdArray(std::vector<IDX>({0, 2, 0, 1, 2, 0}), sizeof(IDX)*8, CTX),
coo.row = aten::VecToIdArray(std::vector<IDX>({0, 2, 0, 1, 2, 0}), sizeof(IDX)*8, CTX); aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 3, 2}), sizeof(IDX)*8, CTX));
coo.col = aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 3, 2}), sizeof(IDX)*8, CTX);
return coo;
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment