test_sampler.cc 4.36 KB
Newer Older
1
2
3
4
5
#include <gtest/gtest.h>
#include <vector>
#include <algorithm>
#include <iostream>
#include "./common.h"
6
#include "../../src/random/cpu/sample_utils.h"
7
8

using namespace dgl;
9
10
11
using namespace dgl::aten;

// TODO: adapt this to Random::Choice
12
13
14
15
16

template <typename Idx, typename DType>
void _TestWithReplacement(RandomEngine *re) {
  Idx n_categories = 100;
  Idx n_rolls = 1000000;
17
  std::vector<DType> _prob;
18
19
  DType accum = 0.;
  for (Idx i = 0; i < n_categories; ++i) {
20
21
    _prob.push_back(re->Uniform<DType>());
    accum += _prob.back();
22
23
  }
  for (Idx i = 0; i < n_categories; ++i)
24
25
    _prob[i] /= accum;
  FloatArray prob = NDArray::FromVector(_prob);
26

27
  auto _check_given_sampler = [n_categories, n_rolls, &_prob](
28
      utils::BaseSampler<Idx, DType, true> *s) {
29
30
    std::vector<Idx> counter(n_categories, 0);
    for (Idx i = 0; i < n_rolls; ++i) {
31
      Idx dice = s->Draw();
32
33
34
      counter[dice]++;
    }
    for (Idx i = 0; i < n_categories; ++i)
35
36
37
38
39
40
41
42
43
44
45
      ASSERT_NEAR(static_cast<DType>(counter[i]) / n_rolls, _prob[i], 1e-2);
  };

  auto _check_random_choice = [n_categories, n_rolls, &_prob, prob]() {
    std::vector<int64_t> counter(n_categories, 0);
    for (Idx i = 0; i < n_rolls; ++i) {
      Idx dice = RandomEngine::ThreadLocal()->Choice<int64_t>(prob);
      counter[dice]++;
    }
    for (Idx i = 0; i < n_categories; ++i)
      ASSERT_NEAR(static_cast<DType>(counter[i]) / n_rolls, _prob[i], 1e-2);
46
47
  };

48
49
50
  utils::AliasSampler<Idx, DType, true> as(re, prob);
  utils::CDFSampler<Idx, DType, true> cs(re, prob);
  utils::TreeSampler<Idx, DType, true> ts(re, prob);
51
52
53
  _check_given_sampler(&as);
  _check_given_sampler(&cs);
  _check_given_sampler(&ts);
54
  _check_random_choice();
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
}

TEST(SampleUtilsTest, TestWithReplacement) {
  RandomEngine* re = RandomEngine::ThreadLocal();
  re->SetSeed(42);
  _TestWithReplacement<int32_t, float>(re);
  re->SetSeed(42);
  _TestWithReplacement<int32_t, double>(re);
  re->SetSeed(42);
  _TestWithReplacement<int64_t, float>(re);
  re->SetSeed(42);
  _TestWithReplacement<int64_t, double>(re);
};

template <typename Idx, typename DType>
void _TestWithoutReplacementOrder(RandomEngine *re) {
71
72
73
  // TODO(BarclayII): is there a reliable way to do this test?
  std::vector<DType> _prob = {1e6, 1e-6, 1e-2, 1e2};
  FloatArray prob = NDArray::FromVector(_prob);
74
75
76
  std::vector<Idx> ground_truth = {0, 3, 2, 1};

  auto _check_given_sampler = [&ground_truth](
77
      utils::BaseSampler<Idx, DType, false> *s) {
78
    for (size_t i = 0; i < ground_truth.size(); ++i) {
79
      Idx dice = s->Draw();
80
81
82
83
      ASSERT_EQ(dice, ground_truth[i]);
    }
  };

84
85
86
  utils::AliasSampler<Idx, DType, false> as(re, prob);
  utils::CDFSampler<Idx, DType, false> cs(re, prob);
  utils::TreeSampler<Idx, DType, false> ts(re, prob);
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  _check_given_sampler(&as);
  _check_given_sampler(&cs);
  _check_given_sampler(&ts);
}

TEST(SampleUtilsTest, TestWithoutReplacementOrder) {
  RandomEngine* re = RandomEngine::ThreadLocal();
  re->SetSeed(42);
  _TestWithoutReplacementOrder<int32_t, float>(re);
  re->SetSeed(42);
  _TestWithoutReplacementOrder<int32_t, double>(re);
  re->SetSeed(42);
  _TestWithoutReplacementOrder<int64_t, float>(re);
  re->SetSeed(42);
  _TestWithoutReplacementOrder<int64_t, double>(re);
};

template <typename Idx, typename DType>
void _TestWithoutReplacementUnique(RandomEngine *re) {
  Idx N = 1000000;
107
  std::vector<DType> _likelihood;
108
  for (Idx i = 0; i < N; ++i)
109
110
    _likelihood.push_back(re->Uniform<DType>());
  FloatArray likelihood = NDArray::FromVector(_likelihood);
111
112

  auto _check_given_sampler = [N](
113
      utils::BaseSampler<Idx, DType, false> *s) {
114
115
    std::vector<int> cnt(N, 0);
    for (Idx i = 0; i < N; ++i) {
116
      Idx dice = s->Draw();
117
118
119
120
121
122
      cnt[dice]++;
    }
    for (Idx i = 0; i < N; ++i)
      ASSERT_EQ(cnt[i], 1);
  };

123
124
125
  utils::AliasSampler<Idx, DType, false> as(re, likelihood);
  utils::CDFSampler<Idx, DType, false> cs(re, likelihood);
  utils::TreeSampler<Idx, DType, false> ts(re, likelihood);
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
  _check_given_sampler(&as);
  _check_given_sampler(&cs);
  _check_given_sampler(&ts);
}

TEST(SampleUtilsTest, TestWithoutReplacementUnique) {
  RandomEngine* re = RandomEngine::ThreadLocal();
  re->SetSeed(42);
  _TestWithoutReplacementUnique<int32_t, float>(re);
  re->SetSeed(42);
  _TestWithoutReplacementUnique<int32_t, double>(re);
  re->SetSeed(42);
  _TestWithoutReplacementUnique<int64_t, float>(re);
  re->SetSeed(42);
  _TestWithoutReplacementUnique<int64_t, double>(re);
};