"integration-tests/vscode:/vscode.git/clone" did not exist on "8c3669b287a1c651cb07049e67f1ce5967828167"
magic_number_division.cpp 5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"

__global__ void gpu_magic_number_division(uint32_t magic_multiplier,
                                          uint32_t magic_shift,
                                          const int32_t* p_dividend,
                                          int32_t* p_result,
                                          uint64_t num)
{
    uint64_t global_thread_num = blockDim.x * gridDim.x;

    uint64_t global_thread_id = blockIdx.x * blockDim.x + threadIdx.x;

    for(uint64_t data_id = global_thread_id; data_id < num; data_id += global_thread_num)
    {
        p_result[data_id] =
            ck::MagicDivision::DoMagicDivision(p_dividend[data_id], magic_multiplier, magic_shift);
    }
}

__global__ void
gpu_naive_division(int32_t divisor, const int32_t* p_dividend, int32_t* p_result, uint64_t num)
{
    uint64_t global_thread_num = blockDim.x * gridDim.x;

    uint64_t global_thread_id = blockIdx.x * blockDim.x + threadIdx.x;

    for(uint64_t data_id = global_thread_id; data_id < num; data_id += global_thread_num)
    {
        p_result[data_id] = p_dividend[data_id] / divisor;
    }
}

Jianfeng Yan's avatar
Jianfeng Yan committed
44
45
46
47
48
49
50
51
52
53
54
55
56
__host__ void cpu_magic_number_division(uint32_t magic_multiplier,
                                        uint32_t magic_shift,
                                        const int32_t* p_dividend,
                                        int32_t* p_result,
                                        uint64_t num)
{
    for(uint64_t data_id = 0; data_id < num; ++data_id)
    {
        p_result[data_id] =
            ck::MagicDivision::DoMagicDivision(p_dividend[data_id], magic_multiplier, magic_shift);
    }
}

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
template <typename T>
T check_error(const std::vector<T>& ref, const std::vector<T>& result)
{
    T error     = 0;
    T max_diff  = 0;
    T ref_value = 0, result_value = 0;

    for(std::size_t i = 0; i < ref.size(); ++i)
    {
        T diff = std::abs(ref[i] - result[i]);
        error += diff;

        if(max_diff < diff)
        {
            max_diff     = diff;
            ref_value    = ref[i];
            result_value = result[i];
        }
    }

    return max_diff;
}

int main(int, char*[])
{
    uint64_t num_divisor  = 4096;
    uint64_t num_dividend = 1L << 16;

    std::vector<int32_t> divisors_host(num_divisor);
    std::vector<int32_t> dividends_host(num_dividend);

    // generate divisor
    for(uint64_t i = 0; i < num_divisor; ++i)
    {
        divisors_host[i] = i + 1;
    }

    // generate dividend
    for(uint64_t i = 0; i < num_divisor; ++i)
    {
        dividends_host[i] = i;
    }

    DeviceMem dividends_dev_buf(sizeof(int32_t) * num_dividend);
    DeviceMem naive_result_dev_buf(sizeof(int32_t) * num_dividend);
    DeviceMem magic_result_dev_buf(sizeof(int32_t) * num_dividend);

    std::vector<int32_t> naive_result_host(num_dividend);
    std::vector<int32_t> magic_result_host(num_dividend);
Jianfeng Yan's avatar
Jianfeng Yan committed
106
    std::vector<int32_t> magic_result_host2(num_dividend);
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144

    dividends_dev_buf.ToDevice(dividends_host.data());

    bool pass = true;

    for(std::size_t i = 0; i < num_divisor; ++i)
    {
        // run naive division on GPU
        gpu_naive_division<<<1024, 256>>>(
            divisors_host[i],
            static_cast<const int32_t*>(dividends_dev_buf.GetDeviceBuffer()),
            static_cast<int32_t*>(naive_result_dev_buf.GetDeviceBuffer()),
            num_dividend);

        // calculate magic number
        uint32_t magic_multiplier, magic_shift;

        ck::tie(magic_multiplier, magic_shift) =
            ck::MagicDivision::CalculateMagicNumbers(divisors_host[i]);

        // run magic division on GPU
        gpu_magic_number_division<<<1024, 256>>>(
            magic_multiplier,
            magic_shift,
            static_cast<const int32_t*>(dividends_dev_buf.GetDeviceBuffer()),
            static_cast<int32_t*>(magic_result_dev_buf.GetDeviceBuffer()),
            num_dividend);

        naive_result_dev_buf.FromDevice(naive_result_host.data());
        magic_result_dev_buf.FromDevice(magic_result_host.data());

        int32_t max_diff = check_error(naive_result_host, magic_result_host);

        if(max_diff != 0)
        {
            pass = false;
            continue;
        }
Jianfeng Yan's avatar
Jianfeng Yan committed
145
146
147
148
149
150
151
152
153
154
155
156
157
158

        cpu_magic_number_division(magic_multiplier,
                                  magic_shift,
                                  dividends_host.data(),
                                  magic_result_host2.data(),
                                  num_dividend);

        max_diff = check_error(naive_result_host, magic_result_host2);

        if(max_diff != 0)
        {
            pass = false;
            continue;
        }
159
160
161
162
163
    }

    if(pass)
    {
        std::cout << "test magic number division: Pass" << std::endl;
164
       return 0;
165
166
167
168
    }
    else
    {
        std::cout << "test magic number division: Fail" << std::endl;
169
       return -1;
170
171
172
    }

}