device.hpp 1.81 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
#ifndef DEVICE_HPP
#define DEVICE_HPP
3

Chao Liu's avatar
Chao Liu committed
4
#include <memory>
5
#include <functional>
Chao Liu's avatar
Chao Liu committed
6
7
#include <thread>
#include <chrono>
8
9
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
10

Chao Liu's avatar
Chao Liu committed
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/// Handle for a raw buffer identified by a device pointer and a byte size.
///
/// Only the declarations are visible in this header; the member functions are
/// defined in another translation unit.
/// NOTE(review): presumably the constructor allocates `mem_size` bytes of
/// device memory and the destructor releases them -- confirm against the
/// implementation file.
struct DeviceMem
{
    // A buffer without a size makes no sense, so default construction is disabled.
    DeviceMem() = delete;

    /// @param mem_size size of the buffer, stored in mMemSize (presumably bytes).
    DeviceMem(std::size_t mem_size);

    // The struct holds a raw pointer and has a user-declared destructor. The
    // compiler-generated copy operations would merely duplicate the pointer,
    // leaving two objects that both clean up the same buffer. Copying is
    // therefore forbidden; pass DeviceMem by reference instead.
    DeviceMem(const DeviceMem&) = delete;
    DeviceMem& operator=(const DeviceMem&) = delete;

    /// @return the raw buffer pointer (mpDeviceBuf, presumably).
    void* GetDeviceBuffer();

    /// Transfer from the host pointer `p` to the buffer.
    /// NOTE(review): the number of bytes copied is not visible here
    /// (presumably mMemSize) -- confirm.
    void ToDevice(const void* p);

    /// Transfer from the buffer to the host pointer `p`.
    /// NOTE(review): the number of bytes copied is not visible here
    /// (presumably mMemSize) -- confirm.
    void FromDevice(void* p);

    ~DeviceMem();

    void* mpDeviceBuf;    // raw pointer to the buffer
    std::size_t mMemSize; // size associated with the buffer
};

// Opaque implementation type. Only its name is required in this header
// because KernelTimer refers to it through a pointer.
struct KernelTimerImpl;

/// Start/End style timer whose state is hidden behind KernelTimerImpl.
///
/// The constructor and destructor are declared here and defined elsewhere,
/// where KernelTimerImpl is a complete type.
struct KernelTimer
{
    // Sole data member: owning pointer to the hidden implementation object.
    std::unique_ptr<KernelTimerImpl> impl;

    KernelTimer();
    ~KernelTimer();

    // Mark the beginning and the end of the measured region.
    void Start();
    void End();

    /// @return the time measured between Start() and End().
    /// NOTE(review): the unit is not visible in this header (presumably
    /// milliseconds) -- confirm against the implementation.
    float GetElapsedTime() const;
};

37
38
// Project-level alias for the HIP stream handle type.
using device_stream_t = hipStream_t;

Chao Liu's avatar
Chao Liu committed
39
template <typename... Args, typename F>
Chao Liu's avatar
tidy  
Chao Liu committed
40
void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
41
{
Chao Liu's avatar
tidy  
Chao Liu committed
42
43
    hipStream_t stream_id = nullptr;

44
45
46
47
    hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
}

template <typename... Args, typename F>
Chao Liu's avatar
tidy  
Chao Liu committed
48
49
float launch_and_time_kernel(
    F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Chao Liu's avatar
Chao Liu committed
50
51
52
{
    KernelTimer timer;

Chao Liu's avatar
Chao Liu committed
53
    printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n",
Chao Liu's avatar
Chao Liu committed
54
55
56
57
58
59
60
61
62
           __func__,
           grid_dim.x,
           grid_dim.y,
           grid_dim.z,
           block_dim.x,
           block_dim.y,
           block_dim.z);

    printf("Warm up\n");
Chao Liu's avatar
Chao Liu committed
63

Chao Liu's avatar
tidy  
Chao Liu committed
64
65
    hipStream_t stream_id = nullptr;

Chao Liu's avatar
Chao Liu committed
66
    // warm up
67
    hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
Chao Liu's avatar
Chao Liu committed
68

Chao Liu's avatar
Chao Liu committed
69
    printf("Start running %d times...\n", nrepeat);
Chao Liu's avatar
Chao Liu committed
70

Chao Liu's avatar
Chao Liu committed
71
72
73
74
75
76
77
78
    timer.Start();

    for(int i = 0; i < nrepeat; ++i)
    {
        hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
    }

    timer.End();
79

Chao Liu's avatar
Chao Liu committed
80
81
    // std::this_thread::sleep_for (std::chrono::microseconds(10));

Chao Liu's avatar
Chao Liu committed
82
    return timer.GetElapsedTime() / nrepeat;
83
}
84
#endif