device.hpp 1.79 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
#ifndef DEVICE_HPP
#define DEVICE_HPP
3

Chao Liu's avatar
Chao Liu committed
4
#include <memory>
Chao Liu's avatar
Chao Liu committed
5
6
#include <thread>
#include <chrono>
7
8
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
9

Chao Liu's avatar
Chao Liu committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
// Handle for a buffer addressed through mpDeviceBuf, with its byte count kept in
// mMemSize. Only declarations are visible in this header; notes below that go beyond
// the signatures are assumptions to confirm against the out-of-line definitions.
struct DeviceMem
{
    // A size is always required, so default construction is disabled.
    DeviceMem() = delete;

    // Constructs the object for a buffer of mem_size bytes.
    // NOTE(review): presumably allocates the device buffer here -- confirm.
    DeviceMem(std::size_t mem_size);

    // Copying is disabled. The struct holds a raw pointer and declares its own
    // destructor, so an implicit member-wise copy would leave two objects running
    // that destructor on the same mpDeviceBuf value.
    DeviceMem(const DeviceMem&) = delete;
    DeviceMem& operator=(const DeviceMem&) = delete;

    // Returns the buffer pointer (presumably mpDeviceBuf -- confirm).
    void* GetDeviceBuffer();

    // NOTE(review): presumably copies mMemSize bytes from host pointer p into the
    // buffer -- confirm the direction and byte count against the definition.
    void ToDevice(const void* p);

    // NOTE(review): presumably copies mMemSize bytes from the buffer into host
    // pointer p -- confirm the direction and byte count against the definition.
    void FromDevice(void* p);

    // NOTE(review): presumably releases mpDeviceBuf -- confirm.
    ~DeviceMem();

    void* mpDeviceBuf;    // raw pointer to the buffer
    std::size_t mMemSize; // size associated with the buffer (bytes, per the ctor argument name)
};

struct KernelTimerImpl;

// Timer used to measure the interval between a Start() and an End() call.
// All state lives behind `impl` (KernelTimerImpl is only forward-declared in this
// header), so the constructor and destructor are declared here and must be defined
// where KernelTimerImpl is a complete type.
struct KernelTimer
{
    KernelTimer();
    ~KernelTimer();

    // Marks the beginning of the measured interval.
    void Start();

    // Marks the end of the measured interval.
    void End();

    // Returns the time measured between Start() and End().
    // NOTE(review): unit is not visible from this header (presumably milliseconds) -- confirm.
    float GetElapsedTime() const;

    // Owning pointer to the hidden implementation object.
    std::unique_ptr<KernelTimerImpl> impl;
};

36
37
// Alias for the HIP stream handle type (hipStream_t).
using device_stream_t = hipStream_t;

Chao Liu's avatar
Chao Liu committed
38
template <typename... Args, typename F>
Chao Liu's avatar
tidy  
Chao Liu committed
39
void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
40
{
Chao Liu's avatar
tidy  
Chao Liu committed
41
42
    hipStream_t stream_id = nullptr;

43
44
45
46
    hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
}

template <typename... Args, typename F>
Chao Liu's avatar
tidy  
Chao Liu committed
47
48
float launch_and_time_kernel(
    F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Chao Liu's avatar
Chao Liu committed
49
50
51
{
    KernelTimer timer;

Chao Liu's avatar
Chao Liu committed
52
    printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n",
Chao Liu's avatar
Chao Liu committed
53
54
55
56
57
58
59
60
61
           __func__,
           grid_dim.x,
           grid_dim.y,
           grid_dim.z,
           block_dim.x,
           block_dim.y,
           block_dim.z);

    printf("Warm up\n");
Chao Liu's avatar
Chao Liu committed
62

Chao Liu's avatar
tidy  
Chao Liu committed
63
64
    hipStream_t stream_id = nullptr;

Chao Liu's avatar
Chao Liu committed
65
    // warm up
66
    hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
Chao Liu's avatar
Chao Liu committed
67

Chao Liu's avatar
Chao Liu committed
68
    printf("Start running %d times...\n", nrepeat);
Chao Liu's avatar
Chao Liu committed
69

Chao Liu's avatar
Chao Liu committed
70
71
72
73
74
75
76
77
    timer.Start();

    for(int i = 0; i < nrepeat; ++i)
    {
        hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
    }

    timer.End();
78

Chao Liu's avatar
Chao Liu committed
79
80
    // std::this_thread::sleep_for (std::chrono::microseconds(10));

Chao Liu's avatar
Chao Liu committed
81
    return timer.GetElapsedTime() / nrepeat;
82
83
}

84
#endif