device.hpp 1.13 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
#pragma once
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>
Chao Liu's avatar
Chao Liu committed
3
#include "config.h"
Chao Liu's avatar
Chao Liu committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

// Handle for a buffer of `mMemSize` bytes referenced by `mpDeviceBuf`.
//
// Only the declarations are visible in this header; the member functions are
// defined elsewhere. NOTE(review): the names suggest the constructor allocates
// device memory and the destructor releases it -- confirm against the
// definitions.
//
// Copying is disabled: the struct stores a raw pointer and has a user-declared
// destructor, so an implicitly generated copy would leave two objects holding
// the same pointer (and, if the destructor frees it, release it twice).
struct DeviceMem
{
    DeviceMem() = delete;

    // Creates a handle for a buffer of `mem_size` bytes.
    DeviceMem(std::size_t mem_size);

    // Non-copyable, see the note on the struct.
    DeviceMem(const DeviceMem&) = delete;
    DeviceMem& operator=(const DeviceMem&) = delete;

    // Returns the raw buffer pointer.
    void* GetDeviceBuffer();

    // Transfers data from host pointer `p` into the buffer.
    // NOTE(review): presumably copies `mMemSize` bytes -- confirm.
    void ToDevice(const void* p);

    // Transfers the buffer contents to host pointer `p`.
    // NOTE(review): presumably copies `mMemSize` bytes -- confirm.
    void FromDevice(void* p);

    ~DeviceMem();

    void* mpDeviceBuf;    // raw buffer pointer
    std::size_t mMemSize; // buffer size in bytes (per the constructor's `mem_size`)
};

struct KernelTimerImpl;

struct KernelTimer
{
    KernelTimer();
    ~KernelTimer();
    void Start();
    void End();
    float GetElapsedTime() const;

    std::unique_ptr<KernelTimerImpl> impl;
};

Chao Liu's avatar
Chao Liu committed
31
template <typename... Args, typename F>
Chao Liu's avatar
Chao Liu committed
32
float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Chao Liu's avatar
Chao Liu committed
33
34
35
36
37
38
{
    KernelTimer timer;

#if DEVICE_BACKEND_HIP
    timer.Start();

Chao Liu's avatar
Chao Liu committed
39
    hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, 0, args...);
Chao Liu's avatar
Chao Liu committed
40
41
42
43
44

    timer.End();

    hipGetErrorString(hipGetLastError());
#elif DEVICE_BACKEND_CUDA
45
46
    const void* f  = reinterpret_cast<const void*>(kernel);
    void* p_args[] = {&args...};
Chao Liu's avatar
Chao Liu committed
47
48
49

    timer.Start();

Chao Liu's avatar
Chao Liu committed
50
    cudaError_t error = cudaLaunchKernel(f, grid_dim, block_dim, p_args, lds_byte, 0);
Chao Liu's avatar
Chao Liu committed
51
52
53
54
55
56
57
58

    timer.End();

    checkCudaErrors(error);
#endif

    return timer.GetElapsedTime();
}