device.hpp 1.1 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
#pragma once
#include <memory>
#include <stdexcept>
Chao Liu's avatar
Chao Liu committed
3
#include "config.h"
Chao Liu's avatar
Chao Liu committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

// Handle pairing a raw buffer pointer (mpDeviceBuf) with a size (mMemSize).
//
// Only declarations live in this header; every member function is defined in
// another translation unit, so the notes below describe the declared interface
// rather than the implementation.
//
// NOTE(review): presumably the constructor allocates `mem_size` bytes on the
// device and the destructor releases them -- confirm against the definitions.
// If so, the implicitly generated copy operations would let two objects
// release the same buffer; consider deleting them once callers are audited.
// NOTE(review): the single-argument constructor is not `explicit`, so a
// std::size_t converts implicitly to DeviceMem -- confirm this is intended.
struct DeviceMem
{
    // --- construction / destruction -------------------------------------

    // A DeviceMem cannot be created without a size.
    DeviceMem() = delete;

    // Creates the object for a buffer of `mem_size` (presumably bytes).
    DeviceMem(std::size_t mem_size);

    ~DeviceMem();

    // --- buffer access ----------------------------------------------------

    // Returns an untyped pointer (presumably mpDeviceBuf -- verify).
    void* GetDeviceBuffer();

    // Transfers data from the caller's read-only buffer `p`; judging by the
    // name the destination is the device buffer. The header does not state a
    // byte count; presumably mMemSize bytes are copied -- TODO confirm.
    void ToDevice(const void* p);

    // Transfers data into the caller's writable buffer `p`; judging by the
    // name the source is the device buffer. Presumably `p` must hold at least
    // mMemSize bytes -- TODO confirm.
    void FromDevice(void* p);

    // --- state (public; declaration order fixes the object layout) --------

    void* mpDeviceBuf;    // raw buffer pointer
    std::size_t mMemSize; // buffer size (presumably in bytes)
};

// Implementation type for KernelTimer. It is only forward-declared here; its
// definition is not part of this header.
struct KernelTimerImpl;

// Start/End style timer whose state lives behind an owning pointer to
// KernelTimerImpl.
//
// The constructor and destructor are declared here and defined elsewhere,
// presumably in the translation unit where KernelTimerImpl is complete (which
// std::unique_ptr needs in order to destroy it).
//
// Because `impl` is a std::unique_ptr and the destructor is user-declared,
// KernelTimer is neither copyable nor movable.
struct KernelTimer
{
    // Owning pointer to the implementation object.
    std::unique_ptr<KernelTimerImpl> impl;

    KernelTimer();
    ~KernelTimer();

    // Marks the beginning of the timed region.
    void Start();

    // Marks the end of the timed region.
    void End();

    // Returns the elapsed time as a float. Units and any synchronisation
    // behaviour are defined by the implementation -- presumably milliseconds
    // between Start() and End(); confirm against the definition.
    float GetElapsedTime() const;
};

Chao Liu's avatar
Chao Liu committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// Launches `kernel` through the enabled device backend, bracketing the launch
// with a KernelTimer, and returns the timer's reading.
//
// Template parameters:
//   Args - types of the kernel arguments (deduced from `args`).
//   F    - type of the kernel entry point.
//
// Parameters:
//   kernel    - kernel to launch.
//   grid_dim  - grid dimensions forwarded to the backend launch call.
//   block_dim - block dimensions forwarded to the backend launch call.
//   args      - kernel arguments; taken by value, so the launch works on
//               copies local to this function.
//
// Both backends launch with 0 bytes of dynamic shared memory on stream 0.
//
// Returns:
//   timer.GetElapsedTime(). Units are defined by KernelTimer's implementation,
//   which is not visible in this header.
//
// Errors:
//   HIP  - throws std::runtime_error carrying the hipGetErrorString() text
//          when hipGetLastError() reports a failure after the launch.
//   CUDA - the status returned by cudaLaunchKernel is passed to
//          checkCudaErrors().
//
// When neither DEVICE_BACKEND_HIP nor DEVICE_BACKEND_CUDA is enabled, nothing
// is launched and the timer is queried without ever being started.
template <typename... Args, typename F>
float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, Args... args)
{
    KernelTimer timer;

#if DEVICE_BACKEND_HIP
    timer.Start();

    hipLaunchKernelGGL(kernel, grid_dim, block_dim, 0, 0, args...);

    timer.End();

    // hipLaunchKernelGGL has no return value; a failed launch is only visible
    // through hipGetLastError(), so the status must be inspected explicitly
    // instead of being formatted and thrown away.
    const hipError_t error = hipGetLastError();
    if(error != hipSuccess)
    {
        throw std::runtime_error(hipGetErrorString(error));
    }
#elif DEVICE_BACKEND_CUDA
    const void* f = reinterpret_cast<const void*>(kernel);

    // cudaLaunchKernel expects an array holding one pointer per kernel
    // argument. The extra trailing slot keeps the array non-empty when the
    // kernel takes no arguments; the runtime only reads as many entries as the
    // kernel has parameters.
    void* p_args[sizeof...(Args) + 1] = {static_cast<void*>(&args)..., nullptr};

    timer.Start();

    cudaError_t error = cudaLaunchKernel(f, grid_dim, block_dim, p_args, 0, 0);

    timer.End();

    checkCudaErrors(error);
#endif

    return timer.GetElapsedTime();
}