device.hpp 1.1 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
#pragma once
#include <memory>
#include <stdexcept>
#include "config.h"
Chao Liu's avatar
Chao Liu committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

// Wrapper around a raw buffer pointer (mpDeviceBuf) and its size (mMemSize).
// Only declarations are visible in this header; all member functions are defined elsewhere.
//
// NOTE(review): the struct stores a raw pointer and declares a destructor, but declares no
// copy constructor or copy assignment, so copies are member-wise. If the destructor releases
// mpDeviceBuf (TODO confirm in the implementation), copying a DeviceMem would release the
// same buffer twice.
struct DeviceMem
{
    // Default construction is disabled: a size must always be supplied.
    DeviceMem() = delete;
    // mem_size: presumably the buffer size in bytes -- confirm against the definition.
    // NOTE(review): not `explicit`, so a std::size_t converts implicitly to DeviceMem.
    DeviceMem(std::size_t mem_size);
    // Presumably returns mpDeviceBuf -- confirm against the definition.
    void* GetDeviceBuffer();
    // p: source pointer, read-only. Presumably mMemSize bytes are copied from p into the
    // buffer -- TODO confirm the direction and byte count in the implementation.
    void ToDevice(const void* p);
    // p: destination pointer. Presumably mMemSize bytes are copied from the buffer into p
    // -- TODO confirm the direction and byte count in the implementation.
    void FromDevice(void* p);
    ~DeviceMem();

    // Raw buffer pointer; publicly accessible because this is a struct.
    void* mpDeviceBuf;
    // Size associated with the buffer (presumably in bytes -- confirm).
    std::size_t mMemSize;
};

// Implementation type for KernelTimer. Only forward-declared here, so it is an incomplete
// type in this header; its definition lives elsewhere.
struct KernelTimerImpl;

// Timer that launch_kernel uses to bracket a kernel launch: Start() before the launch,
// End() after it, then GetElapsedTime() for the result. All state is held behind `impl`.
struct KernelTimer
{
    KernelTimer();
    // Declared here without a body: std::unique_ptr needs the complete KernelTimerImpl type
    // to destroy it, and that type is incomplete in this header.
    ~KernelTimer();
    // Marks the beginning of the timed region.
    void Start();
    // Marks the end of the timed region.
    void End();
    // Returns the elapsed time between Start() and End() as a float.
    // NOTE(review): the unit is not visible in this header -- confirm in the implementation.
    float GetElapsedTime() const;

    // Owning pointer to the backend-specific implementation.
    std::unique_ptr<KernelTimerImpl> impl;
};

Chao Liu's avatar
Chao Liu committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// Launches `kernel` with the given grid/block dimensions and arguments, using 0 bytes of
// dynamic shared memory and stream 0, and returns the time reported by a KernelTimer that
// is started immediately before and ended immediately after the launch call.
//
// Template parameters:
//   Args - types of the kernel arguments (deduced from `args`).
//   F    - type of the kernel entry point (deduced from `kernel`).
//
// Parameters:
//   kernel    - kernel entry point to launch.
//   grid_dim  - grid dimensions of the launch.
//   block_dim - block dimensions of the launch.
//   args      - kernel arguments, taken by value and forwarded to the launch call.
//
// Returns:
//   timer.GetElapsedTime() for the Start()/End() pair around the launch.
//
// Errors:
//   HIP backend  - throws std::runtime_error carrying hipGetErrorString() of the error
//                  returned by hipGetLastError() when it is not hipSuccess.
//   CUDA backend - the status returned by cudaLaunchKernel is handed to checkCudaErrors.
//
// If neither DEVICE_BACKEND_HIP nor DEVICE_BACKEND_CUDA is set, nothing is launched and
// the timer is queried without having been started.
template <typename... Args, typename F>
float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, Args... args)
{
    KernelTimer timer;

#if DEVICE_BACKEND_HIP
    timer.Start();

    hipLaunchKernelGGL(kernel, grid_dim, block_dim, 0, 0, args...);

    timer.End();

    // hipLaunchKernelGGL has no return value, so the launch status has to be fetched
    // explicitly. Check it rather than discarding it, so that a failed launch is not
    // reported as a valid timing.
    const hipError_t error = hipGetLastError();
    if(error != hipSuccess)
    {
        throw std::runtime_error(hipGetErrorString(error));
    }
#elif DEVICE_BACKEND_CUDA
    const void* f = reinterpret_cast<const void*>(kernel);

    // cudaLaunchKernel takes an array with one pointer per kernel argument. The trailing
    // nullptr keeps the array non-empty when the kernel has no arguments (a zero-length
    // array is ill-formed); the extra slot lies beyond the kernel's parameter list.
    void* p_args[] = {&args..., nullptr};

    timer.Start();

    cudaError_t error = cudaLaunchKernel(f, grid_dim, block_dim, p_args, 0, 0);

    timer.End();

    checkCudaErrors(error);
#endif

    return timer.GetElapsedTime();
}