// device.hpp
#ifndef CK_DEVICE_HPP
#define CK_DEVICE_HPP

#include <cstddef>
#include <memory>
#include <stdexcept>
#include "config.hpp"

// NOTE(review): a using-directive at header scope makes every name in `ck`
// visible in each translation unit that includes this header. It is kept
// as-is because code that includes this header may rely on it.
using namespace ck;

// Handle for a device-side buffer: a raw pointer plus its size in bytes.
//
// All member functions are defined out of line (not visible in this header).
// NOTE(review): the destructor presumably releases mpDeviceBuf; if so, an
// implicit copy would release the same buffer twice. Copying is therefore
// disabled here. Confirm against the implementation file.
struct DeviceMem
{
    // A DeviceMem must always be created with an explicit size.
    DeviceMem() = delete;

    // mem_size: size of the buffer, presumably in bytes (TODO confirm).
    // `explicit` prevents an integer from silently converting to a DeviceMem.
    explicit DeviceMem(std::size_t mem_size);

    // Non-copyable: the struct holds a raw pointer and declares a destructor,
    // so a member-wise copy would leave two objects referring to one buffer.
    DeviceMem(const DeviceMem&) = delete;
    DeviceMem& operator=(const DeviceMem&) = delete;

    // Returns the buffer pointer (presumably mpDeviceBuf).
    void* GetDeviceBuffer();

    // Presumably copies host data at `p` into the device buffer (TODO confirm
    // the byte count used by the implementation).
    void ToDevice(const void* p);

    // Presumably copies the device buffer back into host memory at `p`.
    void FromDevice(void* p);

    ~DeviceMem();

    void* mpDeviceBuf;    // raw pointer to the buffer
    std::size_t mMemSize; // size associated with the buffer
};

// Backend-specific timer state; only declared here, defined elsewhere.
struct KernelTimerImpl;

// Start/End style timer used by launch_kernel to bracket a kernel launch.
// The implementation lives behind `impl`, so this header does not need the
// definition of KernelTimerImpl.
struct KernelTimer
{
    // Owning pointer to the hidden implementation object.
    std::unique_ptr<KernelTimerImpl> impl;

    // Declared here and defined out of line, where KernelTimerImpl is
    // expected to be a complete type.
    KernelTimer();
    ~KernelTimer();

    // Mark the beginning and the end of the timed region.
    void Start();
    void End();

    // Elapsed time between Start() and End() as a float.
    // NOTE(review): unit is presumably milliseconds; confirm in the implementation.
    float GetElapsedTime() const;
};

template <typename... Args, typename F>
Chao Liu's avatar
Chao Liu committed
36
float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Chao Liu's avatar
Chao Liu committed
37
38
39
{
    KernelTimer timer;

Chao Liu's avatar
Chao Liu committed
40
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
41
42
    timer.Start();

Chao Liu's avatar
Chao Liu committed
43
    hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, 0, args...);
Chao Liu's avatar
Chao Liu committed
44
45
46
47

    timer.End();

    hipGetErrorString(hipGetLastError());
Chao Liu's avatar
Chao Liu committed
48
#elif CK_DEVICE_BACKEND_NVIDIA
49
50
    const void* f  = reinterpret_cast<const void*>(kernel);
    void* p_args[] = {&args...};
Chao Liu's avatar
Chao Liu committed
51
52
53

    timer.Start();

Chao Liu's avatar
Chao Liu committed
54
    cudaError_t error = cudaLaunchKernel(f, grid_dim, block_dim, p_args, lds_byte, 0);
Chao Liu's avatar
Chao Liu committed
55
56
57
58
59
60
61
62

    timer.End();

    checkCudaErrors(error);
#endif

    return timer.GetElapsedTime();
}

#endif