device.cpp 2.58 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include "composable_kernel/utility/config.hpp"
Chao Liu's avatar
Chao Liu committed
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include "device.hpp"

DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
{
#if DEVICE_BACKEND_HIP
    hipGetErrorString(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
#elif DEVICE_BACKEND_CUDA
    checkCudaErrors(cudaMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
#endif
}

void* DeviceMem::GetDeviceBuffer() { return mpDeviceBuf; }

void DeviceMem::ToDevice(const void* p)
{
#if DEVICE_BACKEND_HIP
    hipGetErrorString(
        hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
#elif DEVICE_BACKEND_CUDA
    checkCudaErrors(
        cudaMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, cudaMemcpyHostToDevice));
#endif
}

void DeviceMem::FromDevice(void* p)
{
#if DEVICE_BACKEND_HIP
    hipGetErrorString(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
#elif DEVICE_BACKEND_CUDA
    checkCudaErrors(cudaMemcpy(p, mpDeviceBuf, mMemSize, cudaMemcpyDeviceToHost));
#endif
}

DeviceMem::~DeviceMem()
{
#if DEVICE_BACKEND_HIP
    hipGetErrorString(hipFree(mpDeviceBuf));
#elif DEVICE_BACKEND_CUDA
    checkCudaErrors(cudaFree(mpDeviceBuf));
#endif
}

struct KernelTimerImpl
{
    KernelTimerImpl()
    {
#if DEVICE_BACKEND_HIP
        hipEventCreate(&mStart);
        hipEventCreate(&mEnd);
#elif DEVICE_BACKEND_CUDA
        cudaEventCreate(&mStart);
        cudaEventCreate(&mEnd);
#endif
    }

    ~KernelTimerImpl()
    {
#if DEVICE_BACKEND_HIP
        hipEventDestroy(mStart);
        hipEventDestroy(mEnd);
#elif DEVICE_BACKEND_CUDA
        cudaEventDestroy(mStart);
        cudaEventDestroy(mEnd);
#endif
    }

    void Start()
    {
#if DEVICE_BACKEND_HIP
        hipEventRecord(mStart, 0);
#elif DEVICE_BACKEND_CUDA
        cudaEventRecord(mStart, 0);
#endif
    }

    void End()
    {
#if DEVICE_BACKEND_HIP
        hipEventRecord(mEnd, 0);
        hipEventSynchronize(mEnd);
#elif DEVICE_BACKEND_CUDA
        cudaEventRecord(mEnd, 0);
        cudaEventSynchronize(mEnd);
#endif
    }

    float GetElapsedTime() const
    {
        float time;
#if DEVICE_BACKEND_HIP
        hipEventElapsedTime(&time, mStart, mEnd);
#elif DEVICE_BACKEND_CUDA
        cudaEventElapsedTime(&time, mStart, mEnd);
#endif
        return time;
    }

#if DEVICE_BACKEND_HIP
    hipEvent_t mStart, mEnd;
#elif DEVICE_BACKEND_CUDA
    cudaEvent_t mStart, mEnd;
#endif
};

KernelTimer::KernelTimer() : impl(new KernelTimerImpl()) {}

KernelTimer::~KernelTimer() {}

void KernelTimer::Start() { impl->Start(); }

void KernelTimer::End() { impl->End(); }

float KernelTimer::GetElapsedTime() const { return impl->GetElapsedTime(); }