device.cpp 2.7 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include "config.hpp"
Chao Liu's avatar
Chao Liu committed
2
3
4
5
#include "device.hpp"

DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
{
Chao Liu's avatar
Chao Liu committed
6
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
7
    hipGetErrorString(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
Chao Liu's avatar
Chao Liu committed
8
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
9
10
11
12
13
14
15
16
    checkCudaErrors(cudaMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
#endif
}

void* DeviceMem::GetDeviceBuffer() { return mpDeviceBuf; }

void DeviceMem::ToDevice(const void* p)
{
Chao Liu's avatar
Chao Liu committed
17
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
18
19
    hipGetErrorString(
        hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
Chao Liu's avatar
Chao Liu committed
20
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
21
22
23
24
25
26
27
    checkCudaErrors(
        cudaMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, cudaMemcpyHostToDevice));
#endif
}

void DeviceMem::FromDevice(void* p)
{
Chao Liu's avatar
Chao Liu committed
28
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
29
    hipGetErrorString(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
Chao Liu's avatar
Chao Liu committed
30
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
31
32
33
34
35
36
    checkCudaErrors(cudaMemcpy(p, mpDeviceBuf, mMemSize, cudaMemcpyDeviceToHost));
#endif
}

DeviceMem::~DeviceMem()
{
Chao Liu's avatar
Chao Liu committed
37
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
38
    hipGetErrorString(hipFree(mpDeviceBuf));
Chao Liu's avatar
Chao Liu committed
39
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
40
41
42
43
44
45
46
47
    checkCudaErrors(cudaFree(mpDeviceBuf));
#endif
}

struct KernelTimerImpl
{
    KernelTimerImpl()
    {
Chao Liu's avatar
Chao Liu committed
48
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
49
50
        hipEventCreate(&mStart);
        hipEventCreate(&mEnd);
Chao Liu's avatar
Chao Liu committed
51
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
52
53
54
55
56
57
58
        cudaEventCreate(&mStart);
        cudaEventCreate(&mEnd);
#endif
    }

    ~KernelTimerImpl()
    {
Chao Liu's avatar
Chao Liu committed
59
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
60
61
        hipEventDestroy(mStart);
        hipEventDestroy(mEnd);
Chao Liu's avatar
Chao Liu committed
62
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
63
64
65
66
67
68
69
        cudaEventDestroy(mStart);
        cudaEventDestroy(mEnd);
#endif
    }

    void Start()
    {
Chao Liu's avatar
Chao Liu committed
70
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
71
        hipDeviceSynchronize();
Chao Liu's avatar
Chao Liu committed
72
        hipEventRecord(mStart, 0);
Chao Liu's avatar
Chao Liu committed
73
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
74
        cudaDeviceSynchronize();
Chao Liu's avatar
Chao Liu committed
75
76
77
78
79
80
        cudaEventRecord(mStart, 0);
#endif
    }

    void End()
    {
Chao Liu's avatar
Chao Liu committed
81
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
82
83
        hipEventRecord(mEnd, 0);
        hipEventSynchronize(mEnd);
Chao Liu's avatar
Chao Liu committed
84
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
85
86
87
88
89
90
91
92
        cudaEventRecord(mEnd, 0);
        cudaEventSynchronize(mEnd);
#endif
    }

    float GetElapsedTime() const
    {
        float time;
Chao Liu's avatar
Chao Liu committed
93
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
94
        hipEventElapsedTime(&time, mStart, mEnd);
Chao Liu's avatar
Chao Liu committed
95
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
96
97
98
99
100
        cudaEventElapsedTime(&time, mStart, mEnd);
#endif
        return time;
    }

Chao Liu's avatar
Chao Liu committed
101
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
102
    hipEvent_t mStart, mEnd;
Chao Liu's avatar
Chao Liu committed
103
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
104
105
106
107
108
109
110
111
112
113
114
115
116
    cudaEvent_t mStart, mEnd;
#endif
};

KernelTimer::KernelTimer() : impl(new KernelTimerImpl()) {}

KernelTimer::~KernelTimer() {}

void KernelTimer::Start() { impl->Start(); }

void KernelTimer::End() { impl->End(); }

float KernelTimer::GetElapsedTime() const { return impl->GetElapsedTime(); }