device.cpp 2.62 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include "config.hpp"
Chao Liu's avatar
Chao Liu committed
2
3
4
5
#include "device.hpp"

DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
{
Chao Liu's avatar
Chao Liu committed
6
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
7
    hipGetErrorString(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
Chao Liu's avatar
Chao Liu committed
8
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
9
    cudaMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize);
Chao Liu's avatar
Chao Liu committed
10
11
12
13
14
15
16
#endif
}

void* DeviceMem::GetDeviceBuffer() { return mpDeviceBuf; }

void DeviceMem::ToDevice(const void* p)
{
Chao Liu's avatar
Chao Liu committed
17
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
18
19
    hipGetErrorString(
        hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
Chao Liu's avatar
Chao Liu committed
20
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
21
    cudaMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, cudaMemcpyHostToDevice);
Chao Liu's avatar
Chao Liu committed
22
23
24
25
26
#endif
}

void DeviceMem::FromDevice(void* p)
{
Chao Liu's avatar
Chao Liu committed
27
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
28
    hipGetErrorString(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
Chao Liu's avatar
Chao Liu committed
29
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
30
    cudaMemcpy(p, mpDeviceBuf, mMemSize, cudaMemcpyDeviceToHost);
Chao Liu's avatar
Chao Liu committed
31
32
33
34
35
#endif
}

DeviceMem::~DeviceMem()
{
Chao Liu's avatar
Chao Liu committed
36
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
37
    hipGetErrorString(hipFree(mpDeviceBuf));
Chao Liu's avatar
Chao Liu committed
38
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
39
    cudaFree(mpDeviceBuf);
Chao Liu's avatar
Chao Liu committed
40
41
42
43
44
45
46
#endif
}

struct KernelTimerImpl
{
    KernelTimerImpl()
    {
Chao Liu's avatar
Chao Liu committed
47
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
48
49
        hipEventCreate(&mStart);
        hipEventCreate(&mEnd);
Chao Liu's avatar
Chao Liu committed
50
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
51
52
53
54
55
56
57
        cudaEventCreate(&mStart);
        cudaEventCreate(&mEnd);
#endif
    }

    ~KernelTimerImpl()
    {
Chao Liu's avatar
Chao Liu committed
58
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
59
60
        hipEventDestroy(mStart);
        hipEventDestroy(mEnd);
Chao Liu's avatar
Chao Liu committed
61
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
62
63
64
65
66
67
68
        cudaEventDestroy(mStart);
        cudaEventDestroy(mEnd);
#endif
    }

    void Start()
    {
Chao Liu's avatar
Chao Liu committed
69
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
70
        hipDeviceSynchronize();
Chao Liu's avatar
Chao Liu committed
71
        hipEventRecord(mStart, 0);
Chao Liu's avatar
Chao Liu committed
72
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
73
        cudaDeviceSynchronize();
Chao Liu's avatar
Chao Liu committed
74
75
76
77
78
79
        cudaEventRecord(mStart, 0);
#endif
    }

    void End()
    {
Chao Liu's avatar
Chao Liu committed
80
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
81
82
        hipEventRecord(mEnd, 0);
        hipEventSynchronize(mEnd);
Chao Liu's avatar
Chao Liu committed
83
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
84
85
86
87
88
89
90
91
        cudaEventRecord(mEnd, 0);
        cudaEventSynchronize(mEnd);
#endif
    }

    float GetElapsedTime() const
    {
        float time;
Chao Liu's avatar
Chao Liu committed
92
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
93
        hipEventElapsedTime(&time, mStart, mEnd);
Chao Liu's avatar
Chao Liu committed
94
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
95
96
97
98
99
        cudaEventElapsedTime(&time, mStart, mEnd);
#endif
        return time;
    }

Chao Liu's avatar
Chao Liu committed
100
#if CK_DEVICE_BACKEND_AMD
Chao Liu's avatar
Chao Liu committed
101
    hipEvent_t mStart, mEnd;
Chao Liu's avatar
Chao Liu committed
102
#elif CK_DEVICE_BACKEND_NVIDIA
Chao Liu's avatar
Chao Liu committed
103
104
105
106
107
108
109
110
111
112
113
114
115
    cudaEvent_t mStart, mEnd;
#endif
};

KernelTimer::KernelTimer() : impl(new KernelTimerImpl()) {}

KernelTimer::~KernelTimer() {}

void KernelTimer::Start() { impl->Start(); }

void KernelTimer::End() { impl->End(); }

float KernelTimer::GetElapsedTime() const { return impl->GetElapsedTime(); }