device.cpp 3.43 KB
Newer Older
1
#include <chrono>
2
3
#include <assert.h>
#include <string.h>
4
#include <stdlib.h>
Chao Liu's avatar
Chao Liu committed
5
6
#include "device.hpp"

7
#ifndef CK_NOGPU
Chao Liu's avatar
Chao Liu committed
8
9
DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
{
JD's avatar
JD committed
10
    hip_check_error(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
Chao Liu's avatar
Chao Liu committed
11
12
13
14
}

void* DeviceMem::GetDeviceBuffer() { return mpDeviceBuf; }

Chao Liu's avatar
Chao Liu committed
15
16
std::size_t DeviceMem::GetBufferSize() { return mMemSize; }

Chao Liu's avatar
Chao Liu committed
17
18
void DeviceMem::ToDevice(const void* p)
{
JD's avatar
JD committed
19
    hip_check_error(hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
Chao Liu's avatar
Chao Liu committed
20
21
22
23
}

void DeviceMem::FromDevice(void* p)
{
JD's avatar
JD committed
24
    hip_check_error(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
Chao Liu's avatar
Chao Liu committed
25
26
}

JD's avatar
JD committed
27
void DeviceMem::SetZero() { hip_check_error(hipMemset(mpDeviceBuf, 0, mMemSize)); }
Chao Liu's avatar
Chao Liu committed
28

JD's avatar
JD committed
29
DeviceMem::~DeviceMem() { hip_check_error(hipFree(mpDeviceBuf)); }
Chao Liu's avatar
Chao Liu committed
30
31
32
33
34

struct KernelTimerImpl
{
    KernelTimerImpl()
    {
JD's avatar
JD committed
35
36
        hip_check_error(hipEventCreate(&mStart));
        hip_check_error(hipEventCreate(&mEnd));
Chao Liu's avatar
Chao Liu committed
37
38
39
40
    }

    ~KernelTimerImpl()
    {
JD's avatar
JD committed
41
42
        hip_check_error(hipEventDestroy(mStart));
        hip_check_error(hipEventDestroy(mEnd));
Chao Liu's avatar
Chao Liu committed
43
44
45
46
    }

    void Start()
    {
JD's avatar
JD committed
47
48
        hip_check_error(hipDeviceSynchronize());
        hip_check_error(hipEventRecord(mStart, nullptr));
Chao Liu's avatar
Chao Liu committed
49
50
51
52
    }

    void End()
    {
JD's avatar
JD committed
53
54
        hip_check_error(hipEventRecord(mEnd, nullptr));
        hip_check_error(hipEventSynchronize(mEnd));
Chao Liu's avatar
Chao Liu committed
55
56
57
58
59
    }

    float GetElapsedTime() const
    {
        float time;
JD's avatar
JD committed
60
        hip_check_error(hipEventElapsedTime(&time, mStart, mEnd));
Chao Liu's avatar
Chao Liu committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
        return time;
    }

    hipEvent_t mStart, mEnd;
};

KernelTimer::KernelTimer() : impl(new KernelTimerImpl()) {}

KernelTimer::~KernelTimer() {}

void KernelTimer::Start() { impl->Start(); }

void KernelTimer::End() { impl->End(); }

float KernelTimer::GetElapsedTime() const { return impl->GetElapsedTime(); }
76
77
78
79
80
81
82
83
84
85
86
87
88
#endif

DeviceAlignedMemCPU::DeviceAlignedMemCPU(std::size_t mem_size, std::size_t alignment)
    : mMemSize(mem_size), mAlignment(alignment)
{
    if(mem_size == 0)
    {
        mpDeviceBuf = nullptr;
    }
    else
    {
        assert(!(alignment == 0 || (alignment & (alignment - 1)))); // check pow of 2

89
90
        // TODO: posix only
        int rtn = posix_memalign(&mpDeviceBuf, alignment, mem_size);
91

92
        assert(rtn == 0);
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
    }
}

void* DeviceAlignedMemCPU::GetDeviceBuffer() { return mpDeviceBuf; }

std::size_t DeviceAlignedMemCPU::GetBufferSize() { return mMemSize; }

void DeviceAlignedMemCPU::ToDevice(const void* p) { memcpy(mpDeviceBuf, p, mMemSize); }

void DeviceAlignedMemCPU::FromDevice(void* p) { memcpy(p, mpDeviceBuf, mMemSize); }

void DeviceAlignedMemCPU::SetZero() { memset(mpDeviceBuf, 0, mMemSize); }

DeviceAlignedMemCPU::~DeviceAlignedMemCPU()
{
    if(mpDeviceBuf != nullptr)
109
        free(mpDeviceBuf);
110
}
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

struct WallTimerImpl
{
    void Start() { mStart = std::chrono::high_resolution_clock::now(); }

    void End() { mStop = std::chrono::high_resolution_clock::now(); }

    float GetElapsedTime() const
    {
        return static_cast<float>(
                   std::chrono::duration_cast<std::chrono::microseconds>(mStop - mStart).count()) *
               1e-3;
    }

    std::chrono::time_point<std::chrono::high_resolution_clock> mStart;
    std::chrono::time_point<std::chrono::high_resolution_clock> mStop;
};

WallTimer::WallTimer() : impl(new WallTimerImpl()) {}

WallTimer::~WallTimer() {}

void WallTimer::Start() { impl->Start(); }

void WallTimer::End() { impl->End(); }

float WallTimer::GetElapsedTime() const { return impl->GetElapsedTime(); }