timer.hpp 2.12 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck_tile/core/config.hpp"
#include "ck_tile/host/hip_check_error.hpp"
#include <hip/hip_runtime.h>
#include <cstddef>
#include <chrono>

namespace ck_tile {

struct gpu_timer
{
    CK_TILE_HOST gpu_timer()
    {
        HIP_CHECK_ERROR(hipEventCreate(&start_evt));
        HIP_CHECK_ERROR(hipEventCreate(&stop_evt));
    }

    CK_TILE_HOST ~gpu_timer() noexcept(false)
    {
        HIP_CHECK_ERROR(hipEventDestroy(start_evt));
        HIP_CHECK_ERROR(hipEventDestroy(stop_evt));
    }

    CK_TILE_HOST void start(const hipStream_t& s)
    {
        HIP_CHECK_ERROR(hipDeviceSynchronize());
        HIP_CHECK_ERROR(hipEventRecord(start_evt, s));
    }

    CK_TILE_HOST void stop(const hipStream_t& s)
    {
        HIP_CHECK_ERROR(hipEventRecord(stop_evt, s));
        HIP_CHECK_ERROR(hipEventSynchronize(stop_evt));
    }
    // return in ms
    CK_TILE_HOST float duration() const
    {
        float ms = 0;
        HIP_CHECK_ERROR(hipEventElapsedTime(&ms, start_evt, stop_evt));
        return ms;
    }

    private:
    hipEvent_t start_evt, stop_evt;
};

struct cpu_timer
{
    // torch.utils.benchmark.Timer(), there is a sync inside each timer callback
    CK_TILE_HOST void start(const hipStream_t&)
    {
        HIP_CHECK_ERROR(hipDeviceSynchronize());
        start_tick = std::chrono::high_resolution_clock::now();
    }
    // torch.utils.benchmark.Timer(), there is a sync inside each timer callback
    CK_TILE_HOST void stop(const hipStream_t&)
    {
        HIP_CHECK_ERROR(hipDeviceSynchronize());
        stop_tick = std::chrono::high_resolution_clock::now();
    }
    // return in ms
    CK_TILE_HOST float duration() const
    {
        double sec =
            std::chrono::duration_cast<std::chrono::duration<double>>(stop_tick - start_tick)
                .count();
        return static_cast<float>(sec * 1e3);
    }

    private:
    std::chrono::time_point<std::chrono::high_resolution_clock> start_tick;
    std::chrono::time_point<std::chrono::high_resolution_clock> stop_tick;
};

} // namespace ck_tile