// File: gpu_stream.hpp
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

#pragma once

#include <cstdint>
#include <getopt.h>
#include <iostream>
#include <memory>
#include <string>
#include <variant>
#include <vector>

#include <cuda.h>
#include <cuda_runtime.h>
#include <numa.h>

#include "gpu_stream_kernels.hpp"
#include "gpu_stream_utils.hpp"

// NON_HIP is 1 when none of the HIP/HCC compiler macros are defined, and 0 otherwise.
//
// The platform test is evaluated here, in a real #if, rather than being stored in the macro body:
// a macro whose expansion produces `defined(...)` inside an #if condition has undefined behavior
// per the C++ standard (GCC/Clang diagnose it with -Wexpansion-to-defined). Defining the macro to a
// plain integer keeps `#if NON_HIP` / `#if !NON_HIP` working and also makes it usable in ordinary
// expressions.
#if !defined(__HIP_PLATFORM_HCC__) && !defined(__HCC__) && !defined(__HIPCC__)
#define NON_HIP 1
#else
#define NON_HIP 0
#endif

// NOTE(review): a using-directive at header scope makes every name in stream_config visible
// unqualified in every translation unit that includes this header. The unqualified Opts, BenchArgs
// and Kernel used by GpuStream below presumably resolve through it -- confirm before removing or
// narrowing this directive.
using namespace stream_config;

/**
 * @brief Driver class for the GPU stream benchmark.
 *
 * Owns a copy of the run options and a list of per-benchmark argument sets, and declares the helper
 * routines that operate on them. Only declarations live in this header; the definitions are elsewhere.
 *
 * The type is move-only: copying is deleted, moving is defaulted.
 *
 * NOTE(review): the int-returning members presumably report a status code (0 on success) -- confirm
 * against the definitions.
 */
class GpuStream {
  public:
    /// No default state exists: an Opts instance must always be supplied.
    GpuStream() = delete;

    /// Builds a benchmark instance from the given options.
    /// `explicit` prevents an Opts object from being silently converted into a GpuStream.
    explicit GpuStream(Opts &) noexcept;

    ~GpuStream() noexcept = default;

    // Non-copyable, movable.
    GpuStream(const GpuStream &) = delete;
    GpuStream &operator=(const GpuStream &) = delete;
    GpuStream(GpuStream &&) noexcept = default;
    GpuStream &operator=(GpuStream &&) noexcept = default;

    /// Public entry point of the benchmark.
    int Run();

  private:
    /// One owned argument set per benchmark. Currently the only alternative is BenchArgs<double>;
    /// presumably a variant is used so further element types can be added later -- confirm.
    using BenchArgsVariant = std::variant<std::unique_ptr<BenchArgs<double>>>;

    std::vector<BenchArgsVariant> bench_args_; ///< Argument sets handled by this instance.
    Opts opts_;                                ///< Options held by value.

    // Memory management functions
    template <typename T> cudaError_t GpuMallocDataBuf(T **, uint64_t);
    template <typename T> int PrepareValidationBuf(std::unique_ptr<BenchArgs<T>> &);
    template <typename T> int CheckBuf(std::unique_ptr<BenchArgs<T>> &, int);

    // Setup: the Prepare* members are paired with the Destroy* members declared below.
    template <typename T> int PrepareEvent(std::unique_ptr<BenchArgs<T>> &);
    template <typename T> int PrepareBufAndStream(std::unique_ptr<BenchArgs<T>> &);

    // Teardown
    template <typename T> int DestroyEvent(std::unique_ptr<BenchArgs<T>> &);
    template <typename T> int DestroyBufAndStream(std::unique_ptr<BenchArgs<T>> &);
    template <typename T> int Destroy(std::unique_ptr<BenchArgs<T>> &);

    // Benchmark functions
    template <typename T> int RunStreamKernel(std::unique_ptr<BenchArgs<T>> &, Kernel, int);
    float GetActualMemoryClockRate(int gpu_id);
    template <typename T> int RunStream(std::unique_ptr<BenchArgs<T>> &, const std::string &data_type, float peak_bw);

    // Helper functions
    int GetGpuCount(int *);
    int SetGpu(int gpu_id);
    // NOTE(review): GetMemoryClockRate and GetActualMemoryClockRate are distinct members with similar
    // names; how their results differ is not visible from this header -- see the definitions.
    float GetMemoryClockRate(int device_id, const cudaDeviceProp &prop);
    void PrintCudaDeviceInfo(int device_id, const cudaDeviceProp &prop, float memory_clock_mhz, float peak_bw);
};