// Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once #include #include #include #include #include #include #include #include #include "gpu_stream_kernels.hpp" #include "gpu_stream_utils.hpp" #define NON_HIP (!defined(__HIP_PLATFORM_HCC__) && !defined(__HCC__) && !defined(__HIPCC__)) using namespace stream_config; class GpuStream { public: GpuStream() = delete; // Delete default constructor GpuStream(Opts &) noexcept; // Constructor ~GpuStream() noexcept = default; // Destructor GpuStream(const GpuStream &) = delete; GpuStream &operator=(const GpuStream &) = delete; GpuStream(GpuStream &&) noexcept = default; GpuStream &operator=(GpuStream &&) noexcept = default; int Run(); private: using BenchArgsVariant = std::variant>>; std::vector bench_args_; Opts opts_; // Memory management functions template cudaError_t GpuMallocDataBuf(T **, uint64_t); template int PrepareValidationBuf(std::unique_ptr> &); template int CheckBuf(std::unique_ptr> &, int); template int PrepareEvent(std::unique_ptr> &); template int PrepareBufAndStream(std::unique_ptr> &); template int DestroyEvent(std::unique_ptr> &); template int DestroyBufAndStream(std::unique_ptr> &); template int Destroy(std::unique_ptr> &); // Benchmark functions template int RunStreamKernel(std::unique_ptr> &, Kernel, int); float GetActualMemoryClockRate(int gpu_id); template int RunStream(std::unique_ptr> &, const std::string &data_type, float peak_bw); // Helper functions int GetGpuCount(int *); int SetGpu(int gpu_id); float GetMemoryClockRate(int device_id, const cudaDeviceProp &prop); void PrintCudaDeviceInfo(int device_id, const cudaDeviceProp &prop, float memory_clock_mhz, float peak_bw); };