"...pipelines/alt_diffusion/pipeline_alt_diffusion.py" did not exist on "24895a1f494062d73028e31880c8848c6a674750"
Commit 0aa899aa authored by Jehandad Khan's avatar Jehandad Khan
Browse files

add hipEvent based timing to kernels

parent 44757d6b
...@@ -273,7 +273,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation ...@@ -273,7 +273,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
{ {
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0) std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
...@@ -336,6 +336,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation ...@@ -336,6 +336,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -376,6 +377,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation ...@@ -376,6 +377,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -394,9 +396,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation ...@@ -394,9 +396,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -312,7 +312,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add ...@@ -312,7 +312,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
{ {
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0) std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
...@@ -381,6 +381,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add ...@@ -381,6 +381,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -426,6 +427,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add ...@@ -426,6 +427,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -446,9 +448,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add ...@@ -446,9 +448,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -385,7 +385,7 @@ struct DeviceGemmXdlSplitK ...@@ -385,7 +385,7 @@ struct DeviceGemmXdlSplitK
std::cout << "arg.c_grid_desc_m_n_{ " << arg.c_grid_desc_m_n_.GetLength(I0) << ", " std::cout << "arg.c_grid_desc_m_n_{ " << arg.c_grid_desc_m_n_.GetLength(I0) << ", "
<< arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl; << arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl;
} }
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
const auto kbatch = arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I0); const auto kbatch = arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I0);
...@@ -417,6 +417,7 @@ struct DeviceGemmXdlSplitK ...@@ -417,6 +417,7 @@ struct DeviceGemmXdlSplitK
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -533,9 +534,9 @@ struct DeviceGemmXdlSplitK ...@@ -533,9 +534,9 @@ struct DeviceGemmXdlSplitK
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -391,7 +391,7 @@ struct DeviceGemmXdlSplitKCShuffle ...@@ -391,7 +391,7 @@ struct DeviceGemmXdlSplitKCShuffle
std::cout << "arg.c_grid_desc_m_n_{ " << arg.c_grid_desc_m_n_.GetLength(I0) << ", " std::cout << "arg.c_grid_desc_m_n_{ " << arg.c_grid_desc_m_n_.GetLength(I0) << ", "
<< arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl; << arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl;
} }
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
const auto kbatch = arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I0); const auto kbatch = arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I0);
...@@ -424,6 +424,7 @@ struct DeviceGemmXdlSplitKCShuffle ...@@ -424,6 +424,7 @@ struct DeviceGemmXdlSplitKCShuffle
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -544,9 +545,9 @@ struct DeviceGemmXdlSplitKCShuffle ...@@ -544,9 +545,9 @@ struct DeviceGemmXdlSplitKCShuffle
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -204,7 +204,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd ...@@ -204,7 +204,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
using gridwise_reduce = GridwiseReduction_mk_to_m_threadwise<InDataType, using gridwise_reduce = GridwiseReduction_mk_to_m_threadwise<InDataType,
OutDataType, OutDataType,
...@@ -247,6 +247,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd ...@@ -247,6 +247,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.a_grid_desc_m_k_, arg.a_grid_desc_m_k_,
arg.b_grid_desc_m_, arg.b_grid_desc_m_,
arg.in_element_op_, arg.in_element_op_,
...@@ -258,9 +259,9 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd ...@@ -258,9 +259,9 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
arg.p_out_indices_dev_); arg.p_out_indices_dev_);
} }
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -198,7 +198,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl ...@@ -198,7 +198,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
const auto in_grid_desc_m_k = const auto in_grid_desc_m_k =
DeviceReduceBlockWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_); DeviceReduceBlockWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_);
...@@ -246,6 +246,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl ...@@ -246,6 +246,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
in_grid_desc_m_k, in_grid_desc_m_k,
out_grid_desc_m, out_grid_desc_m,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -260,9 +261,9 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl ...@@ -260,9 +261,9 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}; };
}; };
......
...@@ -175,7 +175,7 @@ struct DeviceReduceBlockWiseSecondCall ...@@ -175,7 +175,7 @@ struct DeviceReduceBlockWiseSecondCall
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
const auto in_grid_desc_m_k = DeviceReduceBlockWiseSecondCall::MakeSrc2dDescriptor( const auto in_grid_desc_m_k = DeviceReduceBlockWiseSecondCall::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_); arg.inLengths_, arg.inStrides_);
...@@ -223,6 +223,7 @@ struct DeviceReduceBlockWiseSecondCall ...@@ -223,6 +223,7 @@ struct DeviceReduceBlockWiseSecondCall
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
in_grid_desc_m_k, in_grid_desc_m_k,
out_grid_desc_m, out_grid_desc_m,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -237,9 +238,9 @@ struct DeviceReduceBlockWiseSecondCall ...@@ -237,9 +238,9 @@ struct DeviceReduceBlockWiseSecondCall
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}; };
}; };
......
...@@ -234,7 +234,7 @@ struct DeviceReduceMultiBlockAtomicAdd ...@@ -234,7 +234,7 @@ struct DeviceReduceMultiBlockAtomicAdd
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool = false)
{ {
const auto in_grid_desc_m_k = DeviceReduceMultiBlockAtomicAdd::MakeSrc2dDescriptor( const auto in_grid_desc_m_k = DeviceReduceMultiBlockAtomicAdd::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations); arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations);
...@@ -318,9 +318,9 @@ struct DeviceReduceMultiBlockAtomicAdd ...@@ -318,9 +318,9 @@ struct DeviceReduceMultiBlockAtomicAdd
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}; };
}; };
......
...@@ -259,7 +259,7 @@ struct DeviceReduceMultiBlockPartialReduce ...@@ -259,7 +259,7 @@ struct DeviceReduceMultiBlockPartialReduce
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
const auto in_grid_desc_m_k = DeviceReduceMultiBlockPartialReduce::MakeSrc2dDescriptor( const auto in_grid_desc_m_k = DeviceReduceMultiBlockPartialReduce::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations); arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations);
...@@ -305,6 +305,7 @@ struct DeviceReduceMultiBlockPartialReduce ...@@ -305,6 +305,7 @@ struct DeviceReduceMultiBlockPartialReduce
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
in_grid_desc_m_k, in_grid_desc_m_k,
ws_desc_m_k, ws_desc_m_k,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -318,9 +319,9 @@ struct DeviceReduceMultiBlockPartialReduce ...@@ -318,9 +319,9 @@ struct DeviceReduceMultiBlockPartialReduce
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}; };
}; };
......
...@@ -198,7 +198,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE ...@@ -198,7 +198,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
const auto in_grid_desc_m_k = const auto in_grid_desc_m_k =
DeviceReduceThreadWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_); DeviceReduceThreadWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_);
...@@ -246,6 +246,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE ...@@ -246,6 +246,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
in_grid_desc_m_k, in_grid_desc_m_k,
out_grid_desc_m, out_grid_desc_m,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -259,9 +260,9 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE ...@@ -259,9 +260,9 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
}; };
}; };
......
...@@ -28,8 +28,8 @@ struct DeviceConvFwdPtr_t ...@@ -28,8 +28,8 @@ struct DeviceConvFwdPtr_t
std::vector<ck::index_t> conv_filter_strides, std::vector<ck::index_t> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations, std::vector<ck::index_t> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads, std::vector<ck::index_t> input_left_pads,
std::vector<ck::index_t> input_right_pads); // in,wei and out element ops are ignored for now since even if we change them, they cant be linked std::vector<ck::index_t> input_right_pads) const; // in,wei and out element ops are ignored for now since even if we change them, they cant be linked
std::unique_ptr<BaseInvoker> MakeInvokerPointer(); // requires including BaseInvoker headers std::unique_ptr<BaseInvoker> MakeInvokerPointer() const; // requires including BaseInvoker headers
std::string GetTypeString(); std::string GetTypeString();
bool IsSupportedArgument(const BaseArgument* arg_ptr); bool IsSupportedArgument(const BaseArgument* arg_ptr);
}; };
......
#ifndef DEVICE_HPP #ifndef DEVICE_HPP
#define DEVICE_HPP #define DEVICE_HPP
#include "ck/options.hpp"
#include <memory> #include <memory>
#include <functional> #include <functional>
#include <thread> #include <thread>
...@@ -8,6 +10,39 @@ ...@@ -8,6 +10,39 @@
#include "hip/hip_runtime.h" #include "hip/hip_runtime.h"
#include "hip/hip_fp16.h" #include "hip/hip_fp16.h"
inline void hip_check(hipError_t x)
{
if(x != hipSuccess)
throw std::runtime_error("Failed to run HIP call");
}
template<typename F, F f>
struct managed_deleter
{
template<typename T>
void operator()(T * t)
{
if(t != nullptr)
{
std::ignore = f(t);
}
}
};
template<typename T, typename F, F f>
using managed_pointer = std::unique_ptr<T, managed_deleter<F, f>>;
using hipEventPtr = managed_pointer<typename std::remove_pointer<hipEvent_t>::type, decltype(&hipEventDestroy), hipEventDestroy>;
inline hipEventPtr make_hip_event()
{
hipEvent_t result = nullptr;
hip_check(hipEventCreate(&result));
return hipEventPtr{result};
}
struct DeviceMem struct DeviceMem
{ {
DeviceMem() = delete; DeviceMem() = delete;
...@@ -44,9 +79,9 @@ void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte ...@@ -44,9 +79,9 @@ void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte
template <typename... Args, typename F> template <typename... Args, typename F>
float launch_and_time_kernel( float launch_and_time_kernel(
F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, hipStream_t stream_id, Args... args) F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, hipStream_t stream_id, bool measure_time, Args... args)
{ {
#if 1 #if CK_TIME_KERNELS
KernelTimer timer; KernelTimer timer;
printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n", printf("%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d} \n",
...@@ -78,9 +113,27 @@ float launch_and_time_kernel( ...@@ -78,9 +113,27 @@ float launch_and_time_kernel(
return timer.GetElapsedTime() / nrepeat; return timer.GetElapsedTime() / nrepeat;
#else #else
std::ignore = nrepeat;
hipEventPtr start = nullptr;
hipEventPtr stop = nullptr;
float elapsed_time = 0.0f;
if(measure_time)
{
start = make_hip_event();
stop = make_hip_event();
hip_check(hipEventRecord(start.get(), stream_id));
}
launch_kernel(kernel, grid_dim, block_dim, lds_byte, stream_id, args...); launch_kernel(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
return 0; if(measure_time)
{
hip_check(hipEventRecord(stop.get(), stream_id));
hip_check(hipEventSynchronize(stop.get()));
hip_check(hipEventElapsedTime(&elapsed_time, start.get(), stop.get()));
}
return elapsed_time;
#endif #endif
} }
#endif #endif
...@@ -84,7 +84,7 @@ struct ReferenceBatchedGemm : public device::BaseOperator ...@@ -84,7 +84,7 @@ struct ReferenceBatchedGemm : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override float Run(const device::BaseArgument* p_arg, int, hipStream_t, bool) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -114,7 +114,7 @@ struct ReferenceConvWrw : public device::BaseOperator ...@@ -114,7 +114,7 @@ struct ReferenceConvWrw : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override float Run(const device::BaseArgument* p_arg, int, hipStream_t, bool) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -129,7 +129,7 @@ struct ReferenceConvBwdData : public device::BaseOperator ...@@ -129,7 +129,7 @@ struct ReferenceConvBwdData : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override float Run(const device::BaseArgument* p_arg, int, hipStream_t, bool) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -171,7 +171,7 @@ struct ReferenceConvFwd : public device::BaseOperator ...@@ -171,7 +171,7 @@ struct ReferenceConvFwd : public device::BaseOperator
} }
} }
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override float Run(const device::BaseArgument* p_arg, int, hipStream_t, bool) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -117,7 +117,7 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator ...@@ -117,7 +117,7 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override float Run(const device::BaseArgument* p_arg, int, hipStream_t, bool) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -123,7 +123,7 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator ...@@ -123,7 +123,7 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override float Run(const device::BaseArgument* p_arg, int, hipStream_t, bool) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -82,7 +82,7 @@ struct ReferenceGemm : public device::BaseOperator ...@@ -82,7 +82,7 @@ struct ReferenceGemm : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override float Run(const device::BaseArgument* p_arg, int, hipStream_t, bool) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -82,7 +82,7 @@ struct ReferenceGemmBias2D : public device::BaseOperator ...@@ -82,7 +82,7 @@ struct ReferenceGemmBias2D : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override float Run(const device::BaseArgument* p_arg, int, hipStream_t, bool) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment