Commit 0aa899aa authored by Jehandad Khan's avatar Jehandad Khan
Browse files

add hipEvent based timing to kernels

parent 44757d6b
...@@ -27,6 +27,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) ...@@ -27,6 +27,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_EXTENSIONS OFF)
message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
option(CK_TIME_KERNELS "Time every kernel and log parameters" OFF)
## OpenMP ## OpenMP
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# workaround issue hipcc in rocm3.5 cannot find openmp # workaround issue hipcc in rocm3.5 cannot find openmp
...@@ -227,7 +229,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) ...@@ -227,7 +229,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
configure_file("${PROJECT_SOURCE_DIR}/include/ck/hip_version.hpp.in" "${PROJECT_BINARY_DIR}/include/ck/hip_version.hpp") configure_file("${PROJECT_SOURCE_DIR}/include/ck/options.hpp.in" "${PROJECT_BINARY_DIR}/include/ck/options.hpp")
include_directories(BEFORE include_directories(BEFORE
${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/include
......
cmake_minimum_required(VERSION 3.15) cmake_minimum_required(VERSION 3.15)
project(ck_app) project(ck_app)
add_compile_options(-std=c++14)
# add_link_options(--offload-arch=gfx908)
set(CMAKE_CXX_STANDARD 14) #set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True) #set(CMAKE_CXX_STANDARD_REQUIRED True)
find_package(composable_kernel 1.0.0 COMPONENTS device_operations host_tensor) find_package(composable_kernel 1.0.0 COMPONENTS device_operations host_tensor)
find_package(hip REQUIRED PATHS /opt/rocm) find_package(hip REQUIRED PATHS /opt/rocm)
......
...@@ -28,6 +28,36 @@ enum ConvOutputLayout ...@@ -28,6 +28,36 @@ enum ConvOutputLayout
NHWK, // 1 NHWK, // 1
}; };
// Code to check CUDA errors
void check_cuda_error(void)
{
hipError_t err = hipGetLastError();
if (err != hipSuccess)
{
std::cerr
<< "Error: "
<< hipGetErrorString(err)
<< std::endl;
exit(err);
}
}
std::string getDeviceName(int device)
{
struct hipDeviceProp_t prop;
hipGetDeviceProperties(&prop, device);
check_cuda_error();
return std::string(prop.name);
}
int getDriver(void)
{
int driver;
hipDriverGetVersion(&driver);
check_cuda_error();
return driver;
}
namespace ck { namespace ck {
namespace app { namespace app {
...@@ -127,6 +157,14 @@ void profile_conv_fwd_impl(int do_verification, ...@@ -127,6 +157,14 @@ void profile_conv_fwd_impl(int do_verification,
float best_ave_time = 0; float best_ave_time = 0;
float best_tflops = 0; float best_tflops = 0;
float best_gb_per_sec = 0; float best_gb_per_sec = 0;
int deviceIndex = 0;
hipSetDevice(deviceIndex);
check_cuda_error();
hipStream_t stream_id = nullptr;
hipStreamCreate(&stream_id);
check_cuda_error();
// profile device Conv instances // profile device Conv instances
for(auto& conv_ptr : conv_ptrs) for(auto& conv_ptr : conv_ptrs)
...@@ -151,8 +189,7 @@ void profile_conv_fwd_impl(int do_verification, ...@@ -151,8 +189,7 @@ void profile_conv_fwd_impl(int do_verification,
if(conv_ptr.IsSupportedArgument(argument_ptr.get())) if(conv_ptr.IsSupportedArgument(argument_ptr.get()))
{ {
std::string conv_name = conv_ptr.GetTypeString(); std::string conv_name = conv_ptr.GetTypeString();
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat, stream_id, true);
float ave_time = invoker_ptr->Run(argument_ptr.get(), nrepeat, nullptr);
std::size_t flop = std::size_t(2) * N * K * Ho * Wo * C * Y * X; std::size_t flop = std::size_t(2) * N * K * Ho * Wo * C * Y * X;
......
...@@ -22,6 +22,7 @@ function(add_example_executable EXAMPLE_NAME) ...@@ -22,6 +22,7 @@ function(add_example_executable EXAMPLE_NAME)
message("adding example ${EXAMPLE_NAME}") message("adding example ${EXAMPLE_NAME}")
add_executable(${EXAMPLE_NAME} ${ARGN}) add_executable(${EXAMPLE_NAME} ${ARGN})
target_link_libraries(${EXAMPLE_NAME} PRIVATE host_tensor) target_link_libraries(${EXAMPLE_NAME} PRIVATE host_tensor)
set_target_properties(${EXAMPLE_NAME} PROPERTIES EXCLUDE_FROM_ALL 1)
add_dependencies(examples ${EXAMPLE_NAME}) add_dependencies(examples ${EXAMPLE_NAME})
endfunction(add_example_executable EXAMPLE_NAME) endfunction(add_example_executable EXAMPLE_NAME)
......
#pragma once
// "_PACKAGE_" to avoid name contentions: the macros like
// HIP_VERSION_MAJOR are defined in HIP_VERSION.h.
// clang-format off
#define CK_HIP_PACKAGE_VERSION_MAJOR @CK_HIP_VERSION_MAJOR@
#define CK_HIP_PACKAGE_VERSION_MINOR @CK_HIP_VERSION_MINOR@
#define CK_HIP_PACKAGE_VERSION_PATCH @CK_HIP_VERSION_PATCH@
// clang-format on
#ifndef CK_HIP_PACKAGE_VERSION_MAJOR
#define CK_HIP_PACKAGE_VERSION_MAJOR 0
#endif
#ifndef CK_HIP_PACKAGE_VERSION_MINOR
#define CK_HIP_PACKAGE_VERSION_MINOR 0
#endif
#ifndef CK_HIP_PACKAGE_VERSION_PATCH
#define CK_HIP_PACKAGE_VERSION_PATCH 0
#endif
// 3 decimal digits for major and minor, 6 digits for patch number.
// Max number is 999,999,999999 == 0xE8,D4A5,0FFF that fits into 64-bit math.
#if CK_HIP_PACKAGE_VERSION_MAJOR > 999 || CK_HIP_PACKAGE_VERSION_MAJOR > 999 || \
CK_HIP_PACKAGE_VERSION_PATCH > 999999
#error "Too big HIP version number(s)"
#endif
#define CK_HIP_PACKAGE_VERSION_FLAT \
((CK_HIP_PACKAGE_VERSION_MAJOR * 1000ULL + CK_HIP_PACKAGE_VERSION_MINOR) * 1000000 + \
CK_HIP_PACKAGE_VERSION_PATCH)
#pragma once
#cmakedefine01 CK_TIME_KERNELS
\ No newline at end of file
...@@ -22,7 +22,7 @@ struct BaseInvoker ...@@ -22,7 +22,7 @@ struct BaseInvoker
BaseInvoker(const BaseInvoker&) = default; BaseInvoker(const BaseInvoker&) = default;
BaseInvoker& operator=(const BaseInvoker&) = default; BaseInvoker& operator=(const BaseInvoker&) = default;
virtual float Run(const BaseArgument*, int = 1, hipStream_t = nullptr){return -1;} virtual float Run(const BaseArgument*, int = 1, hipStream_t = nullptr, bool = false){return -1;}
virtual ~BaseInvoker() {} virtual ~BaseInvoker() {}
}; };
......
...@@ -274,7 +274,7 @@ struct DeviceBatchedGemmXdl ...@@ -274,7 +274,7 @@ struct DeviceBatchedGemmXdl
{ {
using Argument = DeviceBatchedGemmXdl::Argument; using Argument = DeviceBatchedGemmXdl::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
{ {
std::cout << "arg.a_grid_desc_g_k0_m_k1_{" std::cout << "arg.a_grid_desc_g_k0_m_k1_{"
...@@ -336,6 +336,7 @@ struct DeviceBatchedGemmXdl ...@@ -336,6 +336,7 @@ struct DeviceBatchedGemmXdl
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -369,6 +370,7 @@ struct DeviceBatchedGemmXdl ...@@ -369,6 +370,7 @@ struct DeviceBatchedGemmXdl
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -385,9 +387,9 @@ struct DeviceBatchedGemmXdl ...@@ -385,9 +387,9 @@ struct DeviceBatchedGemmXdl
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -414,7 +414,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W ...@@ -414,7 +414,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
<< arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl; << arg.c_grid_desc_m_n_.GetLength(I1) << "}" << std::endl;
} }
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
ShowInfo(arg); ShowInfo(arg);
if(!GridwiseGemm::CheckValidity(arg.a_grid_desc_kbatch_k0_m_k1_, if(!GridwiseGemm::CheckValidity(arg.a_grid_desc_kbatch_k0_m_k1_,
...@@ -445,6 +445,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W ...@@ -445,6 +445,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -561,9 +562,9 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W ...@@ -561,9 +562,9 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return ave_time; return ave_time;
} }
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -521,7 +521,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -521,7 +521,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
nrepeat = 1; nrepeat = 1;
float ave_time = 0; float ave_time = 0;
...@@ -600,6 +600,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -600,6 +600,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -634,6 +635,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -634,6 +635,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -649,9 +651,9 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -649,9 +651,9 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time; return ave_time;
} }
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -642,7 +642,7 @@ struct ...@@ -642,7 +642,7 @@ struct
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
#if 0 #if 0
{ {
...@@ -734,6 +734,7 @@ struct ...@@ -734,6 +734,7 @@ struct
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -779,6 +780,7 @@ struct ...@@ -779,6 +780,7 @@ struct
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -798,9 +800,9 @@ struct ...@@ -798,9 +800,9 @@ struct
return ave_time; return ave_time;
} }
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -607,7 +607,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X ...@@ -607,7 +607,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
#if 0 #if 0
{ {
...@@ -693,6 +693,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X ...@@ -693,6 +693,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -733,6 +734,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X ...@@ -733,6 +734,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -750,9 +752,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X ...@@ -750,9 +752,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
return ave_time; return ave_time;
} }
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -568,7 +568,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W ...@@ -568,7 +568,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
#if 0 #if 0
{ {
...@@ -670,6 +670,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W ...@@ -670,6 +670,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -705,6 +706,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W ...@@ -705,6 +706,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -720,9 +722,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W ...@@ -720,9 +722,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return ave_time; return ave_time;
} }
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -450,7 +450,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -450,7 +450,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
#if 0 #if 0
{ {
...@@ -506,6 +506,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -506,6 +506,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -538,6 +539,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -538,6 +539,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -553,9 +555,9 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -553,9 +555,9 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time; return ave_time;
} }
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -98,7 +98,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W ...@@ -98,7 +98,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
const auto naive_conv3d_fwd = const auto naive_conv3d_fwd =
ref::naive_conv_fwd_ndhwc_kzyxc_ndhwk<InDataType, ref::naive_conv_fwd_ndhwc_kzyxc_ndhwk<InDataType,
...@@ -115,6 +115,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W ...@@ -115,6 +115,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
dim3(256), dim3(256),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_in_, arg.p_in_,
arg.p_wei_, arg.p_wei_,
arg.p_out_, arg.p_out_,
...@@ -144,9 +145,9 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W ...@@ -144,9 +145,9 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -430,7 +430,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_ ...@@ -430,7 +430,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
{ {
std::cout << "num_batches_of_GEMM = " << arg.num_subbatches_ << std::endl; std::cout << "num_batches_of_GEMM = " << arg.num_subbatches_ << std::endl;
...@@ -485,6 +485,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_ ...@@ -485,6 +485,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -521,6 +522,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_ ...@@ -521,6 +522,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -541,9 +543,9 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_ ...@@ -541,9 +543,9 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -591,7 +591,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -591,7 +591,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{ {
using Argument = DeviceOp::Argument; using Argument = DeviceOp::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
{ {
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0) std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
...@@ -645,6 +645,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -645,6 +645,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -677,6 +678,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -677,6 +678,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -692,9 +694,9 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K ...@@ -692,9 +694,9 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return ave_time; return ave_time;
} }
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -290,7 +290,7 @@ struct DeviceGemmXdl ...@@ -290,7 +290,7 @@ struct DeviceGemmXdl
{ {
using Argument = DeviceGemmXdl::Argument; using Argument = DeviceGemmXdl::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
{ {
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0) std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
...@@ -344,6 +344,7 @@ struct DeviceGemmXdl ...@@ -344,6 +344,7 @@ struct DeviceGemmXdl
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -376,6 +377,7 @@ struct DeviceGemmXdl ...@@ -376,6 +377,7 @@ struct DeviceGemmXdl
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -392,9 +394,9 @@ struct DeviceGemmXdl ...@@ -392,9 +394,9 @@ struct DeviceGemmXdl
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -249,7 +249,7 @@ struct DeviceGemmXdl_C_Shuffle ...@@ -249,7 +249,7 @@ struct DeviceGemmXdl_C_Shuffle
{ {
using Argument = DeviceGemmXdl_C_Shuffle::Argument; using Argument = DeviceGemmXdl_C_Shuffle::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
{ {
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0) std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
...@@ -306,6 +306,7 @@ struct DeviceGemmXdl_C_Shuffle ...@@ -306,6 +306,7 @@ struct DeviceGemmXdl_C_Shuffle
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -341,6 +342,7 @@ struct DeviceGemmXdl_C_Shuffle ...@@ -341,6 +342,7 @@ struct DeviceGemmXdl_C_Shuffle
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -357,9 +359,9 @@ struct DeviceGemmXdl_C_Shuffle ...@@ -357,9 +359,9 @@ struct DeviceGemmXdl_C_Shuffle
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
...@@ -268,7 +268,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d ...@@ -268,7 +268,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
{ {
using Argument = DeviceGemmXdl_C_Shuffle_Bias_2d::Argument; using Argument = DeviceGemmXdl_C_Shuffle_Bias_2d::Argument;
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false)
{ {
{ {
std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0) std::cout << "arg.a_grid_desc_k0_m_k1_{" << arg.a_grid_desc_k0_m_k1_.GetLength(I0)
...@@ -331,6 +331,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d ...@@ -331,6 +331,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -371,6 +372,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d ...@@ -371,6 +372,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id, stream_id,
measure_time,
arg.p_a_grid_, arg.p_a_grid_,
arg.p_b_grid_, arg.p_b_grid_,
arg.p_c_grid_, arg.p_c_grid_,
...@@ -389,9 +391,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d ...@@ -389,9 +391,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
} }
// polymorphic // polymorphic
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr, bool measure_time = false) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id, measure_time);
} }
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment