Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0aa899aa
Commit
0aa899aa
authored
Apr 06, 2022
by
Jehandad Khan
Browse files
add hipEvent based timing to kernels
parent
44757d6b
Changes
46
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
114 additions
and
73 deletions
+114
-73
CMakeLists.txt
CMakeLists.txt
+3
-1
example/14_client_app/CMakeLists.txt
example/14_client_app/CMakeLists.txt
+4
-2
example/14_client_app/client_app_impl.hpp
example/14_client_app/client_app_impl.hpp
+39
-2
example/CMakeLists.txt
example/CMakeLists.txt
+1
-0
include/ck/hip_version.hpp.in
include/ck/hip_version.hpp.in
+0
-28
include/ck/options.hpp.in
include/ck/options.hpp.in
+3
-0
include/ck/tensor_operation/gpu/device/device_base.hpp
include/ck/tensor_operation/gpu/device/device_base.hpp
+1
-1
include/ck/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
...k/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
...e_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
.../gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
..._fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
...nv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
...device/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
...ation/gpu/device/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp
.../gpu/device/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
...on/gpu/device/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
...ation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl.hpp
include/ck/tensor_operation/gpu/device/device_gemm_xdl.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle.hpp
...tensor_operation/gpu/device/device_gemm_xdl_c_shuffle.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp
...peration/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp
+5
-3
No files found.
CMakeLists.txt
View file @
0aa899aa
...
@@ -27,6 +27,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
...
@@ -27,6 +27,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set
(
CMAKE_CXX_EXTENSIONS OFF
)
set
(
CMAKE_CXX_EXTENSIONS OFF
)
message
(
"CMAKE_CXX_COMPILER_ID:
${
CMAKE_CXX_COMPILER_ID
}
"
)
message
(
"CMAKE_CXX_COMPILER_ID:
${
CMAKE_CXX_COMPILER_ID
}
"
)
option
(
CK_TIME_KERNELS
"Time every kernel and log parameters"
OFF
)
## OpenMP
## OpenMP
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"Clang"
)
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"Clang"
)
# workaround issue hipcc in rocm3.5 cannot find openmp
# workaround issue hipcc in rocm3.5 cannot find openmp
...
@@ -227,7 +229,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
...
@@ -227,7 +229,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
set
(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
)
set
(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
)
set
(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/bin
)
set
(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/bin
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/include/ck/
hip_vers
ion.hpp.in"
"
${
PROJECT_BINARY_DIR
}
/include/ck/
hip_vers
ion.hpp"
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/include/ck/
opt
ion
s
.hpp.in"
"
${
PROJECT_BINARY_DIR
}
/include/ck/
opt
ion
s
.hpp"
)
include_directories
(
BEFORE
include_directories
(
BEFORE
${
PROJECT_SOURCE_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/include
...
...
example/14_client_app/CMakeLists.txt
View file @
0aa899aa
cmake_minimum_required
(
VERSION 3.15
)
cmake_minimum_required
(
VERSION 3.15
)
project
(
ck_app
)
project
(
ck_app
)
add_compile_options
(
-std=c++14
)
# add_link_options(--offload-arch=gfx908)
set
(
CMAKE_CXX_STANDARD 14
)
#
set(CMAKE_CXX_STANDARD 14)
set
(
CMAKE_CXX_STANDARD_REQUIRED True
)
#
set(CMAKE_CXX_STANDARD_REQUIRED True)
find_package
(
composable_kernel 1.0.0 COMPONENTS device_operations host_tensor
)
find_package
(
composable_kernel 1.0.0 COMPONENTS device_operations host_tensor
)
find_package
(
hip REQUIRED PATHS /opt/rocm
)
find_package
(
hip REQUIRED PATHS /opt/rocm
)
...
...
example/14_client_app/client_app_impl.hpp
View file @
0aa899aa
...
@@ -28,6 +28,36 @@ enum ConvOutputLayout
...
@@ -28,6 +28,36 @@ enum ConvOutputLayout
NHWK
,
// 1
NHWK
,
// 1
};
};
// Code to check CUDA errors
void
check_cuda_error
(
void
)
{
hipError_t
err
=
hipGetLastError
();
if
(
err
!=
hipSuccess
)
{
std
::
cerr
<<
"Error: "
<<
hipGetErrorString
(
err
)
<<
std
::
endl
;
exit
(
err
);
}
}
std
::
string
getDeviceName
(
int
device
)
{
struct
hipDeviceProp_t
prop
;
hipGetDeviceProperties
(
&
prop
,
device
);
check_cuda_error
();
return
std
::
string
(
prop
.
name
);
}
int
getDriver
(
void
)
{
int
driver
;
hipDriverGetVersion
(
&
driver
);
check_cuda_error
();
return
driver
;
}
namespace
ck
{
namespace
ck
{
namespace
app
{
namespace
app
{
...
@@ -127,6 +157,14 @@ void profile_conv_fwd_impl(int do_verification,
...
@@ -127,6 +157,14 @@ void profile_conv_fwd_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
deviceIndex
=
0
;
hipSetDevice
(
deviceIndex
);
check_cuda_error
();
hipStream_t
stream_id
=
nullptr
;
hipStreamCreate
(
&
stream_id
);
check_cuda_error
();
// profile device Conv instances
// profile device Conv instances
for
(
auto
&
conv_ptr
:
conv_ptrs
)
for
(
auto
&
conv_ptr
:
conv_ptrs
)
...
@@ -151,8 +189,7 @@ void profile_conv_fwd_impl(int do_verification,
...
@@ -151,8 +189,7 @@ void profile_conv_fwd_impl(int do_verification,
if
(
conv_ptr
.
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
conv_ptr
.
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
std
::
string
conv_name
=
conv_ptr
.
GetTypeString
();
std
::
string
conv_name
=
conv_ptr
.
GetTypeString
();
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
,
stream_id
,
true
);
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
,
nullptr
);
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
N
*
K
*
Ho
*
Wo
*
C
*
Y
*
X
;
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
N
*
K
*
Ho
*
Wo
*
C
*
Y
*
X
;
...
...
example/CMakeLists.txt
View file @
0aa899aa
...
@@ -22,6 +22,7 @@ function(add_example_executable EXAMPLE_NAME)
...
@@ -22,6 +22,7 @@ function(add_example_executable EXAMPLE_NAME)
message
(
"adding example
${
EXAMPLE_NAME
}
"
)
message
(
"adding example
${
EXAMPLE_NAME
}
"
)
add_executable
(
${
EXAMPLE_NAME
}
${
ARGN
}
)
add_executable
(
${
EXAMPLE_NAME
}
${
ARGN
}
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE host_tensor
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE host_tensor
)
set_target_properties
(
${
EXAMPLE_NAME
}
PROPERTIES EXCLUDE_FROM_ALL 1
)
add_dependencies
(
examples
${
EXAMPLE_NAME
}
)
add_dependencies
(
examples
${
EXAMPLE_NAME
}
)
endfunction
(
add_example_executable EXAMPLE_NAME
)
endfunction
(
add_example_executable EXAMPLE_NAME
)
...
...
include/ck/hip_version.hpp.in
deleted
100644 → 0
View file @
44757d6b
#pragma once
// "_PACKAGE_" to avoid name contentions: the macros like
// HIP_VERSION_MAJOR are defined in HIP_VERSION.h.
// clang-format off
#define CK_HIP_PACKAGE_VERSION_MAJOR @CK_HIP_VERSION_MAJOR@
#define CK_HIP_PACKAGE_VERSION_MINOR @CK_HIP_VERSION_MINOR@
#define CK_HIP_PACKAGE_VERSION_PATCH @CK_HIP_VERSION_PATCH@
// clang-format on
#ifndef CK_HIP_PACKAGE_VERSION_MAJOR
#define CK_HIP_PACKAGE_VERSION_MAJOR 0
#endif
#ifndef CK_HIP_PACKAGE_VERSION_MINOR
#define CK_HIP_PACKAGE_VERSION_MINOR 0
#endif
#ifndef CK_HIP_PACKAGE_VERSION_PATCH
#define CK_HIP_PACKAGE_VERSION_PATCH 0
#endif
// 3 decimal digits for major and minor, 6 digits for patch number.
// Max number is 999,999,999999 == 0xE8,D4A5,0FFF that fits into 64-bit math.
#if CK_HIP_PACKAGE_VERSION_MAJOR > 999 || CK_HIP_PACKAGE_VERSION_MAJOR > 999 || \
CK_HIP_PACKAGE_VERSION_PATCH > 999999
#error "Too big HIP version number(s)"
#endif
#define CK_HIP_PACKAGE_VERSION_FLAT \
((CK_HIP_PACKAGE_VERSION_MAJOR * 1000ULL + CK_HIP_PACKAGE_VERSION_MINOR) * 1000000 + \
CK_HIP_PACKAGE_VERSION_PATCH)
include/ck/options.hpp.in
0 → 100644
View file @
0aa899aa
#pragma once
#cmakedefine01 CK_TIME_KERNELS
\ No newline at end of file
include/ck/tensor_operation/gpu/device/device_base.hpp
View file @
0aa899aa
...
@@ -22,7 +22,7 @@ struct BaseInvoker
...
@@ -22,7 +22,7 @@ struct BaseInvoker
BaseInvoker
(
const
BaseInvoker
&
)
=
default
;
BaseInvoker
(
const
BaseInvoker
&
)
=
default
;
BaseInvoker
&
operator
=
(
const
BaseInvoker
&
)
=
default
;
BaseInvoker
&
operator
=
(
const
BaseInvoker
&
)
=
default
;
virtual
float
Run
(
const
BaseArgument
*
,
int
=
1
,
hipStream_t
=
nullptr
){
return
-
1
;}
virtual
float
Run
(
const
BaseArgument
*
,
int
=
1
,
hipStream_t
=
nullptr
,
bool
=
false
){
return
-
1
;}
virtual
~
BaseInvoker
()
{}
virtual
~
BaseInvoker
()
{}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
View file @
0aa899aa
...
@@ -274,7 +274,7 @@ struct DeviceBatchedGemmXdl
...
@@ -274,7 +274,7 @@ struct DeviceBatchedGemmXdl
{
{
using
Argument
=
DeviceBatchedGemmXdl
::
Argument
;
using
Argument
=
DeviceBatchedGemmXdl
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_g_k0_m_k1_{"
std
::
cout
<<
"arg.a_grid_desc_g_k0_m_k1_{"
...
@@ -336,6 +336,7 @@ struct DeviceBatchedGemmXdl
...
@@ -336,6 +336,7 @@ struct DeviceBatchedGemmXdl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -369,6 +370,7 @@ struct DeviceBatchedGemmXdl
...
@@ -369,6 +370,7 @@ struct DeviceBatchedGemmXdl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -385,9 +387,9 @@ struct DeviceBatchedGemmXdl
...
@@ -385,9 +387,9 @@ struct DeviceBatchedGemmXdl
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -414,7 +414,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -414,7 +414,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
}
}
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
ShowInfo
(
arg
);
ShowInfo
(
arg
);
if
(
!
GridwiseGemm
::
CheckValidity
(
arg
.
a_grid_desc_kbatch_k0_m_k1_
,
if
(
!
GridwiseGemm
::
CheckValidity
(
arg
.
a_grid_desc_kbatch_k0_m_k1_
,
...
@@ -445,6 +445,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -445,6 +445,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -561,9 +562,9 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -561,9 +562,9 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -521,7 +521,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -521,7 +521,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
nrepeat
=
1
;
nrepeat
=
1
;
float
ave_time
=
0
;
float
ave_time
=
0
;
...
@@ -600,6 +600,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -600,6 +600,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -634,6 +635,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -634,6 +635,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -649,9 +651,9 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -649,9 +651,9 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -642,7 +642,7 @@ struct
...
@@ -642,7 +642,7 @@ struct
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -734,6 +734,7 @@ struct
...
@@ -734,6 +734,7 @@ struct
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -779,6 +780,7 @@ struct
...
@@ -779,6 +780,7 @@ struct
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -798,9 +800,9 @@ struct
...
@@ -798,9 +800,9 @@ struct
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -607,7 +607,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -607,7 +607,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -693,6 +693,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -693,6 +693,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -733,6 +734,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -733,6 +734,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -750,9 +752,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -750,9 +752,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -568,7 +568,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -568,7 +568,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -670,6 +670,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -670,6 +670,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -705,6 +706,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -705,6 +706,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -720,9 +722,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -720,9 +722,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -450,7 +450,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -450,7 +450,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -506,6 +506,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -506,6 +506,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -538,6 +539,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -538,6 +539,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -553,9 +555,9 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -553,9 +555,9 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp
View file @
0aa899aa
...
@@ -98,7 +98,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
...
@@ -98,7 +98,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
const
auto
naive_conv3d_fwd
=
const
auto
naive_conv3d_fwd
=
ref
::
naive_conv_fwd_ndhwc_kzyxc_ndhwk
<
InDataType
,
ref
::
naive_conv_fwd_ndhwc_kzyxc_ndhwk
<
InDataType
,
...
@@ -115,6 +115,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
...
@@ -115,6 +115,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
dim3
(
256
),
dim3
(
256
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_in_
,
arg
.
p_in_
,
arg
.
p_wei_
,
arg
.
p_wei_
,
arg
.
p_out_
,
arg
.
p_out_
,
...
@@ -144,9 +145,9 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
...
@@ -144,9 +145,9 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
View file @
0aa899aa
...
@@ -430,7 +430,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -430,7 +430,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"num_batches_of_GEMM = "
<<
arg
.
num_subbatches_
<<
std
::
endl
;
std
::
cout
<<
"num_batches_of_GEMM = "
<<
arg
.
num_subbatches_
<<
std
::
endl
;
...
@@ -485,6 +485,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -485,6 +485,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -521,6 +522,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -521,6 +522,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -541,9 +543,9 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -541,9 +543,9 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -591,7 +591,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -591,7 +591,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -645,6 +645,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -645,6 +645,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -677,6 +678,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -677,6 +678,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -692,9 +694,9 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -692,9 +694,9 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl.hpp
View file @
0aa899aa
...
@@ -290,7 +290,7 @@ struct DeviceGemmXdl
...
@@ -290,7 +290,7 @@ struct DeviceGemmXdl
{
{
using
Argument
=
DeviceGemmXdl
::
Argument
;
using
Argument
=
DeviceGemmXdl
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -344,6 +344,7 @@ struct DeviceGemmXdl
...
@@ -344,6 +344,7 @@ struct DeviceGemmXdl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -376,6 +377,7 @@ struct DeviceGemmXdl
...
@@ -376,6 +377,7 @@ struct DeviceGemmXdl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -392,9 +394,9 @@ struct DeviceGemmXdl
...
@@ -392,9 +394,9 @@ struct DeviceGemmXdl
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle.hpp
View file @
0aa899aa
...
@@ -249,7 +249,7 @@ struct DeviceGemmXdl_C_Shuffle
...
@@ -249,7 +249,7 @@ struct DeviceGemmXdl_C_Shuffle
{
{
using
Argument
=
DeviceGemmXdl_C_Shuffle
::
Argument
;
using
Argument
=
DeviceGemmXdl_C_Shuffle
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -306,6 +306,7 @@ struct DeviceGemmXdl_C_Shuffle
...
@@ -306,6 +306,7 @@ struct DeviceGemmXdl_C_Shuffle
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -341,6 +342,7 @@ struct DeviceGemmXdl_C_Shuffle
...
@@ -341,6 +342,7 @@ struct DeviceGemmXdl_C_Shuffle
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -357,9 +359,9 @@ struct DeviceGemmXdl_C_Shuffle
...
@@ -357,9 +359,9 @@ struct DeviceGemmXdl_C_Shuffle
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp
View file @
0aa899aa
...
@@ -268,7 +268,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
...
@@ -268,7 +268,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
{
{
using
Argument
=
DeviceGemmXdl_C_Shuffle_Bias_2d
::
Argument
;
using
Argument
=
DeviceGemmXdl_C_Shuffle_Bias_2d
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -331,6 +331,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
...
@@ -331,6 +331,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -371,6 +372,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
...
@@ -371,6 +372,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -389,9 +391,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
...
@@ -389,9 +391,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment