Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0aa899aa
"vscode:/vscode.git/clone" did not exist on "b76d9fde8de381a50d64c401b5d12864a28c5556"
Commit
0aa899aa
authored
Apr 06, 2022
by
Jehandad Khan
Browse files
add hipEvent based timing to kernels
parent
44757d6b
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
114 additions
and
73 deletions
+114
-73
CMakeLists.txt
CMakeLists.txt
+3
-1
example/14_client_app/CMakeLists.txt
example/14_client_app/CMakeLists.txt
+4
-2
example/14_client_app/client_app_impl.hpp
example/14_client_app/client_app_impl.hpp
+39
-2
example/CMakeLists.txt
example/CMakeLists.txt
+1
-0
include/ck/hip_version.hpp.in
include/ck/hip_version.hpp.in
+0
-28
include/ck/options.hpp.in
include/ck/options.hpp.in
+3
-0
include/ck/tensor_operation/gpu/device/device_base.hpp
include/ck/tensor_operation/gpu/device/device_base.hpp
+1
-1
include/ck/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
...k/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
...e_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
.../gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
..._fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
...nv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
...device/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
...ation/gpu/device/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp
.../gpu/device/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
...on/gpu/device/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
...ation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl.hpp
include/ck/tensor_operation/gpu/device/device_gemm_xdl.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle.hpp
...tensor_operation/gpu/device/device_gemm_xdl_c_shuffle.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp
...peration/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp
+5
-3
No files found.
CMakeLists.txt
View file @
0aa899aa
...
@@ -27,6 +27,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
...
@@ -27,6 +27,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set
(
CMAKE_CXX_EXTENSIONS OFF
)
set
(
CMAKE_CXX_EXTENSIONS OFF
)
message
(
"CMAKE_CXX_COMPILER_ID:
${
CMAKE_CXX_COMPILER_ID
}
"
)
message
(
"CMAKE_CXX_COMPILER_ID:
${
CMAKE_CXX_COMPILER_ID
}
"
)
option
(
CK_TIME_KERNELS
"Time every kernel and log parameters"
OFF
)
## OpenMP
## OpenMP
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"Clang"
)
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"Clang"
)
# workaround issue hipcc in rocm3.5 cannot find openmp
# workaround issue hipcc in rocm3.5 cannot find openmp
...
@@ -227,7 +229,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
...
@@ -227,7 +229,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
set
(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
)
set
(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
)
set
(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/bin
)
set
(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/bin
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/include/ck/
hip_vers
ion.hpp.in"
"
${
PROJECT_BINARY_DIR
}
/include/ck/
hip_vers
ion.hpp"
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/include/ck/
opt
ion
s
.hpp.in"
"
${
PROJECT_BINARY_DIR
}
/include/ck/
opt
ion
s
.hpp"
)
include_directories
(
BEFORE
include_directories
(
BEFORE
${
PROJECT_SOURCE_DIR
}
/include
${
PROJECT_SOURCE_DIR
}
/include
...
...
example/14_client_app/CMakeLists.txt
View file @
0aa899aa
cmake_minimum_required
(
VERSION 3.15
)
cmake_minimum_required
(
VERSION 3.15
)
project
(
ck_app
)
project
(
ck_app
)
add_compile_options
(
-std=c++14
)
# add_link_options(--offload-arch=gfx908)
set
(
CMAKE_CXX_STANDARD 14
)
#
set(CMAKE_CXX_STANDARD 14)
set
(
CMAKE_CXX_STANDARD_REQUIRED True
)
#
set(CMAKE_CXX_STANDARD_REQUIRED True)
find_package
(
composable_kernel 1.0.0 COMPONENTS device_operations host_tensor
)
find_package
(
composable_kernel 1.0.0 COMPONENTS device_operations host_tensor
)
find_package
(
hip REQUIRED PATHS /opt/rocm
)
find_package
(
hip REQUIRED PATHS /opt/rocm
)
...
...
example/14_client_app/client_app_impl.hpp
View file @
0aa899aa
...
@@ -28,6 +28,36 @@ enum ConvOutputLayout
...
@@ -28,6 +28,36 @@ enum ConvOutputLayout
NHWK
,
// 1
NHWK
,
// 1
};
};
// Code to check CUDA errors
void
check_cuda_error
(
void
)
{
hipError_t
err
=
hipGetLastError
();
if
(
err
!=
hipSuccess
)
{
std
::
cerr
<<
"Error: "
<<
hipGetErrorString
(
err
)
<<
std
::
endl
;
exit
(
err
);
}
}
std
::
string
getDeviceName
(
int
device
)
{
struct
hipDeviceProp_t
prop
;
hipGetDeviceProperties
(
&
prop
,
device
);
check_cuda_error
();
return
std
::
string
(
prop
.
name
);
}
/// Returns the driver version number reported by hipDriverGetVersion().
///
/// Errors are handled by check_cuda_error(), which exits the process when the
/// runtime has recorded a failure. `driver` starts at 0 so the function never
/// returns an indeterminate value.
///
/// Declared `inline` because the definition lives in a header (avoids
/// duplicate-symbol link errors when included from several translation units).
inline int getDriver(void)
{
    int driver = 0;
    hipDriverGetVersion(&driver);
    check_cuda_error();
    return driver;
}
namespace
ck
{
namespace
ck
{
namespace
app
{
namespace
app
{
...
@@ -127,6 +157,14 @@ void profile_conv_fwd_impl(int do_verification,
...
@@ -127,6 +157,14 @@ void profile_conv_fwd_impl(int do_verification,
float
best_ave_time
=
0
;
float
best_ave_time
=
0
;
float
best_tflops
=
0
;
float
best_tflops
=
0
;
float
best_gb_per_sec
=
0
;
float
best_gb_per_sec
=
0
;
int
deviceIndex
=
0
;
hipSetDevice
(
deviceIndex
);
check_cuda_error
();
hipStream_t
stream_id
=
nullptr
;
hipStreamCreate
(
&
stream_id
);
check_cuda_error
();
// profile device Conv instances
// profile device Conv instances
for
(
auto
&
conv_ptr
:
conv_ptrs
)
for
(
auto
&
conv_ptr
:
conv_ptrs
)
...
@@ -151,8 +189,7 @@ void profile_conv_fwd_impl(int do_verification,
...
@@ -151,8 +189,7 @@ void profile_conv_fwd_impl(int do_verification,
if
(
conv_ptr
.
IsSupportedArgument
(
argument_ptr
.
get
()))
if
(
conv_ptr
.
IsSupportedArgument
(
argument_ptr
.
get
()))
{
{
std
::
string
conv_name
=
conv_ptr
.
GetTypeString
();
std
::
string
conv_name
=
conv_ptr
.
GetTypeString
();
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
,
stream_id
,
true
);
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
,
nullptr
);
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
N
*
K
*
Ho
*
Wo
*
C
*
Y
*
X
;
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
N
*
K
*
Ho
*
Wo
*
C
*
Y
*
X
;
...
...
example/CMakeLists.txt
View file @
0aa899aa
...
@@ -22,6 +22,7 @@ function(add_example_executable EXAMPLE_NAME)
...
@@ -22,6 +22,7 @@ function(add_example_executable EXAMPLE_NAME)
message
(
"adding example
${
EXAMPLE_NAME
}
"
)
message
(
"adding example
${
EXAMPLE_NAME
}
"
)
add_executable
(
${
EXAMPLE_NAME
}
${
ARGN
}
)
add_executable
(
${
EXAMPLE_NAME
}
${
ARGN
}
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE host_tensor
)
target_link_libraries
(
${
EXAMPLE_NAME
}
PRIVATE host_tensor
)
set_target_properties
(
${
EXAMPLE_NAME
}
PROPERTIES EXCLUDE_FROM_ALL 1
)
add_dependencies
(
examples
${
EXAMPLE_NAME
}
)
add_dependencies
(
examples
${
EXAMPLE_NAME
}
)
endfunction
(
add_example_executable EXAMPLE_NAME
)
endfunction
(
add_example_executable EXAMPLE_NAME
)
...
...
include/ck/hip_version.hpp.in
deleted
100644 → 0
View file @
44757d6b
#pragma once
// "_PACKAGE_" is part of these names to avoid name collisions: macros such as
// HIP_VERSION_MAJOR are already defined in HIP_VERSION.h.
// clang-format off
#define CK_HIP_PACKAGE_VERSION_MAJOR @CK_HIP_VERSION_MAJOR@
#define CK_HIP_PACKAGE_VERSION_MINOR @CK_HIP_VERSION_MINOR@
#define CK_HIP_PACKAGE_VERSION_PATCH @CK_HIP_VERSION_PATCH@
// clang-format on
// Fall back to 0 for any version component that has not been defined.
#ifndef CK_HIP_PACKAGE_VERSION_MAJOR
#define CK_HIP_PACKAGE_VERSION_MAJOR 0
#endif
#ifndef CK_HIP_PACKAGE_VERSION_MINOR
#define CK_HIP_PACKAGE_VERSION_MINOR 0
#endif
#ifndef CK_HIP_PACKAGE_VERSION_PATCH
#define CK_HIP_PACKAGE_VERSION_PATCH 0
#endif
// 3 decimal digits for major and minor, 6 digits for patch number.
// Max number is 999,999,999999 == 0xE8,D4A5,0FFF that fits into 64-bit math.
// Each component is range-checked so the packed digits below cannot overlap
// (the minor component was previously not checked at all).
#if CK_HIP_PACKAGE_VERSION_MAJOR > 999 || CK_HIP_PACKAGE_VERSION_MINOR > 999 || \
    CK_HIP_PACKAGE_VERSION_PATCH > 999999
#error "Too big HIP version number(s)"
#endif
// Single comparable number laid out in decimal as MMMmmmPPPPPP
// (major, minor, patch). The ULL literal makes the arithmetic 64-bit.
#define CK_HIP_PACKAGE_VERSION_FLAT                                                       \
    ((CK_HIP_PACKAGE_VERSION_MAJOR * 1000ULL + CK_HIP_PACKAGE_VERSION_MINOR) * 1000000 + \
     CK_HIP_PACKAGE_VERSION_PATCH)
include/ck/options.hpp.in
0 → 100644
View file @
0aa899aa
#pragma once
// Build-option template processed by CMake's configure_file().
// The cmakedefine01 directive below is rewritten into a definition of
// CK_TIME_KERNELS as 1 when the CMake variable of the same name is enabled
// and as 0 otherwise, so the macro is always defined in the generated header.
#cmakedefine01 CK_TIME_KERNELS
\ No newline at end of file
include/ck/tensor_operation/gpu/device/device_base.hpp
View file @
0aa899aa
...
@@ -22,7 +22,7 @@ struct BaseInvoker
...
@@ -22,7 +22,7 @@ struct BaseInvoker
BaseInvoker
(
const
BaseInvoker
&
)
=
default
;
BaseInvoker
(
const
BaseInvoker
&
)
=
default
;
BaseInvoker
&
operator
=
(
const
BaseInvoker
&
)
=
default
;
BaseInvoker
&
operator
=
(
const
BaseInvoker
&
)
=
default
;
virtual
float
Run
(
const
BaseArgument
*
,
int
=
1
,
hipStream_t
=
nullptr
){
return
-
1
;}
virtual
float
Run
(
const
BaseArgument
*
,
int
=
1
,
hipStream_t
=
nullptr
,
bool
=
false
){
return
-
1
;}
virtual
~
BaseInvoker
()
{}
virtual
~
BaseInvoker
()
{}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
View file @
0aa899aa
...
@@ -274,7 +274,7 @@ struct DeviceBatchedGemmXdl
...
@@ -274,7 +274,7 @@ struct DeviceBatchedGemmXdl
{
{
using
Argument
=
DeviceBatchedGemmXdl
::
Argument
;
using
Argument
=
DeviceBatchedGemmXdl
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_g_k0_m_k1_{"
std
::
cout
<<
"arg.a_grid_desc_g_k0_m_k1_{"
...
@@ -336,6 +336,7 @@ struct DeviceBatchedGemmXdl
...
@@ -336,6 +336,7 @@ struct DeviceBatchedGemmXdl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -369,6 +370,7 @@ struct DeviceBatchedGemmXdl
...
@@ -369,6 +370,7 @@ struct DeviceBatchedGemmXdl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -385,9 +387,9 @@ struct DeviceBatchedGemmXdl
...
@@ -385,9 +387,9 @@ struct DeviceBatchedGemmXdl
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -414,7 +414,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -414,7 +414,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
}
}
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
ShowInfo
(
arg
);
ShowInfo
(
arg
);
if
(
!
GridwiseGemm
::
CheckValidity
(
arg
.
a_grid_desc_kbatch_k0_m_k1_
,
if
(
!
GridwiseGemm
::
CheckValidity
(
arg
.
a_grid_desc_kbatch_k0_m_k1_
,
...
@@ -445,6 +445,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -445,6 +445,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -561,9 +562,9 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -561,9 +562,9 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -521,7 +521,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -521,7 +521,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
nrepeat
=
1
;
nrepeat
=
1
;
float
ave_time
=
0
;
float
ave_time
=
0
;
...
@@ -600,6 +600,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -600,6 +600,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -634,6 +635,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -634,6 +635,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -649,9 +651,9 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -649,9 +651,9 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -642,7 +642,7 @@ struct
...
@@ -642,7 +642,7 @@ struct
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -734,6 +734,7 @@ struct
...
@@ -734,6 +734,7 @@ struct
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -779,6 +780,7 @@ struct
...
@@ -779,6 +780,7 @@ struct
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -798,9 +800,9 @@ struct
...
@@ -798,9 +800,9 @@ struct
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -607,7 +607,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -607,7 +607,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -693,6 +693,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -693,6 +693,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -733,6 +734,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -733,6 +734,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -750,9 +752,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
...
@@ -750,9 +752,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -568,7 +568,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -568,7 +568,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -670,6 +670,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -670,6 +670,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -705,6 +706,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -705,6 +706,7 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -720,9 +722,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
...
@@ -720,9 +722,9 @@ struct DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -450,7 +450,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -450,7 +450,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -506,6 +506,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -506,6 +506,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -538,6 +539,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -538,6 +539,7 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -553,9 +555,9 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -553,9 +555,9 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp
View file @
0aa899aa
...
@@ -98,7 +98,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
...
@@ -98,7 +98,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
const
auto
naive_conv3d_fwd
=
const
auto
naive_conv3d_fwd
=
ref
::
naive_conv_fwd_ndhwc_kzyxc_ndhwk
<
InDataType
,
ref
::
naive_conv_fwd_ndhwc_kzyxc_ndhwk
<
InDataType
,
...
@@ -115,6 +115,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
...
@@ -115,6 +115,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
dim3
(
256
),
dim3
(
256
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_in_
,
arg
.
p_in_
,
arg
.
p_wei_
,
arg
.
p_wei_
,
arg
.
p_out_
,
arg
.
p_out_
,
...
@@ -144,9 +145,9 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
...
@@ -144,9 +145,9 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp
View file @
0aa899aa
...
@@ -430,7 +430,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -430,7 +430,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"num_batches_of_GEMM = "
<<
arg
.
num_subbatches_
<<
std
::
endl
;
std
::
cout
<<
"num_batches_of_GEMM = "
<<
arg
.
num_subbatches_
<<
std
::
endl
;
...
@@ -485,6 +485,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -485,6 +485,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -521,6 +522,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -521,6 +522,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -541,9 +543,9 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
...
@@ -541,9 +543,9 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
View file @
0aa899aa
...
@@ -591,7 +591,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -591,7 +591,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -645,6 +645,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -645,6 +645,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -677,6 +678,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -677,6 +678,7 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -692,9 +694,9 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -692,9 +694,9 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl.hpp
View file @
0aa899aa
...
@@ -290,7 +290,7 @@ struct DeviceGemmXdl
...
@@ -290,7 +290,7 @@ struct DeviceGemmXdl
{
{
using
Argument
=
DeviceGemmXdl
::
Argument
;
using
Argument
=
DeviceGemmXdl
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -344,6 +344,7 @@ struct DeviceGemmXdl
...
@@ -344,6 +344,7 @@ struct DeviceGemmXdl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -376,6 +377,7 @@ struct DeviceGemmXdl
...
@@ -376,6 +377,7 @@ struct DeviceGemmXdl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -392,9 +394,9 @@ struct DeviceGemmXdl
...
@@ -392,9 +394,9 @@ struct DeviceGemmXdl
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle.hpp
View file @
0aa899aa
...
@@ -249,7 +249,7 @@ struct DeviceGemmXdl_C_Shuffle
...
@@ -249,7 +249,7 @@ struct DeviceGemmXdl_C_Shuffle
{
{
using
Argument
=
DeviceGemmXdl_C_Shuffle
::
Argument
;
using
Argument
=
DeviceGemmXdl_C_Shuffle
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -306,6 +306,7 @@ struct DeviceGemmXdl_C_Shuffle
...
@@ -306,6 +306,7 @@ struct DeviceGemmXdl_C_Shuffle
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -341,6 +342,7 @@ struct DeviceGemmXdl_C_Shuffle
...
@@ -341,6 +342,7 @@ struct DeviceGemmXdl_C_Shuffle
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -357,9 +359,9 @@ struct DeviceGemmXdl_C_Shuffle
...
@@ -357,9 +359,9 @@ struct DeviceGemmXdl_C_Shuffle
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp
View file @
0aa899aa
...
@@ -268,7 +268,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
...
@@ -268,7 +268,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
{
{
using
Argument
=
DeviceGemmXdl_C_Shuffle_Bias_2d
::
Argument
;
using
Argument
=
DeviceGemmXdl_C_Shuffle_Bias_2d
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -331,6 +331,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
...
@@ -331,6 +331,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -371,6 +372,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
...
@@ -371,6 +372,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -389,9 +391,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
...
@@ -389,9 +391,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_2d
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment