Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
ae99bcb9
Commit
ae99bcb9
authored
Apr 06, 2022
by
Jehandad Khan
Browse files
update new upstream with PR changes
parent
dd6a8de4
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
65 additions
and
78 deletions
+65
-78
include/ck/tensor_operation/gpu/device/device_batched_gemm_reduce_xdl_cshuffle.hpp
...on/gpu/device/device_batched_gemm_reduce_xdl_cshuffle.hpp
+13
-6
include/ck/tensor_operation/gpu/device/device_convnd_bwd_data_xdl_ndhwc_kzyxc_ndhwk.hpp
...u/device/device_convnd_bwd_data_xdl_ndhwc_kzyxc_ndhwk.hpp
+7
-3
include/ck/tensor_operation/gpu/device/device_gemm_reduce_xdl_cshuffle.hpp
..._operation/gpu/device/device_gemm_reduce_xdl_cshuffle.hpp
+13
-6
include/ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp
.../tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp
+7
-44
include/ck/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp
...k/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp
+7
-3
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+11
-9
library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
...operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
+3
-2
library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
...r_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
+1
-1
library/src/tensor_operation_instance/gpu/conv3d_fwd/CMakeLists.txt
...c/tensor_operation_instance/gpu/conv3d_fwd/CMakeLists.txt
+1
-2
library/src/tensor_operation_instance/gpu/convnd_bwd_data/CMakeLists.txt
...sor_operation_instance/gpu/convnd_bwd_data/CMakeLists.txt
+1
-1
library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt
...tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt
+1
-1
No files found.
include/ck/tensor_operation/gpu/device/device_batched_gemm_reduce_xdl_cshuffle.hpp
View file @
ae99bcb9
...
@@ -673,7 +673,7 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
...
@@ -673,7 +673,7 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
/*
nrepeat
*/
=
1
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -710,6 +710,8 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
...
@@ -710,6 +710,8 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
const
bool
has_main_k0_block_loop
=
GridwiseGemm
::
CalculateHasMainK0BlockLoop
(
K0
);
const
bool
has_main_k0_block_loop
=
GridwiseGemm
::
CalculateHasMainK0BlockLoop
(
K0
);
float
elapsed_time
=
0.0
f
;
if
(
has_main_k0_block_loop
)
if
(
has_main_k0_block_loop
)
{
{
const
auto
kernel
=
kernel_batched_gemm_reduce_xdl_cshuffle_v1
<
const
auto
kernel
=
kernel_batched_gemm_reduce_xdl_cshuffle_v1
<
...
@@ -730,10 +732,12 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
...
@@ -730,10 +732,12 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
remove_reference_t
<
Block2CTileMap
>
,
remove_reference_t
<
Block2CTileMap
>
,
true
>
;
true
>
;
launch
_kernel
(
kernel
,
elapsed_time
=
launch_and_time
_kernel
(
kernel
,
nrepeat
,
dim3
(
grid_size
),
dim3
(
grid_size
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -772,10 +776,13 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
...
@@ -772,10 +776,13 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
remove_reference_t
<
Block2CTileMap
>
,
remove_reference_t
<
Block2CTileMap
>
,
false
>
;
false
>
;
launch_kernel
(
kernel
,
elapsed_time
=
launch_and_time_kernel
(
kernel
,
nrepeat
,
dim3
(
grid_size
),
dim3
(
grid_size
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -795,13 +802,13 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
...
@@ -795,13 +802,13 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
arg
.
block_2_ctile_map_
);
arg
.
block_2_ctile_map_
);
}
}
return
0
;
return
elapsed_time
;
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_convnd_bwd_data_xdl_ndhwc_kzyxc_ndhwk.hpp
View file @
ae99bcb9
...
@@ -1241,7 +1241,7 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
...
@@ -1241,7 +1241,7 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
float
ave_time
=
0
;
float
ave_time
=
0
;
for
(
size_t
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
...
@@ -1322,6 +1322,8 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
...
@@ -1322,6 +1322,8 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
dim3
(
grid_size
),
dim3
(
grid_size
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -1355,6 +1357,8 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
...
@@ -1355,6 +1357,8 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
dim3
(
grid_size
),
dim3
(
grid_size
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -1370,9 +1374,9 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
...
@@ -1370,9 +1374,9 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
return
ave_time
;
return
ave_time
;
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_reduce_xdl_cshuffle.hpp
View file @
ae99bcb9
...
@@ -500,7 +500,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
...
@@ -500,7 +500,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
/*
nrepeat
*/
=
1
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -533,6 +533,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
...
@@ -533,6 +533,7 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
const
auto
K0
=
arg
.
a_grid_desc_ak0_m_ak1_
.
GetLength
(
I0
);
const
auto
K0
=
arg
.
a_grid_desc_ak0_m_ak1_
.
GetLength
(
I0
);
const
bool
has_main_k0_block_loop
=
GridwiseGemm
::
CalculateHasMainK0BlockLoop
(
K0
);
const
bool
has_main_k0_block_loop
=
GridwiseGemm
::
CalculateHasMainK0BlockLoop
(
K0
);
float
elapsed_time
=
0.0
f
;
if
(
has_main_k0_block_loop
)
if
(
has_main_k0_block_loop
)
{
{
...
@@ -553,10 +554,13 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
...
@@ -553,10 +554,13 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
typename
GridwiseGemm
::
DefaultBlock2CTileMap
,
typename
GridwiseGemm
::
DefaultBlock2CTileMap
,
true
>
;
true
>
;
launch_kernel
(
kernel
,
elapsed_time
=
launch_and_time_kernel
(
kernel
,
nrepeat
,
dim3
(
grid_size
),
dim3
(
grid_size
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -592,10 +596,13 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
...
@@ -592,10 +596,13 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
typename
GridwiseGemm
::
DefaultBlock2CTileMap
,
typename
GridwiseGemm
::
DefaultBlock2CTileMap
,
false
>
;
false
>
;
launch_kernel
(
kernel
,
elapsed_time
=
launch_and_time_kernel
(
kernel
,
nrepeat
,
dim3
(
grid_size
),
dim3
(
grid_size
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -613,13 +620,13 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
...
@@ -613,13 +620,13 @@ struct DeviceGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwiseOpera
arg
.
block_2_ctile_map_
);
arg
.
block_2_ctile_map_
);
}
}
return
0
;
return
elapsed_time
;
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp
View file @
ae99bcb9
...
@@ -435,7 +435,7 @@ struct DeviceGemm_Xdl_CShuffle
...
@@ -435,7 +435,7 @@ struct DeviceGemm_Xdl_CShuffle
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
#if 0
#if 0
{
{
...
@@ -483,31 +483,14 @@ struct DeviceGemm_Xdl_CShuffle
...
@@ -483,31 +483,14 @@ struct DeviceGemm_Xdl_CShuffle
typename
GridwiseGemm
::
DefaultBlock2CTileMap
,
typename
GridwiseGemm
::
DefaultBlock2CTileMap
,
true
>
;
true
>
;
if
(
nrepeat
==
0
)
{
launch_kernel
(
kernel
,
dim3
(
grid_size
),
dim3
(
BlockSize
),
0
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
a_element_op_
,
arg
.
b_element_op_
,
arg
.
c_element_op_
,
arg
.
a_grid_desc_ak0_m_ak1_
,
arg
.
b_grid_desc_bk0_n_bk1_
,
arg
.
c_grid_desc_mblock_mperblock_nblock_nperblock_
,
arg
.
block_2_ctile_map_
);
}
else
{
ave_time
=
ave_time
=
launch_and_time_kernel
(
kernel
,
launch_and_time_kernel
(
kernel
,
nrepeat
,
nrepeat
,
dim3
(
grid_size
),
dim3
(
grid_size
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -519,7 +502,6 @@ struct DeviceGemm_Xdl_CShuffle
...
@@ -519,7 +502,6 @@ struct DeviceGemm_Xdl_CShuffle
arg
.
c_grid_desc_mblock_mperblock_nblock_nperblock_
,
arg
.
c_grid_desc_mblock_mperblock_nblock_nperblock_
,
arg
.
block_2_ctile_map_
);
arg
.
block_2_ctile_map_
);
}
}
}
else
else
{
{
const
auto
kernel
=
kernel_gemm_xdl_cshuffle_v1
<
const
auto
kernel
=
kernel_gemm_xdl_cshuffle_v1
<
...
@@ -534,32 +516,14 @@ struct DeviceGemm_Xdl_CShuffle
...
@@ -534,32 +516,14 @@ struct DeviceGemm_Xdl_CShuffle
typename
GridwiseGemm
::
CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
,
typename
GridwiseGemm
::
CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
,
typename
GridwiseGemm
::
DefaultBlock2CTileMap
,
typename
GridwiseGemm
::
DefaultBlock2CTileMap
,
false
>
;
false
>
;
if
(
nrepeat
==
0
)
{
launch_kernel
(
kernel
,
dim3
(
grid_size
),
dim3
(
BlockSize
),
0
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
a_element_op_
,
arg
.
b_element_op_
,
arg
.
c_element_op_
,
arg
.
a_grid_desc_ak0_m_ak1_
,
arg
.
b_grid_desc_bk0_n_bk1_
,
arg
.
c_grid_desc_mblock_mperblock_nblock_nperblock_
,
arg
.
block_2_ctile_map_
);
}
else
{
ave_time
=
ave_time
=
launch_and_time_kernel
(
kernel
,
launch_and_time_kernel
(
kernel
,
nrepeat
,
nrepeat
,
dim3
(
grid_size
),
dim3
(
grid_size
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -571,15 +535,14 @@ struct DeviceGemm_Xdl_CShuffle
...
@@ -571,15 +535,14 @@ struct DeviceGemm_Xdl_CShuffle
arg
.
c_grid_desc_mblock_mperblock_nblock_nperblock_
,
arg
.
c_grid_desc_mblock_mperblock_nblock_nperblock_
,
arg
.
block_2_ctile_map_
);
arg
.
block_2_ctile_map_
);
}
}
}
return
ave_time
;
return
ave_time
;
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp
View file @
ae99bcb9
...
@@ -366,7 +366,7 @@ struct DeviceGroupedGemmXdl
...
@@ -366,7 +366,7 @@ struct DeviceGroupedGemmXdl
{
{
using
Argument
=
DeviceGroupedGemmXdl
::
Argument
;
using
Argument
=
DeviceGroupedGemmXdl
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
StaticallyIndexedArray
<
GemmDescKernelArg
,
MaxGroupCount
>
gemm_desc_kernel_arg_arg
;
StaticallyIndexedArray
<
GemmDescKernelArg
,
MaxGroupCount
>
gemm_desc_kernel_arg_arg
;
...
@@ -438,6 +438,8 @@ struct DeviceGroupedGemmXdl
...
@@ -438,6 +438,8 @@ struct DeviceGroupedGemmXdl
dim3
(
arg
.
grid_size_
),
dim3
(
arg
.
grid_size_
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
gemm_desc_kernel_arg_arg
,
gemm_desc_kernel_arg_arg
,
arg
.
gemm_desc_kernel_arg_
.
size
(),
arg
.
gemm_desc_kernel_arg_
.
size
(),
arg
.
a_element_op_
,
arg
.
a_element_op_
,
...
@@ -462,6 +464,8 @@ struct DeviceGroupedGemmXdl
...
@@ -462,6 +464,8 @@ struct DeviceGroupedGemmXdl
dim3
(
arg
.
grid_size_
),
dim3
(
arg
.
grid_size_
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
measure_time
,
gemm_desc_kernel_arg_arg
,
gemm_desc_kernel_arg_arg
,
arg
.
gemm_desc_kernel_arg_
.
size
(),
arg
.
gemm_desc_kernel_arg_
.
size
(),
arg
.
a_element_op_
,
arg
.
a_element_op_
,
...
@@ -473,9 +477,9 @@ struct DeviceGroupedGemmXdl
...
@@ -473,9 +477,9 @@ struct DeviceGroupedGemmXdl
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
ae99bcb9
...
@@ -19,7 +19,7 @@ include_directories(BEFORE
...
@@ -19,7 +19,7 @@ include_directories(BEFORE
function
(
add_instance_library INSTANCE_NAME
)
function
(
add_instance_library INSTANCE_NAME
)
message
(
"adding instance
${
INSTANCE_NAME
}
"
)
message
(
"adding instance
${
INSTANCE_NAME
}
"
)
add_library
(
${
INSTANCE_NAME
}
SHARED
${
ARGN
}
)
add_library
(
${
INSTANCE_NAME
}
OBJECT
${
ARGN
}
)
target_compile_features
(
${
INSTANCE_NAME
}
PUBLIC
)
target_compile_features
(
${
INSTANCE_NAME
}
PUBLIC
)
set_target_properties
(
${
INSTANCE_NAME
}
PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
${
INSTANCE_NAME
}
PROPERTIES POSITION_INDEPENDENT_CODE ON
)
endfunction
(
add_instance_library INSTANCE_NAME
)
endfunction
(
add_instance_library INSTANCE_NAME
)
...
@@ -38,6 +38,10 @@ add_subdirectory(conv2d_fwd_bias_relu_add)
...
@@ -38,6 +38,10 @@ add_subdirectory(conv2d_fwd_bias_relu_add)
add_subdirectory
(
conv2d_fwd_bias_relu_atomic_add
)
add_subdirectory
(
conv2d_fwd_bias_relu_atomic_add
)
add_subdirectory
(
conv2d_bwd_data
)
add_subdirectory
(
conv2d_bwd_data
)
add_subdirectory
(
reduce
)
add_subdirectory
(
reduce
)
add_subdirectory
(
convnd_bwd_data
)
add_subdirectory
(
grouped_gemm
)
add_subdirectory
(
conv2d_bwd_weight
)
add_subdirectory
(
batched_gemm_reduce
)
add_library
(
device_operations STATIC
add_library
(
device_operations STATIC
$<TARGET_OBJECTS:device_conv1d_fwd_instance>
$<TARGET_OBJECTS:device_conv1d_fwd_instance>
...
@@ -52,6 +56,11 @@ add_library(device_operations STATIC
...
@@ -52,6 +56,11 @@ add_library(device_operations STATIC
$<TARGET_OBJECTS:device_gemm_bias_relu_add_instance>
$<TARGET_OBJECTS:device_gemm_bias_relu_add_instance>
$<TARGET_OBJECTS:device_gemm_bias2d_instance>
$<TARGET_OBJECTS:device_gemm_bias2d_instance>
$<TARGET_OBJECTS:device_reduce_instance>
$<TARGET_OBJECTS:device_reduce_instance>
$<TARGET_OBJECTS:device_convnd_bwd_data_instance>
$<TARGET_OBJECTS:device_grouped_gemm_instance>
$<TARGET_OBJECTS:device_conv2d_bwd_weight_instance>
$<TARGET_OBJECTS:device_batched_gemm_reduce_instance>
$<TARGET_OBJECTS:device_conv3d_fwd_instance>
device_conv2d.cpp
device_conv2d.cpp
)
)
add_library
(
composablekernels::device_operations ALIAS device_operations
)
add_library
(
composablekernels::device_operations ALIAS device_operations
)
...
@@ -86,11 +95,8 @@ target_include_directories(device_operations PUBLIC
...
@@ -86,11 +95,8 @@ target_include_directories(device_operations PUBLIC
#once new arches are enabled make this an option on the main cmake file
#once new arches are enabled make this an option on the main cmake file
# and pass down here to be exported
# and pass down here to be exported
target_compile_definitions
(
device_operations
PUBLIC -DCK_AMD_GPU_GFX908
)
target_compile_options
(
device_operations
target_compile_options
(
device_operations
PRIVATE -
amdgpu-target
=gfx908
PRIVATE -
-offload-arch
=gfx908
)
)
# install(TARGETS device_operations LIBRARY DESTINATION lib)
# install(TARGETS device_operations LIBRARY DESTINATION lib)
install
(
TARGETS device_operations
install
(
TARGETS device_operations
...
@@ -106,7 +112,3 @@ install(EXPORT device_operationsTargets
...
@@ -106,7 +112,3 @@ install(EXPORT device_operationsTargets
NAMESPACE composable_kernel::
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
)
add_subdirectory
(
convnd_bwd_data
)
add_subdirectory
(
grouped_gemm
)
add_subdirectory
(
conv2d_bwd_weight
)
add_subdirectory
(
batched_gemm_reduce
)
library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
View file @
ae99bcb9
...
@@ -5,7 +5,8 @@ set(DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
...
@@ -5,7 +5,8 @@ set(DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp
device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp
)
)
add_instance_library
(
device_batched_gemm_reduce_instance
${
DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
}
)
add_instance_library
(
device_batched_gemm_reduce_instance OBJECT
${
DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
}
)
install
(
TARGETS device_batched_gemm_reduce_instance LIBRARY DESTINATION lib
)
target_compile_features
(
device_batched_gemm_reduce_instance PUBLIC
)
set_target_properties
(
device_batched_gemm_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
clang_tidy_check
(
device_batched_gemm_reduce_instance
)
clang_tidy_check
(
device_batched_gemm_reduce_instance
)
library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
View file @
ae99bcb9
...
@@ -3,7 +3,7 @@ set(DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE
...
@@ -3,7 +3,7 @@ set(DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE
device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
)
)
add_library
(
device_conv2d_bwd_weight_instance
SHARED
${
DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE
}
)
add_library
(
device_conv2d_bwd_weight_instance
OBJECT
${
DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv2d_bwd_weight_instance PUBLIC
)
target_compile_features
(
device_conv2d_bwd_weight_instance PUBLIC
)
set_target_properties
(
device_conv2d_bwd_weight_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_conv2d_bwd_weight_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv2d_bwd_weight_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_conv2d_bwd_weight_instance LIBRARY DESTINATION lib
)
...
...
library/src/tensor_operation_instance/gpu/conv3d_fwd/CMakeLists.txt
View file @
ae99bcb9
...
@@ -5,9 +5,8 @@ set(DEVICE_CONV3D_FWD_INSTANCE_SOURCE
...
@@ -5,9 +5,8 @@ set(DEVICE_CONV3D_FWD_INSTANCE_SOURCE
device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp;
device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp;
device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp;
device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp;
)
)
add_library
(
device_conv3d_fwd_instance
SHARED
${
DEVICE_CONV3D_FWD_INSTANCE_SOURCE
}
)
add_library
(
device_conv3d_fwd_instance
OBJECT
${
DEVICE_CONV3D_FWD_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv3d_fwd_instance PUBLIC
)
target_compile_features
(
device_conv3d_fwd_instance PUBLIC
)
set_target_properties
(
device_conv3d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_conv3d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv3d_fwd_instance LIBRARY DESTINATION lib
)
clang_tidy_check
(
device_conv3d_fwd_instance
)
clang_tidy_check
(
device_conv3d_fwd_instance
)
library/src/tensor_operation_instance/gpu/convnd_bwd_data/CMakeLists.txt
View file @
ae99bcb9
...
@@ -14,7 +14,7 @@ set(DEVICE_CONVND_BWD_DATA_INSTANCE_SOURCE
...
@@ -14,7 +14,7 @@ set(DEVICE_CONVND_BWD_DATA_INSTANCE_SOURCE
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp;
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp;
)
)
add_library
(
device_convnd_bwd_data_instance
SHARED
${
DEVICE_CONVND_BWD_DATA_INSTANCE_SOURCE
}
)
add_library
(
device_convnd_bwd_data_instance
OBJECT
${
DEVICE_CONVND_BWD_DATA_INSTANCE_SOURCE
}
)
target_compile_features
(
device_convnd_bwd_data_instance PUBLIC
)
target_compile_features
(
device_convnd_bwd_data_instance PUBLIC
)
set_target_properties
(
device_convnd_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_convnd_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_convnd_bwd_data_instance LIBRARY DESTINATION lib
)
install
(
TARGETS device_convnd_bwd_data_instance LIBRARY DESTINATION lib
)
...
...
library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt
View file @
ae99bcb9
...
@@ -6,7 +6,7 @@ set(DEVICE_GROUPED_GEMM_INSTANCE_SOURCE
...
@@ -6,7 +6,7 @@ set(DEVICE_GROUPED_GEMM_INSTANCE_SOURCE
device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp;
device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp;
)
)
add_library
(
device_grouped_gemm_instance
SHARED
${
DEVICE_GROUPED_GEMM_INSTANCE_SOURCE
}
)
add_library
(
device_grouped_gemm_instance
OBJECT
${
DEVICE_GROUPED_GEMM_INSTANCE_SOURCE
}
)
target_compile_features
(
device_grouped_gemm_instance PUBLIC
)
target_compile_features
(
device_grouped_gemm_instance PUBLIC
)
set_target_properties
(
device_grouped_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_grouped_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment