Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
e00a943e
Commit
e00a943e
authored
May 17, 2022
by
myamlak
Browse files
Merge remote-tracking branch 'origin/develop' into myamlak/cgemm
parents
ffe12e2e
9f71ff48
Changes
162
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
180 additions
and
93 deletions
+180
-93
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
..._operation/cpu/reference_conv_fwd_bias_activation_add.hpp
+2
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
...library/reference_tensor_operation/cpu/reference_gemm.hpp
+2
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
...reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
+2
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation.hpp
...e_tensor_operation/cpu/reference_gemm_bias_activation.hpp
+2
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation_add.hpp
...nsor_operation/cpu/reference_gemm_bias_activation_add.hpp
+2
-1
library/include/ck/library/utility/conv_util.hpp
library/include/ck/library/utility/conv_util.hpp
+42
-42
library/include/ck/library/utility/op_instance_engine.hpp
library/include/ck/library/utility/op_instance_engine.hpp
+2
-2
library/src/host_tensor/CMakeLists.txt
library/src/host_tensor/CMakeLists.txt
+23
-2
library/src/host_tensor/device.cpp
library/src/host_tensor/device.cpp
+14
-15
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+73
-1
library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
...tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
+3
-3
library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
...operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
+3
-2
library/src/tensor_operation_instance/gpu/conv1d_fwd/CMakeLists.txt
...c/tensor_operation_instance/gpu/conv1d_fwd/CMakeLists.txt
+3
-3
library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
...sor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
+1
-3
library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
...r_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
+1
-1
library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
...c/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
+1
-3
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
...peration_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
+1
-3
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
...tion_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
+1
-3
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_atomic_add/CMakeLists.txt
...stance/gpu/conv2d_fwd_bias_relu_atomic_add/CMakeLists.txt
+1
-3
library/src/tensor_operation_instance/gpu/conv3d_fwd/CMakeLists.txt
...c/tensor_operation_instance/gpu/conv3d_fwd/CMakeLists.txt
+1
-2
No files found.
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
View file @
e00a943e
...
...
@@ -130,7 +130,8 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
return
0
;
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/*stream_config*/
=
StreamConfig
{})
override
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
View file @
e00a943e
...
...
@@ -80,7 +80,8 @@ struct ReferenceGemm : public device::BaseOperator
return
0
;
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
View file @
e00a943e
...
...
@@ -82,7 +82,8 @@ struct ReferenceGemmBias2D : public device::BaseOperator
return
0
;
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation.hpp
View file @
e00a943e
...
...
@@ -85,7 +85,8 @@ struct ReferenceGemmBiasActivation : public device::BaseOperator
return
0
;
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation_add.hpp
View file @
e00a943e
...
...
@@ -91,7 +91,8 @@ struct ReferenceGemmBiasActivationAdd : public device::BaseOperator
return
0
;
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
...
...
library/include/ck/library/utility/conv_
fwd_
util.hpp
→
library/include/ck/library/utility/conv_util.hpp
View file @
e00a943e
...
...
@@ -146,19 +146,19 @@ struct ConvParams
const
std
::
vector
<
ck
::
index_t
>&
left_pads
,
const
std
::
vector
<
ck
::
index_t
>&
right_pads
);
ck
::
index_t
num_dim_spatial
;
ck
::
index_t
N
;
ck
::
index_t
K
;
ck
::
index_t
C
;
ck
::
index_t
num_dim_spatial
_
;
ck
::
index_t
N
_
;
ck
::
index_t
K
_
;
ck
::
index_t
C
_
;
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
;
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
;
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
_
;
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
_
;
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
;
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
;
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
_
;
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
_
;
std
::
vector
<
ck
::
index_t
>
input_left_pads
;
std
::
vector
<
ck
::
index_t
>
input_right_pads
;
std
::
vector
<
ck
::
index_t
>
input_left_pads
_
;
std
::
vector
<
ck
::
index_t
>
input_right_pads
_
;
std
::
vector
<
ck
::
index_t
>
GetOutputSpatialLengths
()
const
;
};
...
...
@@ -268,10 +268,10 @@ void run_reference_convolution_forward(const ConvParams& params,
auto
ref_argument
=
ref_conv
.
MakeArgument
(
input
,
weights
,
output
,
params
.
conv_filter_strides
,
params
.
conv_filter_dilations
,
params
.
input_left_pads
,
params
.
input_right_pads
,
params
.
conv_filter_strides
_
,
params
.
conv_filter_dilations
_
,
params
.
input_left_pads
_
,
params
.
input_right_pads
_
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
...
...
@@ -437,17 +437,17 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
virtual
InTensorsTuple
GetInputTensors
()
const
override
{
std
::
vector
<
std
::
size_t
>
input_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
N
),
static_cast
<
std
::
size_t
>
(
params_
.
C
)};
std
::
vector
<
std
::
size_t
>
input_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
N
_
),
static_cast
<
std
::
size_t
>
(
params_
.
C
_
)};
input_dims
.
insert
(
std
::
end
(
input_dims
),
std
::
begin
(
params_
.
input_spatial_lengths
),
std
::
end
(
params_
.
input_spatial_lengths
));
std
::
begin
(
params_
.
input_spatial_lengths
_
),
std
::
end
(
params_
.
input_spatial_lengths
_
));
std
::
vector
<
std
::
size_t
>
filter_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
K
),
static_cast
<
std
::
size_t
>
(
params_
.
C
)};
std
::
vector
<
std
::
size_t
>
filter_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
K
_
),
static_cast
<
std
::
size_t
>
(
params_
.
C
_
)};
filter_dims
.
insert
(
std
::
end
(
filter_dims
),
std
::
begin
(
params_
.
filter_spatial_lengths
),
std
::
end
(
params_
.
filter_spatial_lengths
));
std
::
begin
(
params_
.
filter_spatial_lengths
_
),
std
::
end
(
params_
.
filter_spatial_lengths
_
));
auto
input
=
std
::
make_unique
<
Tensor
<
InDataType
>>
(
get_host_tensor_descriptor
(
input_dims
,
InLayout
{}));
...
...
@@ -465,8 +465,8 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
virtual
TensorPtr
<
OutDataType
>
GetOutputTensor
()
const
override
{
std
::
vector
<
std
::
size_t
>
output_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
N
),
static_cast
<
std
::
size_t
>
(
params_
.
K
)};
std
::
vector
<
std
::
size_t
>
output_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
N
_
),
static_cast
<
std
::
size_t
>
(
params_
.
K
_
)};
output_dims
.
insert
(
std
::
end
(
output_dims
),
std
::
begin
(
output_spatial_lengths_
),
std
::
end
(
output_spatial_lengths_
));
...
...
@@ -522,16 +522,16 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
static_cast
<
InDataType
*>
(
in_device_buffers
[
0
]
->
GetDeviceBuffer
()),
static_cast
<
WeiDataType
*>
(
in_device_buffers
[
1
]
->
GetDeviceBuffer
()),
static_cast
<
OutDataType
*>
(
out_device_buffer
->
GetDeviceBuffer
()),
params_
.
N
,
params_
.
K
,
params_
.
C
,
params_
.
input_spatial_lengths
,
params_
.
filter_spatial_lengths
,
params_
.
N
_
,
params_
.
K
_
,
params_
.
C
_
,
params_
.
input_spatial_lengths
_
,
params_
.
filter_spatial_lengths
_
,
output_spatial_lengths_
,
params_
.
conv_filter_strides
,
params_
.
conv_filter_dilations
,
params_
.
input_left_pads
,
params_
.
input_right_pads
,
params_
.
conv_filter_strides
_
,
params_
.
conv_filter_dilations
_
,
params_
.
input_left_pads
_
,
params_
.
input_right_pads
_
,
InElementwiseOp
{},
WeiElementwiseOp
{},
OutElementwiseOp
{});
...
...
@@ -539,20 +539,20 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
virtual
std
::
size_t
GetFlops
()
const
override
{
return
get_flops
(
params_
.
N
,
params_
.
C
,
params_
.
K
,
params_
.
filter_spatial_lengths
,
return
get_flops
(
params_
.
N
_
,
params_
.
C
_
,
params_
.
K
_
,
params_
.
filter_spatial_lengths
_
,
output_spatial_lengths_
);
}
virtual
std
::
size_t
GetBtype
()
const
override
{
return
get_btype
<
InDataType
,
WeiDataType
,
OutDataType
>
(
params_
.
N
,
params_
.
C
,
params_
.
K
,
params_
.
input_spatial_lengths
,
params_
.
filter_spatial_lengths
,
return
get_btype
<
InDataType
,
WeiDataType
,
OutDataType
>
(
params_
.
N
_
,
params_
.
C
_
,
params_
.
K
_
,
params_
.
input_spatial_lengths
_
,
params_
.
filter_spatial_lengths
_
,
output_spatial_lengths_
);
}
...
...
library/include/ck/library/utility/op_instance_engine.hpp
View file @
e00a943e
...
...
@@ -128,7 +128,7 @@ class OpInstanceRunEngine
template
<
typename
OpInstancePtr
>
ProfileBestConfig
Profile
(
const
std
::
vector
<
OpInstancePtr
>&
op_ptrs
,
int
nrepeat
=
100
,
bool
time_kernel
=
false
,
bool
do_verification
=
false
,
bool
do_log
=
false
)
{
...
...
@@ -143,7 +143,7 @@ class OpInstanceRunEngine
if
(
op_ptr
->
IsSupportedArgument
(
argument
.
get
()))
{
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
avg_time
=
invoker
->
Run
(
argument
.
get
(),
nrepeat
);
float
avg_time
=
invoker
->
Run
(
argument
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
}
);
std
::
size_t
flops
=
op_instance_
.
GetFlops
();
std
::
size_t
num_btype
=
op_instance_
.
GetBtype
();
...
...
library/src/host_tensor/CMakeLists.txt
View file @
e00a943e
...
...
@@ -10,10 +10,31 @@ set(HOST_TENSOR_SOURCE
host_tensor.cpp
)
add_library
(
host_tensor SHARED
${
HOST_TENSOR_SOURCE
}
)
add_library
(
host_tensor STATIC
${
HOST_TENSOR_SOURCE
}
)
add_library
(
composable_kernel::host_tensor ALIAS host_tensor
)
target_compile_features
(
host_tensor PUBLIC
)
set_target_properties
(
host_tensor PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_include_directories
(
host_tensor SYSTEM PUBLIC $<BUILD_INTERFACE:
${
HALF_INCLUDE_DIR
}
>
)
install
(
TARGETS host_tensor LIBRARY DESTINATION lib
)
target_include_directories
(
host_tensor PUBLIC
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck>"
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/utility>"
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/host_tensor>"
)
install
(
TARGETS host_tensor
EXPORT host_tensorTargets
LIBRARY DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
ARCHIVE DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
RUNTIME DESTINATION
${
CMAKE_INSTALL_BINDIR
}
INCLUDES DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
install
(
EXPORT host_tensorTargets
FILE composable_kernelhost_tensorTargets.cmake
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
clang_tidy_check
(
host_tensor
)
library/src/host_tensor/device.cpp
View file @
e00a943e
...
...
@@ -2,7 +2,7 @@
DeviceMem
::
DeviceMem
(
std
::
size_t
mem_size
)
:
mMemSize
(
mem_size
)
{
hip
GetErrorString
(
hipMalloc
(
static_cast
<
void
**>
(
&
mpDeviceBuf
),
mMemSize
));
hip
_check_error
(
hipMalloc
(
static_cast
<
void
**>
(
&
mpDeviceBuf
),
mMemSize
));
}
void
*
DeviceMem
::
GetDeviceBuffer
()
{
return
mpDeviceBuf
;
}
...
...
@@ -11,49 +11,48 @@ std::size_t DeviceMem::GetBufferSize() { return mMemSize; }
void
DeviceMem
::
ToDevice
(
const
void
*
p
)
{
hipGetErrorString
(
hipMemcpy
(
mpDeviceBuf
,
const_cast
<
void
*>
(
p
),
mMemSize
,
hipMemcpyHostToDevice
));
hip_check_error
(
hipMemcpy
(
mpDeviceBuf
,
const_cast
<
void
*>
(
p
),
mMemSize
,
hipMemcpyHostToDevice
));
}
void
DeviceMem
::
FromDevice
(
void
*
p
)
{
hip
GetErrorString
(
hipMemcpy
(
p
,
mpDeviceBuf
,
mMemSize
,
hipMemcpyDeviceToHost
));
hip
_check_error
(
hipMemcpy
(
p
,
mpDeviceBuf
,
mMemSize
,
hipMemcpyDeviceToHost
));
}
void
DeviceMem
::
SetZero
()
{
hip
GetErrorString
(
hipMemset
(
mpDeviceBuf
,
0
,
mMemSize
));
}
void
DeviceMem
::
SetZero
()
{
hip
_check_error
(
hipMemset
(
mpDeviceBuf
,
0
,
mMemSize
));
}
DeviceMem
::~
DeviceMem
()
{
hip
GetErrorString
(
hipFree
(
mpDeviceBuf
));
}
DeviceMem
::~
DeviceMem
()
{
hip
_check_error
(
hipFree
(
mpDeviceBuf
));
}
struct
KernelTimerImpl
{
KernelTimerImpl
()
{
hip
GetErrorString
(
hipEventCreate
(
&
mStart
));
hip
GetErrorString
(
hipEventCreate
(
&
mEnd
));
hip
_check_error
(
hipEventCreate
(
&
mStart
));
hip
_check_error
(
hipEventCreate
(
&
mEnd
));
}
~
KernelTimerImpl
()
{
hip
GetErrorString
(
hipEventDestroy
(
mStart
));
hip
GetErrorString
(
hipEventDestroy
(
mEnd
));
hip
_check_error
(
hipEventDestroy
(
mStart
));
hip
_check_error
(
hipEventDestroy
(
mEnd
));
}
void
Start
()
{
hip
GetErrorString
(
hipDeviceSynchronize
());
hip
GetErrorString
(
hipEventRecord
(
mStart
,
nullptr
));
hip
_check_error
(
hipDeviceSynchronize
());
hip
_check_error
(
hipEventRecord
(
mStart
,
nullptr
));
}
void
End
()
{
hip
GetErrorString
(
hipEventRecord
(
mEnd
,
nullptr
));
hip
GetErrorString
(
hipEventSynchronize
(
mEnd
));
hip
_check_error
(
hipEventRecord
(
mEnd
,
nullptr
));
hip
_check_error
(
hipEventSynchronize
(
mEnd
));
}
float
GetElapsedTime
()
const
{
float
time
;
hip
GetErrorString
(
hipEventElapsedTime
(
&
time
,
mStart
,
mEnd
));
hip
_check_error
(
hipEventElapsedTime
(
&
time
,
mStart
,
mEnd
));
return
time
;
}
...
...
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
e00a943e
...
...
@@ -11,6 +11,7 @@ include_directories(BEFORE
${
PROJECT_SOURCE_DIR
}
/include/ck/tensor_operation/gpu/thread
${
PROJECT_SOURCE_DIR
}
/include/ck/tensor_operation/gpu/element
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/host_tensor
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/host
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/tensor_operation_instance
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/tensor_operation_instance/gpu/reduce
${
PROJECT_SOURCE_DIR
}
/external/include/half
...
...
@@ -18,7 +19,7 @@ include_directories(BEFORE
function
(
add_instance_library INSTANCE_NAME
)
message
(
"adding instance
${
INSTANCE_NAME
}
"
)
add_library
(
${
INSTANCE_NAME
}
SHARED
${
ARGN
}
)
add_library
(
${
INSTANCE_NAME
}
OBJECT
${
ARGN
}
)
target_compile_features
(
${
INSTANCE_NAME
}
PUBLIC
)
set_target_properties
(
${
INSTANCE_NAME
}
PROPERTIES POSITION_INDEPENDENT_CODE ON
)
endfunction
(
add_instance_library INSTANCE_NAME
)
...
...
@@ -42,3 +43,74 @@ add_subdirectory(grouped_gemm)
add_subdirectory
(
conv2d_bwd_weight
)
add_subdirectory
(
batched_gemm_reduce
)
add_subdirectory
(
cgemm
)
add_library
(
device_operations STATIC
$<TARGET_OBJECTS:device_conv1d_fwd_instance>
$<TARGET_OBJECTS:device_batched_gemm_instance>
$<TARGET_OBJECTS:device_conv2d_bwd_data_instance>
$<TARGET_OBJECTS:device_conv2d_fwd_instance>
$<TARGET_OBJECTS:device_conv2d_fwd_bias_relu_instance>
$<TARGET_OBJECTS:device_conv2d_fwd_bias_relu_add_instance>
$<TARGET_OBJECTS:device_conv2d_fwd_bias_relu_atomic_add_instance>
$<TARGET_OBJECTS:device_gemm_instance>
$<TARGET_OBJECTS:device_gemm_bias_relu_instance>
$<TARGET_OBJECTS:device_gemm_bias_relu_add_instance>
$<TARGET_OBJECTS:device_gemm_bias2d_instance>
$<TARGET_OBJECTS:device_reduce_instance>
$<TARGET_OBJECTS:device_convnd_bwd_data_instance>
$<TARGET_OBJECTS:device_grouped_gemm_instance>
$<TARGET_OBJECTS:device_conv2d_bwd_weight_instance>
$<TARGET_OBJECTS:device_batched_gemm_reduce_instance>
$<TARGET_OBJECTS:device_conv3d_fwd_instance>
$<TARGET_OBJECTS:device_cgemm_instance>
device_conv2d.cpp
)
add_library
(
composablekernels::device_operations ALIAS device_operations
)
set
(
DEV_OPS_INC_DIRS
${
PROJECT_SOURCE_DIR
}
/include/ck/
${
PROJECT_SOURCE_DIR
}
/library/include/ck/
${
PROJECT_SOURCE_DIR
}
/external/include/
)
target_compile_features
(
device_operations PUBLIC
)
set_target_properties
(
device_operations PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_include_directories
(
device_operations PUBLIC
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/utility>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_description>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/problem_transform>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/device>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/grid>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/block>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/warp>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/thread>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/element>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/host_tensor>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/host>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/tensor_operation_instance>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/tensor_operation_instance/gpu/reduce>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/half>
)
#once new arches are enabled make this an option on the main cmake file
# and pass down here to be exported
target_compile_options
(
device_operations
PRIVATE --offload-arch=gfx908
)
# install(TARGETS device_operations LIBRARY DESTINATION lib)
install
(
TARGETS device_operations
EXPORT device_operationsTargets
LIBRARY DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
ARCHIVE DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
RUNTIME DESTINATION
${
CMAKE_INSTALL_BINDIR
}
INCLUDES DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
install
(
DIRECTORY
${
DEV_OPS_INC_DIRS
}
DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck
)
install
(
EXPORT device_operationsTargets
FILE composable_kerneldevice_operationsTargets.cmake
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
View file @
e00a943e
...
...
@@ -18,9 +18,9 @@ set(DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp;
)
add_library
(
device_batched_gemm_instance
SHARED
${
DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
}
)
target_compile_features
(
device_batched_gemm_instance PUBLIC
)
add_library
(
device_batched_gemm_instance
OBJECT
${
DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
}
)
#
target_compile_features(device_batched_gemm_instance PUBLIC)
set_target_properties
(
device_batched_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib
)
#
install(TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib)
clang_tidy_check
(
device_batched_gemm_instance
)
library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
View file @
e00a943e
...
...
@@ -5,7 +5,8 @@ set(DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp
)
add_instance_library
(
device_batched_gemm_reduce_instance
${
DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
}
)
install
(
TARGETS device_batched_gemm_reduce_instance LIBRARY DESTINATION lib
)
add_instance_library
(
device_batched_gemm_reduce_instance OBJECT
${
DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
}
)
target_compile_features
(
device_batched_gemm_reduce_instance PUBLIC
)
set_target_properties
(
device_batched_gemm_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
clang_tidy_check
(
device_batched_gemm_reduce_instance
)
library/src/tensor_operation_instance/gpu/conv1d_fwd/CMakeLists.txt
View file @
e00a943e
...
...
@@ -6,9 +6,9 @@ set(DEVICE_CONV1D_FWD_INSTANCE_SOURCE
device_conv1d_fwd_xdl_nwc_kxc_nwk_int8_instance.cpp;
)
add_library
(
device_conv1d_fwd_instance
SHARED
${
DEVICE_CONV1D_FWD_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv1d_fwd_instance PUBLIC
)
add_library
(
device_conv1d_fwd_instance
OBJECT
${
DEVICE_CONV1D_FWD_INSTANCE_SOURCE
}
)
#
target_compile_features(device_conv1d_fwd_instance PUBLIC)
set_target_properties
(
device_conv1d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv1d_fwd_instance LIBRARY DESTINATION lib
)
#
install(TARGETS device_conv1d_fwd_instance LIBRARY DESTINATION lib)
clang_tidy_check
(
device_conv1d_fwd_instance
)
library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
View file @
e00a943e
...
...
@@ -6,9 +6,7 @@ set(DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE
device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
)
add_library
(
device_conv2d_bwd_data_instance SHARED
${
DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv2d_bwd_data_instance PUBLIC
)
add_library
(
device_conv2d_bwd_data_instance OBJECT
${
DEVICE_CONV2D_BWD_DATA_INSTANCE_SOURCE
}
)
set_target_properties
(
device_conv2d_bwd_data_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv2d_bwd_data_instance LIBRARY DESTINATION lib
)
clang_tidy_check
(
device_conv2d_bwd_data_instance
)
library/src/tensor_operation_instance/gpu/conv2d_bwd_weight/CMakeLists.txt
View file @
e00a943e
...
...
@@ -3,7 +3,7 @@ set(DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE
device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
device_conv2d_bwd_weight_xdl_nhwc_kyxc_nhwk_f32_instance.cpp;
)
add_library
(
device_conv2d_bwd_weight_instance
SHARED
${
DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE
}
)
add_library
(
device_conv2d_bwd_weight_instance
OBJECT
${
DEVICE_CONV2D_BWD_WEIGHT_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv2d_bwd_weight_instance PUBLIC
)
set_target_properties
(
device_conv2d_bwd_weight_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv2d_bwd_weight_instance LIBRARY DESTINATION lib
)
...
...
library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
View file @
e00a943e
...
...
@@ -6,9 +6,7 @@ set(DEVICE_CONV2D_FWD_INSTANCE_SOURCE
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp;
device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library
(
device_conv2d_fwd_instance SHARED
${
DEVICE_CONV2D_FWD_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv2d_fwd_instance PUBLIC
)
add_library
(
device_conv2d_fwd_instance OBJECT
${
DEVICE_CONV2D_FWD_INSTANCE_SOURCE
}
)
set_target_properties
(
device_conv2d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv2d_fwd_instance LIBRARY DESTINATION lib
)
clang_tidy_check
(
device_conv2d_fwd_instance
)
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
View file @
e00a943e
...
...
@@ -2,9 +2,7 @@
set
(
DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE
device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library
(
device_conv2d_fwd_bias_relu_instance SHARED
${
DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv2d_fwd_bias_relu_instance PUBLIC
)
add_library
(
device_conv2d_fwd_bias_relu_instance OBJECT
${
DEVICE_CONV2D_FWD_BIAS_RELU_INSTANCE_SOURCE
}
)
set_target_properties
(
device_conv2d_fwd_bias_relu_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv2d_fwd_bias_relu_instance LIBRARY DESTINATION lib
)
clang_tidy_check
(
device_conv2d_fwd_bias_relu_instance
)
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
View file @
e00a943e
...
...
@@ -2,9 +2,7 @@
set
(
DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library
(
device_conv2d_fwd_bias_relu_add_instance SHARED
${
DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv2d_fwd_bias_relu_add_instance PUBLIC
)
add_library
(
device_conv2d_fwd_bias_relu_add_instance OBJECT
${
DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
}
)
set_target_properties
(
device_conv2d_fwd_bias_relu_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv2d_fwd_bias_relu_add_instance LIBRARY DESTINATION lib
)
clang_tidy_check
(
device_conv2d_fwd_bias_relu_add_instance
)
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_atomic_add/CMakeLists.txt
View file @
e00a943e
...
...
@@ -3,9 +3,7 @@ set(DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE
device_conv2d_fwd_xdl_c_shuffle_bias_relu_atomic_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library
(
device_conv2d_fwd_bias_relu_atomic_add_instance SHARED
${
DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv2d_fwd_bias_relu_atomic_add_instance PUBLIC
)
add_library
(
device_conv2d_fwd_bias_relu_atomic_add_instance OBJECT
${
DEVICE_CONV2D_FWD_BIAS_RELU_ATOMIC_ADD_INSTANCE_SOURCE
}
)
set_target_properties
(
device_conv2d_fwd_bias_relu_atomic_add_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv2d_fwd_bias_relu_atomic_add_instance LIBRARY DESTINATION lib
)
clang_tidy_check
(
device_conv2d_fwd_bias_relu_atomic_add_instance
)
library/src/tensor_operation_instance/gpu/conv3d_fwd/CMakeLists.txt
View file @
e00a943e
...
...
@@ -5,9 +5,8 @@ set(DEVICE_CONV3D_FWD_INSTANCE_SOURCE
device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp;
device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp;
)
add_library
(
device_conv3d_fwd_instance
SHARED
${
DEVICE_CONV3D_FWD_INSTANCE_SOURCE
}
)
add_library
(
device_conv3d_fwd_instance
OBJECT
${
DEVICE_CONV3D_FWD_INSTANCE_SOURCE
}
)
target_compile_features
(
device_conv3d_fwd_instance PUBLIC
)
set_target_properties
(
device_conv3d_fwd_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_conv3d_fwd_instance LIBRARY DESTINATION lib
)
clang_tidy_check
(
device_conv3d_fwd_instance
)
Prev
1
2
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment